Initial commit: Local Transcription App v1.0

Phase 1 Complete - Standalone Desktop Application

Features:
- Real-time speech-to-text with Whisper (faster-whisper)
- PySide6 desktop GUI with settings dialog
- Web server for OBS browser source integration
- Audio capture with automatic sample rate detection and resampling
- Noise suppression with Voice Activity Detection (VAD)
- Configurable display settings (font, timestamps, fade duration)
- Settings apply without restart (with automatic model reloading)
- Auto-fade for web display transcriptions
- CPU/GPU support with automatic device detection
- Standalone executable builds (PyInstaller)
- CUDA build support (works on systems without CUDA hardware)

Components:
- Audio capture with sounddevice
- Noise reduction with noisereduce + webrtcvad
- Transcription with faster-whisper
- GUI with PySide6
- Web server with FastAPI + WebSocket
- Configuration system with YAML

Build System:
- Standard builds (CPU-only): build.sh / build.bat
- CUDA builds (universal): build-cuda.sh / build-cuda.bat
- Comprehensive BUILD.md documentation
- Cross-platform support (Linux, Windows)

Documentation:
- README.md with project overview and quick start
- BUILD.md with detailed build instructions
- NEXT_STEPS.md with future enhancement roadmap
- INSTALL.md with setup instructions

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-12-25 18:48:23 -08:00
commit 472233aec4
31 changed files with 5116 additions and 0 deletions

0
gui/__init__.py Normal file
View File

364
gui/main_window.py Normal file
View File

@@ -0,0 +1,364 @@
"""Main application window for the local transcription app."""
import customtkinter as ctk
from tkinter import filedialog, messagebox
import threading
from pathlib import Path
import sys
# Add parent directory to path for imports
sys.path.append(str(Path(__file__).parent.parent))
from client.config import Config
from client.device_utils import DeviceManager
from client.audio_capture import AudioCapture
from client.noise_suppression import NoiseSuppressor
from client.transcription_engine import TranscriptionEngine
from gui.transcription_display import TranscriptionDisplay
from gui.settings_dialog import SettingsDialog
class MainWindow(ctk.CTk):
"""Main application window."""
def __init__(self):
    """Set up window state, build the UI and schedule component start-up.

    The transcription components are created 100 ms after construction via
    ``after`` so that the window exists before initialization begins.
    """
    super().__init__()

    # Long-lived helpers and the recording flag.
    self.config = Config()
    self.device_manager = DeviceManager()
    self.is_transcribing = False

    # Pipeline pieces; filled in by _initialize_components / _start_transcription.
    self.transcription_engine: TranscriptionEngine = None
    self.noise_suppressor: NoiseSuppressor = None
    self.audio_capture: AudioCapture = None

    # Window title and initial size.
    self.title("Local Transcription")
    self.geometry("900x700")

    # Appearance comes from the config; the color theme is fixed.
    appearance = self.config.get('display.theme', 'dark')
    ctk.set_appearance_mode(appearance)
    ctk.set_default_color_theme("blue")

    self._create_widgets()

    # Route the window-manager close button through our own shutdown path.
    self.protocol("WM_DELETE_WINDOW", self._on_closing)

    # Defer component start-up until the GUI is ready (avoids XCB threading issues).
    self.after(100, self._initialize_components)
def _create_widgets(self):
"""Create all UI widgets.

Builds, top to bottom: a header (title + settings button), a status row
(status, compute device, user name), the transcription display, and a
control row (start/stop, clear, save). Widgets that other methods update
later are stored on ``self``; purely decorative ones stay local.
"""
# Header frame
header_frame = ctk.CTkFrame(self, height=80)
header_frame.pack(fill="x", padx=10, pady=(10, 0))
# Keep the fixed height instead of shrinking to fit the children
header_frame.pack_propagate(False)
# Title
title_label = ctk.CTkLabel(
header_frame,
text="Local Transcription",
font=("", 24, "bold")
)
title_label.pack(side="left", padx=20, pady=20)
# Settings button
self.settings_button = ctk.CTkButton(
header_frame,
text="⚙ Settings",
command=self._open_settings,
width=120
)
self.settings_button.pack(side="right", padx=20, pady=20)
# Status frame
status_frame = ctk.CTkFrame(self, height=60)
status_frame.pack(fill="x", padx=10, pady=(10, 0))
status_frame.pack_propagate(False)
# Status label
self.status_label = ctk.CTkLabel(
status_frame,
text="⚫ Ready",
font=("", 14)
)
self.status_label.pack(side="left", padx=20)
# Device info: show the second field of the first reported entry,
# or a placeholder when the manager reports nothing
device_info = self.device_manager.get_device_info()
device_text = device_info[0][1] if device_info else "No device"
self.device_label = ctk.CTkLabel(
status_frame,
text=f"Device: {device_text}",
font=("", 12)
)
self.device_label.pack(side="left", padx=20)
# User name display
user_name = self.config.get('user.name', 'User')
self.user_label = ctk.CTkLabel(
status_frame,
text=f"User: {user_name}",
font=("", 12)
)
self.user_label.pack(side="left", padx=20)
# Transcription display frame (takes all remaining vertical space)
display_frame = ctk.CTkFrame(self)
display_frame.pack(fill="both", expand=True, padx=10, pady=10)
# Transcription display
self.transcription_display = TranscriptionDisplay(
display_frame,
max_lines=self.config.get('display.max_lines', 100),
show_timestamps=self.config.get('display.show_timestamps', True),
font=("Courier", self.config.get('display.font_size', 12))
)
self.transcription_display.pack(fill="both", expand=True, padx=10, pady=10)
# Control frame
control_frame = ctk.CTkFrame(self, height=80)
control_frame.pack(fill="x", padx=10, pady=(0, 10))
control_frame.pack_propagate(False)
# Start/Stop button (text and color are swapped by _start/_stop_transcription)
self.start_button = ctk.CTkButton(
control_frame,
text="▶ Start Transcription",
command=self._toggle_transcription,
width=200,
height=50,
font=("", 16, "bold"),
fg_color="green"
)
self.start_button.pack(side="left", padx=20, pady=15)
# Clear button
self.clear_button = ctk.CTkButton(
control_frame,
text="Clear",
command=self._clear_transcriptions,
width=120,
height=50
)
self.clear_button.pack(side="left", padx=10, pady=15)
# Save button
self.save_button = ctk.CTkButton(
control_frame,
text="💾 Save",
command=self._save_transcriptions,
width=120,
height=50
)
self.save_button.pack(side="left", padx=10, pady=15)
def _initialize_components(self):
    """Select the compute device, build the engine and load its model.

    Runs on the GUI thread. Any failure is printed, reflected in the status
    label and reported to the user in an error dialog.
    """
    self.status_label.configure(text="⚙ Initializing...")
    # Process pending GUI events so the status text is shown before loading.
    self.update()

    try:
        settings = self.config

        # Apply the configured compute device first; the values queried
        # from the device manager below are read after this call.
        requested_device = settings.get('transcription.device', 'auto')
        self.device_manager.set_device(requested_device)

        model_size = settings.get('transcription.model', 'base')
        language = settings.get('transcription.language', 'en')
        whisper_device = self.device_manager.get_device_for_whisper()
        compute_type = self.device_manager.get_compute_type()

        self.transcription_engine = TranscriptionEngine(
            model_size=model_size,
            device=whisper_device,
            compute_type=compute_type,
            language=language,
            min_confidence=settings.get('processing.min_confidence', 0.5),
        )

        # Load model (synchronously to avoid X11 threading issues)
        if self.transcription_engine.load_model():
            self.status_label.configure(text="✓ Ready")
            return

        self.status_label.configure(text="❌ Model loading failed")
        messagebox.showerror("Error", "Failed to load transcription model")
    except Exception as exc:
        print(f"Error initializing components: {exc}")
        self.status_label.configure(text="❌ Initialization failed")
        messagebox.showerror("Error", f"Failed to initialize:\n{exc}")
def _update_status(self, status: str):
    """Schedule a status-label text change through ``after``.

    Args:
        status: Text to show in the status label.
    """
    def apply_status():
        self.status_label.configure(text=status)

    self.after(0, apply_status)
def _toggle_transcription(self):
    """Stop transcription when it is running, otherwise start it."""
    if self.is_transcribing:
        self._stop_transcription()
        return
    self._start_transcription()
def _start_transcription(self):
    """Create the capture and noise-suppression stages and begin recording.

    Shows an error dialog and returns early if the engine has not finished
    loading. Any exception while starting is reported in a dialog and
    printed.
    """
    try:
        engine = self.transcription_engine
        if not (engine and engine.is_loaded):
            messagebox.showerror("Error", "Transcription engine not ready")
            return

        settings = self.config

        # 'default' maps to None; anything else is converted to int.
        device_setting = settings.get('audio.input_device', 'default')
        if device_setting == 'default':
            input_device = None
        else:
            input_device = int(device_setting)

        self.audio_capture = AudioCapture(
            sample_rate=settings.get('audio.sample_rate', 16000),
            chunk_duration=settings.get('audio.chunk_duration', 3.0),
            device=input_device,
        )

        suppression_on = settings.get('noise_suppression.enabled', True)
        self.noise_suppressor = NoiseSuppressor(
            sample_rate=settings.get('audio.sample_rate', 16000),
            method="noisereduce" if suppression_on else "none",
            strength=settings.get('noise_suppression.strength', 0.7),
            use_vad=settings.get('processing.use_vad', True),
        )

        # Each captured chunk is handed to _process_audio_chunk.
        self.audio_capture.start_recording(callback=self._process_audio_chunk)

        # Flip the UI into its "recording" look.
        self.is_transcribing = True
        self.start_button.configure(text="⏸ Stop Transcription", fg_color="red")
        self.status_label.configure(text="🔴 Recording...")
    except Exception as exc:
        messagebox.showerror("Error", f"Failed to start transcription:\n{exc}")
        print(f"Error starting transcription: {exc}")
def _stop_transcription(self):
    """Stop audio capture and return the UI to its idle look.

    Errors raised while stopping are shown in a dialog and printed; in that
    case the UI state is left untouched.
    """
    try:
        capture = self.audio_capture
        if capture:
            capture.stop_recording()

        self.is_transcribing = False
        self.start_button.configure(text="▶ Start Transcription", fg_color="green")
        self.status_label.configure(text="✓ Ready")
    except Exception as exc:
        messagebox.showerror("Error", f"Failed to stop transcription:\n{exc}")
        print(f"Error stopping transcription: {exc}")
def _process_audio_chunk(self, audio_chunk):
    """Denoise and transcribe one captured chunk on a new daemon thread.

    Args:
        audio_chunk: The chunk handed over by the audio-capture callback.

    A non-empty transcription result is forwarded to the display via
    ``after``. Errors inside the worker are printed and otherwise ignored.
    """
    def worker():
        try:
            cleaned = self.noise_suppressor.process(audio_chunk, skip_silent=True)
            if cleaned is None:
                # The suppressor returned nothing (chunk treated as silent).
                return

            speaker = self.config.get('user.name', 'User')
            result = self.transcription_engine.transcribe(
                cleaned,
                sample_rate=self.config.get('audio.sample_rate', 16000),
                user_name=speaker,
            )
            if not result:
                return

            def show_result():
                self.transcription_display.add_transcription(
                    text=result.text,
                    user_name=result.user_name,
                    timestamp=result.timestamp,
                )

            # NOTE(review): after() is invoked from the worker thread here;
            # confirm this is safe with the Tk build in use.
            self.after(0, show_result)
        except Exception as exc:
            print(f"Error processing audio: {exc}")

    threading.Thread(target=worker, daemon=True).start()
def _clear_transcriptions(self):
    """Ask for confirmation, then empty the transcription display."""
    confirmed = messagebox.askyesno(
        "Clear Transcriptions",
        "Are you sure you want to clear all transcriptions?",
    )
    if not confirmed:
        return
    self.transcription_display.clear()
def _save_transcriptions(self):
    """Prompt for a destination file and write the transcriptions to it.

    Does nothing if the user dismisses the file dialog. The outcome of the
    save is reported in an info or error dialog.
    """
    filepath = filedialog.asksaveasfilename(
        defaultextension=".txt",
        filetypes=[("Text files", "*.txt"), ("All files", "*.*")],
    )
    if not filepath:
        return

    saved = self.transcription_display.save_to_file(filepath)
    if saved:
        messagebox.showinfo("Saved", f"Transcriptions saved to:\n{filepath}")
    else:
        messagebox.showerror("Error", "Failed to save transcriptions")
def _open_settings(self):
    """Open the settings dialog with the current device choices.

    The audio list falls back to a single ``(0, "Default")`` entry when no
    input devices are reported. The compute list is the device manager's
    entries preceded by an ``("auto", "Auto-detect")`` choice.
    """
    audio_devices = AudioCapture.get_input_devices() or [(0, "Default")]

    # Build a new list rather than inserting into the one returned by the
    # device manager: if that list is shared or cached, inserting in place
    # would add another "Auto-detect" entry every time the dialog is opened.
    compute_devices = [
        ("auto", "Auto-detect"),
        *self.device_manager.get_device_info(),
    ]

    SettingsDialog(
        self,
        self.config,
        audio_devices,
        compute_devices,
        on_save=self._on_settings_saved,
    )
def _on_settings_saved(self):
    """Apply saved settings that can change without restarting.

    Refreshes the user label and the display's line limit and timestamp
    flag, then tells the user that model and device changes need a restart.
    """
    settings = self.config

    user_name = settings.get('user.name', 'User')
    self.user_label.configure(text=f"User: {user_name}")

    display = self.transcription_display
    display.set_max_lines(settings.get('display.max_lines', 100))
    display.set_show_timestamps(settings.get('display.show_timestamps', True))

    # Model and device are only read during start-up in this window.
    messagebox.showinfo(
        "Settings Saved",
        "Some settings (model size, device) require restarting the application to take effect.",
    )
def _on_closing(self):
    """Shut down cleanly when the window is closed.

    Stops an active transcription, unloads the model if an engine exists,
    then destroys the window.
    """
    if self.is_transcribing:
        self._stop_transcription()

    engine = self.transcription_engine
    if engine:
        engine.unload_model()

    self.destroy()

524
gui/main_window_qt.py Normal file
View File

@@ -0,0 +1,524 @@
"""PySide6 main application window for the local transcription app."""
from PySide6.QtWidgets import (
QMainWindow, QWidget, QVBoxLayout, QHBoxLayout,
QPushButton, QLabel, QFileDialog, QMessageBox
)
from PySide6.QtCore import Qt, QThread, Signal
from PySide6.QtGui import QFont
from pathlib import Path
import sys
# Add parent directory to path for imports
sys.path.append(str(Path(__file__).parent.parent))
from client.config import Config
from client.device_utils import DeviceManager
from client.audio_capture import AudioCapture
from client.noise_suppression import NoiseSuppressor
from client.transcription_engine import TranscriptionEngine
from gui.transcription_display_qt import TranscriptionDisplay
from gui.settings_dialog_qt import SettingsDialog
from server.web_display import TranscriptionWebServer
import asyncio
from threading import Thread
class WebServerThread(Thread):
    """Daemon thread that runs the web server on its own asyncio event loop.

    Attributes:
        web_server: Object whose ``start()`` coroutine is run by this thread.
        loop: The thread's event loop; ``None`` until ``run`` has begun.
            Other threads use it to schedule coroutines on the server.
    """

    def __init__(self, web_server):
        """Store the server to run; the event loop is created in ``run``."""
        super().__init__(daemon=True)
        self.web_server = web_server
        self.loop = None

    def run(self):
        """Run ``web_server.start()`` to completion on a fresh event loop.

        The loop is always closed afterwards, including when ``start()``
        raises, so its resources are not leaked. The exception itself still
        propagates.
        """
        self.loop = asyncio.new_event_loop()
        asyncio.set_event_loop(self.loop)
        try:
            self.loop.run_until_complete(self.web_server.start())
        finally:
            self.loop.close()
class ModelLoaderThread(QThread):
    """Background thread that loads the Whisper model off the GUI thread.

    Emits ``finished(success, message)`` exactly once per run.
    """

    # Payload: (success flag, human-readable message).
    # NOTE(review): this reuses the name of QThread's own ``finished``
    # signal - confirm the shadowing is intended.
    finished = Signal(bool, str)

    def __init__(self, transcription_engine):
        """Remember the engine whose ``load_model`` will be called."""
        super().__init__()
        self.transcription_engine = transcription_engine

    def run(self):
        """Call ``load_model`` and report the outcome through ``finished``."""
        try:
            loaded = self.transcription_engine.load_model()
            if loaded:
                outcome = (True, "Model loaded successfully")
            else:
                outcome = (False, "Failed to load model")
            self.finished.emit(*outcome)
        except Exception as exc:
            self.finished.emit(False, f"Error loading model: {exc}")
class MainWindow(QMainWindow):
"""Main application window using PySide6."""
def __init__(self):
    """Create state, build the UI, start model loading and the web server."""
    super().__init__()

    # Long-lived helpers and the recording flag.
    self.config = Config()
    self.device_manager = DeviceManager()
    self.is_transcribing = False

    # Pipeline pieces, created later.
    self.transcription_engine: TranscriptionEngine = None
    self.noise_suppressor: NoiseSuppressor = None
    self.audio_capture: AudioCapture = None
    self.model_loader_thread: ModelLoaderThread = None

    # Settings the loaded model was built with; compared in _on_settings_saved.
    self.current_device_config: str = None
    self.current_model_size: str = None

    # Web server and the thread that runs it.
    self.web_server_thread: WebServerThread = None
    self.web_server: TranscriptionWebServer = None

    self.setWindowTitle("Local Transcription")
    self.resize(900, 700)

    self._create_widgets()

    # Starts the model load on a background thread.
    self._initialize_components()

    self._start_web_server_if_enabled()
def _create_widgets(self):
"""Create all UI widgets.

Builds a vertical layout containing, top to bottom: a header (title and
settings button), a status row (status, compute device, user name), the
transcription display, and a row of control buttons. Widgets that other
methods update later are stored on ``self``.
"""
# Central widget
central_widget = QWidget()
self.setCentralWidget(central_widget)
main_layout = QVBoxLayout()
central_widget.setLayout(main_layout)
# Header
header_widget = QWidget()
header_widget.setFixedHeight(80)
header_layout = QHBoxLayout()
header_widget.setLayout(header_layout)
title_label = QLabel("Local Transcription")
title_font = QFont()
title_font.setPointSize(24)
title_font.setBold(True)
title_label.setFont(title_font)
header_layout.addWidget(title_label)
# Stretch pushes the settings button to the right edge
header_layout.addStretch()
self.settings_button = QPushButton("⚙ Settings")
self.settings_button.setFixedSize(120, 40)
self.settings_button.clicked.connect(self._open_settings)
header_layout.addWidget(self.settings_button)
main_layout.addWidget(header_widget)
# Status bar
status_widget = QWidget()
status_widget.setFixedHeight(60)
status_layout = QHBoxLayout()
status_widget.setLayout(status_layout)
self.status_label = QLabel("⚫ Initializing...")
status_font = QFont()
status_font.setPointSize(14)
self.status_label.setFont(status_font)
status_layout.addWidget(self.status_label)
# Show the second field of the first reported device entry, or a
# placeholder when the manager reports nothing
device_info = self.device_manager.get_device_info()
device_text = device_info[0][1] if device_info else "No device"
self.device_label = QLabel(f"Device: {device_text}")
status_layout.addWidget(self.device_label)
user_name = self.config.get('user.name', 'User')
self.user_label = QLabel(f"User: {user_name}")
status_layout.addWidget(self.user_label)
status_layout.addStretch()
main_layout.addWidget(status_widget)
# Transcription display
self.transcription_display = TranscriptionDisplay(
max_lines=self.config.get('display.max_lines', 100),
show_timestamps=self.config.get('display.show_timestamps', True),
font_family=self.config.get('display.font_family', 'Courier'),
font_size=self.config.get('display.font_size', 12)
)
main_layout.addWidget(self.transcription_display)
# Control buttons
control_widget = QWidget()
control_widget.setFixedHeight(80)
control_layout = QHBoxLayout()
control_widget.setLayout(control_layout)
# Start/stop button; its text and color are swapped by
# _start_transcription / _stop_transcription
self.start_button = QPushButton("▶ Start Transcription")
self.start_button.setFixedSize(240, 50)
button_font = QFont()
button_font.setPointSize(14)
button_font.setBold(True)
self.start_button.setFont(button_font)
self.start_button.clicked.connect(self._toggle_transcription)
self.start_button.setStyleSheet("background-color: #2ecc71; color: white;")
control_layout.addWidget(self.start_button)
self.clear_button = QPushButton("Clear")
self.clear_button.setFixedSize(120, 50)
self.clear_button.clicked.connect(self._clear_transcriptions)
control_layout.addWidget(self.clear_button)
self.save_button = QPushButton("💾 Save")
self.save_button.setFixedSize(120, 50)
self.save_button.clicked.connect(self._save_transcriptions)
control_layout.addWidget(self.save_button)
control_layout.addStretch()
main_layout.addWidget(control_widget)
def _initialize_components(self):
    """Build the transcription engine and start loading its model.

    The model load runs on a ``ModelLoaderThread``; ``_on_model_loaded`` is
    invoked with the outcome.
    """
    self.status_label.setText("⚙ Initializing...")

    settings = self.config

    # Apply the configured compute device first; the values queried from
    # the device manager below are read after this call.
    device_config = settings.get('transcription.device', 'auto')
    self.device_manager.set_device(device_config)

    model_size = settings.get('transcription.model', 'base')
    language = settings.get('transcription.language', 'en')
    whisper_device = self.device_manager.get_device_for_whisper()
    compute_type = self.device_manager.get_compute_type()

    # Remember what this engine was built with so that later settings
    # changes can be detected.
    self.current_model_size = model_size
    self.current_device_config = device_config

    self.transcription_engine = TranscriptionEngine(
        model_size=model_size,
        device=whisper_device,
        compute_type=compute_type,
        language=language,
        min_confidence=settings.get('processing.min_confidence', 0.5),
    )

    loader = ModelLoaderThread(self.transcription_engine)
    self.model_loader_thread = loader
    loader.finished.connect(self._on_model_loaded)
    loader.start()
def _on_model_loaded(self, success: bool, message: str):
    """React to the initial model load finishing.

    Args:
        success: Whether the model loaded.
        message: Text from the loader thread; shown only on failure.
    """
    if not success:
        self.status_label.setText("❌ Model loading failed")
        QMessageBox.critical(self, "Error", message)
        self.start_button.setEnabled(False)
        return

    host = self.config.get('web_server.host', '127.0.0.1')
    port = self.config.get('web_server.port', 8080)
    self.status_label.setText(f"✓ Ready | Web: http://{host}:{port}")
    self.start_button.setEnabled(True)
def _start_web_server_if_enabled(self):
    """Create the web server and start it on a background thread.

    NOTE(review): despite the method name, no "enabled" setting is read
    here - the server is always started. Confirm whether a config flag
    should gate this.
    """
    settings = self.config
    host = settings.get('web_server.host', '127.0.0.1')
    port = settings.get('web_server.port', 8080)
    show_timestamps = settings.get('display.show_timestamps', True)
    fade_after_seconds = settings.get('display.fade_after_seconds', 10)

    print(f"Starting web server at http://{host}:{port}")

    server = TranscriptionWebServer(
        host=host,
        port=port,
        show_timestamps=show_timestamps,
        fade_after_seconds=fade_after_seconds,
    )
    self.web_server = server

    runner = WebServerThread(server)
    self.web_server_thread = runner
    runner.start()
def _toggle_transcription(self):
    """Stop transcription when it is running, otherwise start it."""
    if self.is_transcribing:
        self._stop_transcription()
        return
    self._start_transcription()
def _start_transcription(self):
    """Create the capture and noise-suppression stages and begin recording.

    Shows an error dialog and returns early if the engine has not finished
    loading. Any exception while starting is reported in a dialog and
    printed.
    """
    try:
        engine = self.transcription_engine
        if not (engine and engine.is_loaded):
            QMessageBox.critical(self, "Error", "Transcription engine not ready")
            return

        settings = self.config

        # 'default' maps to None; anything else is converted to int.
        device_setting = settings.get('audio.input_device', 'default')
        if device_setting == 'default':
            input_device = None
        else:
            input_device = int(device_setting)

        self.audio_capture = AudioCapture(
            sample_rate=settings.get('audio.sample_rate', 16000),
            chunk_duration=settings.get('audio.chunk_duration', 3.0),
            device=input_device,
        )

        suppression_on = settings.get('noise_suppression.enabled', True)
        self.noise_suppressor = NoiseSuppressor(
            sample_rate=settings.get('audio.sample_rate', 16000),
            method="noisereduce" if suppression_on else "none",
            strength=settings.get('noise_suppression.strength', 0.7),
            use_vad=settings.get('processing.use_vad', True),
        )

        # Each captured chunk is handed to _process_audio_chunk.
        self.audio_capture.start_recording(callback=self._process_audio_chunk)

        # Flip the UI into its "recording" look.
        self.is_transcribing = True
        self.start_button.setText("⏸ Stop Transcription")
        self.start_button.setStyleSheet("background-color: #e74c3c; color: white;")
        self.status_label.setText("🔴 Recording...")
    except Exception as exc:
        QMessageBox.critical(self, "Error", f"Failed to start transcription:\n{exc}")
        print(f"Error starting transcription: {exc}")
def _stop_transcription(self):
    """Stop audio capture and return the UI to its idle look.

    Errors raised while stopping are shown in a dialog and printed; in that
    case the UI state is left untouched.
    """
    try:
        capture = self.audio_capture
        if capture:
            capture.stop_recording()

        self.is_transcribing = False
        button = self.start_button
        button.setText("▶ Start Transcription")
        button.setStyleSheet("background-color: #2ecc71; color: white;")
        self.status_label.setText("✓ Ready")
    except Exception as exc:
        QMessageBox.critical(self, "Error", f"Failed to stop transcription:\n{exc}")
        print(f"Error stopping transcription: {exc}")
def _process_audio_chunk(self, audio_chunk):
    """Denoise and transcribe one captured chunk on a new daemon thread.

    Args:
        audio_chunk: The chunk handed over by the audio-capture callback.

    A non-empty result is queued onto the display widget and, when the web
    server's event loop is up, broadcast to web clients. Errors inside the
    worker are printed with a traceback and otherwise ignored.
    """
    def process():
        try:
            processed_audio = self.noise_suppressor.process(audio_chunk, skip_silent=True)
            if processed_audio is None:
                # The suppressor returned nothing (chunk treated as silent).
                return

            user_name = self.config.get('user.name', 'User')
            result = self.transcription_engine.transcribe(
                processed_audio,
                sample_rate=self.config.get('audio.sample_rate', 16000),
                user_name=user_name,
            )
            if not result:
                return

            # Queue the display update onto the widget's own thread.
            # The timestamp is not passed - the display widget creates it.
            from PySide6.QtCore import QMetaObject, Q_ARG
            QMetaObject.invokeMethod(
                self.transcription_display,
                "add_transcription",
                Qt.QueuedConnection,
                Q_ARG(str, result.text),
                Q_ARG(str, result.user_name),
            )

            # Broadcast only when the server thread's loop exists and is
            # running. The loop attribute is None until the thread starts,
            # and scheduling on a loop that is not running would leave the
            # coroutine never awaited. The coroutine is created only after
            # the check for the same reason.
            server_thread = self.web_server_thread
            loop = server_thread.loop if server_thread else None
            if self.web_server and loop is not None and loop.is_running():
                asyncio.run_coroutine_threadsafe(
                    self.web_server.broadcast_transcription(
                        result.text,
                        result.user_name,
                        result.timestamp,
                    ),
                    loop,
                )
        except Exception as e:
            print(f"Error processing audio: {e}")
            import traceback
            traceback.print_exc()

    # Thread is already imported at module level.
    Thread(target=process, daemon=True).start()
def _clear_transcriptions(self):
    """Ask for confirmation, then empty the transcription display."""
    answer = QMessageBox.question(
        self,
        "Clear Transcriptions",
        "Are you sure you want to clear all transcriptions?",
        QMessageBox.Yes | QMessageBox.No,
    )
    if answer != QMessageBox.Yes:
        return
    self.transcription_display.clear_all()
def _save_transcriptions(self):
    """Prompt for a destination file and write the transcriptions to it.

    Does nothing if the user dismisses the file dialog. The outcome of the
    save is reported in an information or error dialog.
    """
    # getSaveFileName returns (path, selected_filter); only the path is used.
    filepath, _ = QFileDialog.getSaveFileName(
        self,
        "Save Transcriptions",
        "",
        "Text files (*.txt);;All files (*.*)",
    )
    if not filepath:
        return

    saved = self.transcription_display.save_to_file(filepath)
    if saved:
        QMessageBox.information(self, "Saved", f"Transcriptions saved to:\n{filepath}")
    else:
        QMessageBox.critical(self, "Error", "Failed to save transcriptions")
def _open_settings(self):
    """Open the modal settings dialog with the current device choices.

    The audio list falls back to a single ``(0, "Default")`` entry when no
    input devices are reported. The compute list is the device manager's
    entries preceded by an ``("auto", "Auto-detect")`` choice.
    """
    audio_devices = AudioCapture.get_input_devices() or [(0, "Default")]

    # Build a new list rather than inserting into the one returned by the
    # device manager: if that list is shared or cached, inserting in place
    # would add another "Auto-detect" entry every time the dialog is opened.
    compute_devices = [
        ("auto", "Auto-detect"),
        *self.device_manager.get_device_info(),
    ]

    dialog = SettingsDialog(
        self,
        self.config,
        audio_devices,
        compute_devices,
        on_save=self._on_settings_saved,
    )
    dialog.exec()
def _on_settings_saved(self):
    """Apply freshly saved settings to the running application.

    Display and web-server options are applied immediately. The model is
    reloaded only when the model size or the device setting changed.

    NOTE(review): a changed language or min-confidence value does not
    trigger a reload here - confirm whether the engine picks those up some
    other way.
    """
    settings = self.config

    user_name = settings.get('user.name', 'User')
    self.user_label.setText(f"User: {user_name}")

    # Display options.
    show_timestamps = settings.get('display.show_timestamps', True)
    display = self.transcription_display
    display.set_max_lines(settings.get('display.max_lines', 100))
    display.set_show_timestamps(show_timestamps)
    display.set_font(
        settings.get('display.font_family', 'Courier'),
        settings.get('display.font_size', 12),
    )

    # Mirror the relevant options onto the web server, if there is one.
    server = self.web_server
    if server:
        server.show_timestamps = show_timestamps
        server.fade_after_seconds = settings.get('display.fade_after_seconds', 10)

    new_model = settings.get('transcription.model', 'base')
    new_device_config = settings.get('transcription.device', 'auto')
    unchanged = (
        self.current_model_size == new_model
        and self.current_device_config == new_device_config
    )
    if unchanged:
        QMessageBox.information(self, "Settings Saved", "Settings have been applied successfully!")
    else:
        self._reload_model()
def _reload_model(self):
    """Rebuild the transcription engine from the current settings.

    Stops any active transcription, waits for an in-flight model load to
    finish, unloads the current model, creates a new engine and loads its
    model on a background thread. ``_on_model_reloaded`` receives the
    outcome.
    """
    if self.is_transcribing:
        self._stop_transcription()

    self.status_label.setText("⚙ Reloading model...")
    self.start_button.setEnabled(False)

    # A previous load may still be running against the current engine.
    # Let it finish before unloading so load_model() and unload_model()
    # never run on the same engine at the same time.
    if self.model_loader_thread and self.model_loader_thread.isRunning():
        self.model_loader_thread.wait()

    if self.transcription_engine:
        self.transcription_engine.unload_model()

    # Apply the configured compute device first; the values queried from
    # the device manager below are read after this call.
    device_config = self.config.get('transcription.device', 'auto')
    self.device_manager.set_device(device_config)

    model_size = self.config.get('transcription.model', 'base')
    language = self.config.get('transcription.language', 'en')
    device = self.device_manager.get_device_for_whisper()
    compute_type = self.device_manager.get_compute_type()

    # Remember what the new engine is built with for later change detection.
    self.current_model_size = model_size
    self.current_device_config = device_config

    self.transcription_engine = TranscriptionEngine(
        model_size=model_size,
        device=device,
        compute_type=compute_type,
        language=language,
        min_confidence=self.config.get('processing.min_confidence', 0.5),
    )

    self.model_loader_thread = ModelLoaderThread(self.transcription_engine)
    self.model_loader_thread.finished.connect(self._on_model_reloaded)
    self.model_loader_thread.start()
def _on_model_reloaded(self, success: bool, message: str):
    """React to a settings-triggered model reload finishing.

    Args:
        success: Whether the model loaded.
        message: Text from the loader thread; included in the error dialog.
    """
    if not success:
        self.status_label.setText("❌ Model loading failed")
        QMessageBox.critical(self, "Error", f"Failed to reload model:\n{message}")
        self.start_button.setEnabled(False)
        return

    host = self.config.get('web_server.host', '127.0.0.1')
    port = self.config.get('web_server.port', 8080)
    self.status_label.setText(f"✓ Ready | Web: http://{host}:{port}")
    self.start_button.setEnabled(True)
    QMessageBox.information(self, "Settings Saved", "Model reloaded successfully with new settings!")
def closeEvent(self, event):
    """Shut down cleanly when the window is closed.

    Args:
        event: The close event; it is always accepted.

    Stops an active transcription, waits for a running model load to
    finish, then unloads the model.
    """
    if self.is_transcribing:
        self._stop_transcription()

    # Join the loader thread before unloading: it may still be inside
    # load_model() on the same engine, and unloading underneath it would
    # race with the load.
    if self.model_loader_thread and self.model_loader_thread.isRunning():
        self.model_loader_thread.wait()

    if self.transcription_engine:
        self.transcription_engine.unload_model()

    event.accept()

310
gui/settings_dialog.py Normal file
View File

@@ -0,0 +1,310 @@
"""Settings dialog for configuring the application."""
import customtkinter as ctk
from tkinter import messagebox
from typing import Callable, List, Tuple
class SettingsDialog(ctk.CTkToplevel):
"""Dialog window for application settings."""
def __init__(
    self,
    parent,
    config,
    audio_devices: List[Tuple[int, str]],
    compute_devices: List[Tuple[str, str]],
    on_save: Callable = None
):
    """
    Build the modal settings window and fill it from the configuration.

    Args:
        parent: Window that owns this dialog
        config: Configuration object that is read from and written to
        audio_devices: (device_index, device_name) pairs to offer
        compute_devices: (device_id, device_description) pairs to offer
        on_save: Optional callback stored for use when settings are saved
    """
    super().__init__(parent)

    # Keep references to everything the other methods need.
    self.on_save = on_save
    self.compute_devices = compute_devices
    self.audio_devices = audio_devices
    self.config = config

    # Fixed-size window.
    self.title("Settings")
    self.geometry("600x700")
    self.resizable(False, False)

    # Modal behavior: tie the dialog to its parent and grab all input.
    self.transient(parent)
    self.grab_set()

    # The widgets must exist before they can be populated.
    self._create_widgets()
    self._load_current_settings()
def _create_widgets(self):
"""Create all settings widgets.

Lays out five titled sections inside one padded frame - user, audio,
transcription, noise suppression and display - followed by the
Save/Cancel buttons. Input widgets are stored on ``self`` so that
``_load_current_settings`` and ``_save_settings`` can read and write
them; labels and container frames stay local.
"""
# Main container with padding
main_frame = ctk.CTkFrame(self)
main_frame.pack(fill="both", expand=True, padx=20, pady=20)
# User Settings Section
user_frame = ctk.CTkFrame(main_frame)
user_frame.pack(fill="x", pady=(0, 15))
ctk.CTkLabel(user_frame, text="User Settings", font=("", 16, "bold")).pack(
anchor="w", padx=10, pady=(10, 5)
)
# User name
name_frame = ctk.CTkFrame(user_frame)
name_frame.pack(fill="x", padx=10, pady=5)
ctk.CTkLabel(name_frame, text="Display Name:", width=150).pack(side="left", padx=5)
self.name_entry = ctk.CTkEntry(name_frame, width=300)
self.name_entry.pack(side="left", padx=5)
# Audio Settings Section
audio_frame = ctk.CTkFrame(main_frame)
audio_frame.pack(fill="x", pady=(0, 15))
ctk.CTkLabel(audio_frame, text="Audio Settings", font=("", 16, "bold")).pack(
anchor="w", padx=10, pady=(10, 5)
)
# Audio device (the menu lists names only; the matching index is
# looked up again by name when saving)
device_frame = ctk.CTkFrame(audio_frame)
device_frame.pack(fill="x", padx=10, pady=5)
ctk.CTkLabel(device_frame, text="Input Device:", width=150).pack(side="left", padx=5)
device_names = [name for _, name in self.audio_devices]
self.audio_device_menu = ctk.CTkOptionMenu(device_frame, values=device_names, width=300)
self.audio_device_menu.pack(side="left", padx=5)
# Chunk duration
chunk_frame = ctk.CTkFrame(audio_frame)
chunk_frame.pack(fill="x", padx=10, pady=5)
ctk.CTkLabel(chunk_frame, text="Chunk Duration (s):", width=150).pack(side="left", padx=5)
self.chunk_entry = ctk.CTkEntry(chunk_frame, width=100)
self.chunk_entry.pack(side="left", padx=5)
# Transcription Settings Section
transcription_frame = ctk.CTkFrame(main_frame)
transcription_frame.pack(fill="x", pady=(0, 15))
ctk.CTkLabel(transcription_frame, text="Transcription Settings", font=("", 16, "bold")).pack(
anchor="w", padx=10, pady=(10, 5)
)
# Model size
model_frame = ctk.CTkFrame(transcription_frame)
model_frame.pack(fill="x", padx=10, pady=5)
ctk.CTkLabel(model_frame, text="Model Size:", width=150).pack(side="left", padx=5)
self.model_menu = ctk.CTkOptionMenu(
model_frame,
values=["tiny", "base", "small", "medium", "large"],
width=200
)
self.model_menu.pack(side="left", padx=5)
# Compute device (the menu lists descriptions only)
compute_frame = ctk.CTkFrame(transcription_frame)
compute_frame.pack(fill="x", padx=10, pady=5)
ctk.CTkLabel(compute_frame, text="Compute Device:", width=150).pack(side="left", padx=5)
device_descs = [desc for _, desc in self.compute_devices]
self.compute_device_menu = ctk.CTkOptionMenu(compute_frame, values=device_descs, width=300)
self.compute_device_menu.pack(side="left", padx=5)
# Language
lang_frame = ctk.CTkFrame(transcription_frame)
lang_frame.pack(fill="x", padx=10, pady=5)
ctk.CTkLabel(lang_frame, text="Language:", width=150).pack(side="left", padx=5)
self.lang_menu = ctk.CTkOptionMenu(
lang_frame,
values=["auto", "en", "es", "fr", "de", "it", "pt", "ru", "zh", "ja", "ko"],
width=200
)
self.lang_menu.pack(side="left", padx=5)
# Noise Suppression Section
noise_frame = ctk.CTkFrame(main_frame)
noise_frame.pack(fill="x", pady=(0, 15))
ctk.CTkLabel(noise_frame, text="Noise Suppression", font=("", 16, "bold")).pack(
anchor="w", padx=10, pady=(10, 5)
)
# Enable noise suppression
ns_enable_frame = ctk.CTkFrame(noise_frame)
ns_enable_frame.pack(fill="x", padx=10, pady=5)
self.noise_enabled_var = ctk.BooleanVar()
self.noise_enabled_check = ctk.CTkCheckBox(
ns_enable_frame,
text="Enable Noise Suppression",
variable=self.noise_enabled_var
)
self.noise_enabled_check.pack(side="left", padx=5)
# Noise suppression strength (0.0 to 1.0 in 20 steps)
strength_frame = ctk.CTkFrame(noise_frame)
strength_frame.pack(fill="x", padx=10, pady=5)
ctk.CTkLabel(strength_frame, text="Strength:", width=150).pack(side="left", padx=5)
self.noise_strength_slider = ctk.CTkSlider(
strength_frame,
from_=0.0,
to=1.0,
number_of_steps=20,
width=300
)
self.noise_strength_slider.pack(side="left", padx=5)
# Placeholder text; _load_current_settings overwrites it with the stored value
self.noise_strength_label = ctk.CTkLabel(strength_frame, text="0.7", width=40)
self.noise_strength_label.pack(side="left", padx=5)
# The callback is attached only after the label it updates exists
self.noise_strength_slider.configure(command=self._update_strength_label)
# VAD
vad_frame = ctk.CTkFrame(noise_frame)
vad_frame.pack(fill="x", padx=10, pady=5)
self.vad_enabled_var = ctk.BooleanVar()
self.vad_enabled_check = ctk.CTkCheckBox(
vad_frame,
text="Enable Voice Activity Detection",
variable=self.vad_enabled_var
)
self.vad_enabled_check.pack(side="left", padx=5)
# Display Settings Section
display_frame = ctk.CTkFrame(main_frame)
display_frame.pack(fill="x", pady=(0, 15))
ctk.CTkLabel(display_frame, text="Display Settings", font=("", 16, "bold")).pack(
anchor="w", padx=10, pady=(10, 5)
)
# Show timestamps
ts_frame = ctk.CTkFrame(display_frame)
ts_frame.pack(fill="x", padx=10, pady=5)
self.timestamps_var = ctk.BooleanVar()
self.timestamps_check = ctk.CTkCheckBox(
ts_frame,
text="Show Timestamps",
variable=self.timestamps_var
)
self.timestamps_check.pack(side="left", padx=5)
# Max lines
maxlines_frame = ctk.CTkFrame(display_frame)
maxlines_frame.pack(fill="x", padx=10, pady=5)
ctk.CTkLabel(maxlines_frame, text="Max Lines:", width=150).pack(side="left", padx=5)
self.maxlines_entry = ctk.CTkEntry(maxlines_frame, width=100)
self.maxlines_entry.pack(side="left", padx=5)
# Buttons (both are packed to the right, so Save ends up rightmost)
button_frame = ctk.CTkFrame(main_frame)
button_frame.pack(fill="x", pady=(10, 0))
self.save_button = ctk.CTkButton(
button_frame,
text="Save",
command=self._save_settings,
width=120
)
self.save_button.pack(side="right", padx=5)
# Cancel closes the dialog without touching the config
self.cancel_button = ctk.CTkButton(
button_frame,
text="Cancel",
command=self.destroy,
width=120,
fg_color="gray"
)
self.cancel_button.pack(side="right", padx=5)
def _update_strength_label(self, value):
    """Refresh the text label that sits next to the noise-strength slider.

    Args:
        value: Current slider position as a float. The slider in this
            dialog spans 0.0-1.0 in 20 steps, i.e. 0.05 increments.
    """
    # Two decimals are required to tell adjacent 0.05 steps apart; with a
    # single decimal both 0.65 and 0.70 were rendered as "0.7", so the
    # label did not reflect the value that would actually be saved.
    self.noise_strength_label.configure(text=f"{value:.2f}")
def _load_current_settings(self):
    """Populate every widget from the values stored in ``self.config``.

    Each lookup passes a default to ``config.get``, so missing keys fall
    back to that default. A device menu is left untouched when the stored
    device is not present in the list handed to the dialog.
    """
    # User settings
    self.name_entry.insert(0, self.config.get('user.name', 'User'))

    # Audio settings. Compare as strings so a device index stored as an
    # int matches the same way as one stored as text.
    current_device = str(self.config.get('audio.input_device', 'default'))
    device_name = next(
        (name for dev_idx, name in self.audio_devices if str(dev_idx) == current_device),
        None,
    )
    if device_name is None and current_device == 'default' and self.audio_devices:
        # 'default' is represented by the first listed device.
        device_name = self.audio_devices[0][1]
    if device_name is not None:
        self.audio_device_menu.set(device_name)
    self.chunk_entry.insert(0, str(self.config.get('audio.chunk_duration', 3.0)))

    # Transcription settings
    self.model_menu.set(self.config.get('transcription.model', 'base'))
    current_compute = self.config.get('transcription.device', 'auto')
    # Look for an exact match first; only when none exists does 'auto'
    # fall back to the first entry. Checking the fallback inside the same
    # loop made it win on the first iteration and hide a real 'auto' entry.
    compute_desc = next(
        (desc for dev_id, desc in self.compute_devices if dev_id == current_compute),
        None,
    )
    if compute_desc is None and current_compute == 'auto' and self.compute_devices:
        compute_desc = self.compute_devices[0][1]
    if compute_desc is not None:
        self.compute_device_menu.set(compute_desc)
    self.lang_menu.set(self.config.get('transcription.language', 'en'))

    # Noise suppression
    self.noise_enabled_var.set(self.config.get('noise_suppression.enabled', True))
    strength = self.config.get('noise_suppression.strength', 0.7)
    self.noise_strength_slider.set(strength)
    # Refresh the label explicitly right after positioning the slider.
    self._update_strength_label(strength)
    self.vad_enabled_var.set(self.config.get('processing.use_vad', True))

    # Display settings
    self.timestamps_var.set(self.config.get('display.show_timestamps', True))
    self.maxlines_entry.insert(0, str(self.config.get('display.max_lines', 100)))
def _save_settings(self):
    """Validate the form, write all values to the config and close the dialog.

    The free-text fields are parsed and range-checked before the first
    ``config.set`` call, so an invalid entry can no longer leave the
    configuration partially updated. Chunk duration must be a finite
    number greater than zero and max lines must be at least 1.

    On invalid input an "Invalid Input" error box is shown and the dialog
    stays open. Any exception raised while writing the config or running
    the ``on_save`` callback is reported in an "Error" box instead.
    """
    # Phase 1: parse and validate everything the user typed.
    try:
        chunk_duration = float(self.chunk_entry.get())
        # NaN fails the first comparison, "inf" fails the second.
        if not 0 < chunk_duration < float("inf"):
            raise ValueError("Chunk duration must be a positive number of seconds")
        max_lines = int(self.maxlines_entry.get())
        if max_lines < 1:
            raise ValueError("Max lines must be at least 1")
    except ValueError as e:
        messagebox.showerror("Invalid Input", f"Please check your input values:\n{e}")
        return

    # Phase 2: every value is known to be valid, commit them.
    try:
        # User settings
        self.config.set('user.name', self.name_entry.get())

        # Audio settings: map the displayed name back to its device index.
        selected_audio = self.audio_device_menu.get()
        for dev_idx, dev_name in self.audio_devices:
            if dev_name == selected_audio:
                self.config.set('audio.input_device', str(dev_idx))
                break
        self.config.set('audio.chunk_duration', chunk_duration)

        # Transcription settings: map the description back to its device id.
        self.config.set('transcription.model', self.model_menu.get())
        selected_compute = self.compute_device_menu.get()
        for dev_id, dev_desc in self.compute_devices:
            if dev_desc == selected_compute:
                self.config.set('transcription.device', dev_id)
                break
        self.config.set('transcription.language', self.lang_menu.get())

        # Noise suppression
        self.config.set('noise_suppression.enabled', self.noise_enabled_var.get())
        self.config.set('noise_suppression.strength', self.noise_strength_slider.get())
        self.config.set('processing.use_vad', self.vad_enabled_var.get())

        # Display settings
        self.config.set('display.show_timestamps', self.timestamps_var.get())
        self.config.set('display.max_lines', max_lines)

        # Call save callback
        if self.on_save:
            self.on_save()

        messagebox.showinfo("Settings Saved", "Settings have been saved successfully!")
        self.destroy()
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing.
        messagebox.showerror("Error", f"Failed to save settings:\n{e}")

261
gui/settings_dialog_qt.py Normal file
View File

@@ -0,0 +1,261 @@
"""PySide6 settings dialog for configuring the application."""
from PySide6.QtWidgets import (
QDialog, QVBoxLayout, QHBoxLayout, QFormLayout,
QLabel, QLineEdit, QComboBox, QCheckBox, QSlider,
QPushButton, QMessageBox, QGroupBox
)
from PySide6.QtCore import Qt
from typing import Callable, List, Tuple
class SettingsDialog(QDialog):
    """Dialog window for application settings using PySide6.

    Initial widget values are read with ``config.get`` and written back
    with ``config.set`` when the user presses Save.
    """

    def __init__(
        self,
        parent,
        config,
        audio_devices: List[Tuple[int, str]],
        compute_devices: List[Tuple[str, str]],
        on_save: Callable = None
    ):
        """
        Initialize settings dialog.

        Args:
            parent: Parent window
            config: Configuration object; the dialog calls its
                ``get(key, default)`` and ``set(key, value)`` methods
            audio_devices: List of (device_index, device_name) tuples
            compute_devices: List of (device_id, device_description) tuples
            on_save: Optional callback invoked without arguments after the
                settings have been written to ``config``
        """
        super().__init__(parent)
        self.config = config
        self.audio_devices = audio_devices
        self.compute_devices = compute_devices
        self.on_save = on_save

        # Window configuration
        self.setWindowTitle("Settings")
        self.setMinimumSize(600, 700)
        self.setModal(True)

        self._create_widgets()
        self._load_current_settings()

    def _create_widgets(self):
        """Create all settings widgets."""
        main_layout = QVBoxLayout()
        self.setLayout(main_layout)

        # User Settings Group
        user_group = QGroupBox("User Settings")
        user_layout = QFormLayout()
        self.name_input = QLineEdit()
        user_layout.addRow("Display Name:", self.name_input)
        user_group.setLayout(user_layout)
        main_layout.addWidget(user_group)

        # Audio Settings Group
        audio_group = QGroupBox("Audio Settings")
        audio_layout = QFormLayout()
        self.audio_device_combo = QComboBox()
        # Combo rows are added in list order, so a row index can be used
        # directly as an index into self.audio_devices.
        device_names = [name for _, name in self.audio_devices]
        self.audio_device_combo.addItems(device_names)
        audio_layout.addRow("Input Device:", self.audio_device_combo)
        self.chunk_input = QLineEdit()
        audio_layout.addRow("Chunk Duration (s):", self.chunk_input)
        audio_group.setLayout(audio_layout)
        main_layout.addWidget(audio_group)

        # Transcription Settings Group
        transcription_group = QGroupBox("Transcription Settings")
        transcription_layout = QFormLayout()
        self.model_combo = QComboBox()
        self.model_combo.addItems(["tiny", "base", "small", "medium", "large"])
        transcription_layout.addRow("Model Size:", self.model_combo)
        self.compute_device_combo = QComboBox()
        device_descs = [desc for _, desc in self.compute_devices]
        self.compute_device_combo.addItems(device_descs)
        transcription_layout.addRow("Compute Device:", self.compute_device_combo)
        self.lang_combo = QComboBox()
        self.lang_combo.addItems(["auto", "en", "es", "fr", "de", "it", "pt", "ru", "zh", "ja", "ko"])
        transcription_layout.addRow("Language:", self.lang_combo)
        transcription_group.setLayout(transcription_layout)
        main_layout.addWidget(transcription_group)

        # Noise Suppression Group
        noise_group = QGroupBox("Noise Suppression")
        noise_layout = QVBoxLayout()
        self.noise_enabled_check = QCheckBox("Enable Noise Suppression")
        noise_layout.addWidget(self.noise_enabled_check)

        # Strength slider: integer percent (0-100) standing for 0.00-1.00.
        strength_layout = QHBoxLayout()
        strength_layout.addWidget(QLabel("Strength:"))
        self.noise_strength_slider = QSlider(Qt.Horizontal)
        self.noise_strength_slider.setMinimum(0)
        self.noise_strength_slider.setMaximum(100)
        self.noise_strength_slider.setValue(70)
        self.noise_strength_slider.valueChanged.connect(self._update_strength_label)
        strength_layout.addWidget(self.noise_strength_slider)
        self.noise_strength_label = QLabel("0.70")
        strength_layout.addWidget(self.noise_strength_label)
        noise_layout.addLayout(strength_layout)

        self.vad_enabled_check = QCheckBox("Enable Voice Activity Detection")
        noise_layout.addWidget(self.vad_enabled_check)
        noise_group.setLayout(noise_layout)
        main_layout.addWidget(noise_group)

        # Display Settings Group
        display_group = QGroupBox("Display Settings")
        display_layout = QFormLayout()
        self.timestamps_check = QCheckBox()
        display_layout.addRow("Show Timestamps:", self.timestamps_check)
        self.maxlines_input = QLineEdit()
        display_layout.addRow("Max Lines:", self.maxlines_input)
        self.font_family_combo = QComboBox()
        self.font_family_combo.addItems(["Courier", "Arial", "Times New Roman", "Consolas", "Monaco", "Monospace"])
        display_layout.addRow("Font Family:", self.font_family_combo)
        self.font_size_input = QLineEdit()
        display_layout.addRow("Font Size:", self.font_size_input)
        self.fade_seconds_input = QLineEdit()
        display_layout.addRow("Fade After (seconds):", self.fade_seconds_input)
        display_group.setLayout(display_layout)
        main_layout.addWidget(display_group)

        # Buttons
        button_layout = QHBoxLayout()
        button_layout.addStretch()
        self.cancel_button = QPushButton("Cancel")
        self.cancel_button.clicked.connect(self.reject)
        button_layout.addWidget(self.cancel_button)
        self.save_button = QPushButton("Save")
        self.save_button.clicked.connect(self._save_settings)
        self.save_button.setDefault(True)
        button_layout.addWidget(self.save_button)
        main_layout.addLayout(button_layout)

    def _update_strength_label(self, value):
        """Update the noise strength label.

        Args:
            value: Slider position in percent (0-100).
        """
        # The slider moves in 0.01 steps, so show two decimals; a single
        # decimal would hide most slider movement from the user.
        self.noise_strength_label.setText(f"{value / 100:.2f}")

    @staticmethod
    def _find_device_row(devices, wanted, fallback_token):
        """Return the row of the device whose id equals *wanted*.

        Ids are compared as strings so that an index stored as an int
        matches the same way as one stored as text. An exact match always
        wins; only when there is none and *wanted* equals *fallback_token*
        is row 0 returned. Returns -1 when nothing applies.
        """
        wanted = str(wanted)
        for row, (dev_id, _) in enumerate(devices):
            if str(dev_id) == wanted:
                return row
        if wanted == fallback_token and devices:
            return 0
        return -1

    @staticmethod
    def _parse_field(raw, label, convert, minimum, allow_minimum=True):
        """Convert the text *raw* with *convert* and enforce a lower bound.

        Args:
            raw: Text taken from a line edit
            label: Field name used in the error message
            convert: ``int`` or ``float``
            minimum: Lower bound for the converted value
            allow_minimum: Whether *minimum* itself is acceptable

        Returns:
            The converted value.

        Raises:
            ValueError: If the text is not a number or is out of range.
        """
        try:
            value = convert(raw)
        except ValueError:
            raise ValueError(f"{label} must be a valid number (got {raw!r})") from None
        above_minimum = value >= minimum if allow_minimum else value > minimum
        # NaN fails the bound check; the second test rejects "inf".
        if not (above_minimum and value < float("inf")):
            bound = "at least" if allow_minimum else "greater than"
            raise ValueError(f"{label} must be {bound} {minimum}")
        return value

    def _load_current_settings(self):
        """Load current settings from config.

        A device combo keeps its initial selection when the stored device
        is not among the devices passed to the dialog.
        """
        # User settings
        self.name_input.setText(self.config.get('user.name', 'User'))

        # Audio settings ('default' maps to the first listed device)
        audio_row = self._find_device_row(
            self.audio_devices, self.config.get('audio.input_device', 'default'), 'default'
        )
        if audio_row >= 0:
            self.audio_device_combo.setCurrentIndex(audio_row)
        self.chunk_input.setText(str(self.config.get('audio.chunk_duration', 3.0)))

        # Transcription settings
        model = self.config.get('transcription.model', 'base')
        self.model_combo.setCurrentText(model)
        # An explicit 'auto' entry is preferred; otherwise 'auto' maps to
        # the first listed compute device.
        compute_row = self._find_device_row(
            self.compute_devices, self.config.get('transcription.device', 'auto'), 'auto'
        )
        if compute_row >= 0:
            self.compute_device_combo.setCurrentIndex(compute_row)
        lang = self.config.get('transcription.language', 'en')
        self.lang_combo.setCurrentText(lang)

        # Noise suppression
        self.noise_enabled_check.setChecked(self.config.get('noise_suppression.enabled', True))
        strength = self.config.get('noise_suppression.strength', 0.7)
        # round() rather than int(): 0.29 * 100 is 28.999999999999996 in
        # binary floating point and truncation would silently lower the
        # stored value by one step on every load/save round trip.
        percent = round(strength * 100)
        self.noise_strength_slider.setValue(percent)
        # Refresh the label explicitly right after positioning the slider.
        self._update_strength_label(percent)
        self.vad_enabled_check.setChecked(self.config.get('processing.use_vad', True))

        # Display settings
        self.timestamps_check.setChecked(self.config.get('display.show_timestamps', True))
        self.maxlines_input.setText(str(self.config.get('display.max_lines', 100)))
        font_family = self.config.get('display.font_family', 'Courier')
        self.font_family_combo.setCurrentText(font_family)
        self.font_size_input.setText(str(self.config.get('display.font_size', 12)))
        self.fade_seconds_input.setText(str(self.config.get('display.fade_after_seconds', 10)))

    def _save_settings(self):
        """Validate the form, save all values to config and accept the dialog.

        Free-text fields are parsed and range-checked before the first
        ``config.set`` call, so an invalid entry cannot leave the
        configuration partially updated. Accepted ranges: chunk duration
        > 0, max lines >= 1, font size >= 1, fade time >= 0.

        On invalid input an "Invalid Input" box is shown and the dialog
        stays open. Any exception raised while writing the config or
        running ``on_save`` is reported in an "Error" box.
        """
        # Phase 1: parse and validate everything the user typed.
        try:
            chunk_duration = self._parse_field(
                self.chunk_input.text(), "Chunk duration", float, 0, allow_minimum=False
            )
            max_lines = self._parse_field(self.maxlines_input.text(), "Max lines", int, 1)
            font_size = self._parse_field(self.font_size_input.text(), "Font size", int, 1)
            fade_seconds = self._parse_field(self.fade_seconds_input.text(), "Fade time", int, 0)
        except ValueError as e:
            QMessageBox.critical(self, "Invalid Input", f"Please check your input values:\n{e}")
            return

        # Phase 2: every value is known to be valid, commit them.
        try:
            # User settings
            self.config.set('user.name', self.name_input.text())

            # Audio settings. currentIndex() is -1 for an empty combo box;
            # in that case the stored device is left unchanged instead of
            # indexing the list with -1.
            audio_row = self.audio_device_combo.currentIndex()
            if 0 <= audio_row < len(self.audio_devices):
                dev_idx, _ = self.audio_devices[audio_row]
                self.config.set('audio.input_device', str(dev_idx))
            self.config.set('audio.chunk_duration', chunk_duration)

            # Transcription settings
            self.config.set('transcription.model', self.model_combo.currentText())
            compute_row = self.compute_device_combo.currentIndex()
            if 0 <= compute_row < len(self.compute_devices):
                dev_id, _ = self.compute_devices[compute_row]
                self.config.set('transcription.device', dev_id)
            self.config.set('transcription.language', self.lang_combo.currentText())

            # Noise suppression (slider percent back to a 0.0-1.0 fraction)
            self.config.set('noise_suppression.enabled', self.noise_enabled_check.isChecked())
            self.config.set('noise_suppression.strength', self.noise_strength_slider.value() / 100.0)
            self.config.set('processing.use_vad', self.vad_enabled_check.isChecked())

            # Display settings
            self.config.set('display.show_timestamps', self.timestamps_check.isChecked())
            self.config.set('display.max_lines', max_lines)
            self.config.set('display.font_family', self.font_family_combo.currentText())
            self.config.set('display.font_size', font_size)
            self.config.set('display.fade_after_seconds', fade_seconds)

            # Call save callback
            if self.on_save:
                self.on_save()

            QMessageBox.information(self, "Settings Saved", "Settings have been saved successfully!")
            self.accept()
        except Exception as e:
            # UI boundary: report the failure to the user instead of crashing.
            QMessageBox.critical(self, "Error", f"Failed to save settings:\n{e}")

View File

@@ -0,0 +1,127 @@
"""Transcription display widget for showing real-time transcriptions."""
import customtkinter as ctk
from typing import List
from datetime import datetime
class TranscriptionDisplay(ctk.CTkTextbox):
    """Custom text widget for displaying transcriptions.

    The widget is kept in the "disabled" state and only switched to
    "normal" for the duration of a programmatic edit. ``line_count``
    tracks how many text lines are currently shown so the oldest ones
    can be dropped once ``max_lines`` is exceeded.
    """

    def __init__(self, master, max_lines: int = 100, show_timestamps: bool = True, **kwargs):
        """
        Initialize transcription display.

        Args:
            master: Parent widget
            max_lines: Maximum number of lines to keep in display
            show_timestamps: Whether to show timestamps
            **kwargs: Additional arguments for CTkTextbox
        """
        super().__init__(master, **kwargs)
        self.max_lines = max_lines
        self.show_timestamps = show_timestamps
        self.line_count = 0

        # Configure text widget
        self.configure(state="disabled")  # Read-only by default

    def add_transcription(self, text: str, user_name: str = "", timestamp: datetime = None):
        """
        Add a new transcription to the display.

        Args:
            text: Transcription text
            user_name: User/speaker name
            timestamp: Timestamp of transcription; the current local time
                is used when omitted
        """
        if timestamp is None:
            timestamp = datetime.now()

        # Build the display line
        line_parts = []
        if self.show_timestamps:
            time_str = timestamp.strftime("%H:%M:%S")
            line_parts.append(f"[{time_str}]")
        if user_name:
            line_parts.append(f"{user_name}:")
        line_parts.append(text)
        line = " ".join(line_parts) + "\n"

        # Add to display
        self.configure(state="normal")
        self.insert("end", line)
        self.configure(state="disabled")

        # Auto-scroll to bottom
        self.see("end")

        # Track line count. Count the newlines actually inserted: text that
        # itself contains line breaks occupies several lines in the widget,
        # and counting it as one made the trimming below drift.
        self.line_count += line.count("\n")

        # Remove old lines if exceeding max
        if self.line_count > self.max_lines:
            self._remove_oldest_lines(self.line_count - self.max_lines)

    def _remove_oldest_lines(self, num_lines: int):
        """
        Remove oldest lines from the display.

        Args:
            num_lines: Number of lines to remove; values larger than the
                number of tracked lines are clamped
        """
        # Clamp so line_count can never become negative.
        num_lines = min(num_lines, self.line_count)
        if num_lines <= 0:
            return
        self.configure(state="normal")
        # Text indices are "line.column" with lines starting at 1, so this
        # deletes lines 1..num_lines including their trailing newlines.
        self.delete("1.0", f"{num_lines + 1}.0")
        self.configure(state="disabled")
        self.line_count -= num_lines

    def clear(self):
        """Clear all transcriptions."""
        self.configure(state="normal")
        self.delete("1.0", "end")
        self.configure(state="disabled")
        self.line_count = 0

    def get_all_text(self) -> str:
        """
        Get all transcription text.

        Returns:
            All text in the display
        """
        return self.get("1.0", "end")

    def set_max_lines(self, max_lines: int):
        """Update maximum number of lines to keep."""
        self.max_lines = max_lines
        # Trim if necessary
        if self.line_count > self.max_lines:
            self._remove_oldest_lines(self.line_count - self.max_lines)

    def set_show_timestamps(self, show: bool):
        """Update whether to show timestamps (affects lines added afterwards)."""
        self.show_timestamps = show

    def save_to_file(self, filepath: str) -> bool:
        """
        Save transcriptions to a file.

        The file is written as UTF-8 so that non-ASCII transcriptions can
        be saved regardless of the platform's default encoding.

        Args:
            filepath: Path to save file

        Returns:
            True if saved successfully, False if the file could not be written
        """
        try:
            with open(filepath, 'w', encoding='utf-8') as f:
                f.write(self.get_all_text())
            return True
        except (OSError, ValueError) as e:
            # OSError: path/permission problems; ValueError: invalid path
            # or unencodable text (UnicodeError is a ValueError subclass).
            print(f"Error saving transcriptions: {e}")
            return False

View File

@@ -0,0 +1,159 @@
"""PySide6 transcription display widget for showing real-time transcriptions."""
from PySide6.QtWidgets import QTextEdit
from PySide6.QtGui import QFont, QTextCursor
from PySide6.QtCore import Qt, Slot
from datetime import datetime
class TranscriptionDisplay(QTextEdit):
    """Custom text widget for displaying transcriptions using PySide6.

    The widget is read-only. ``line_count`` tracks how many lines (text
    blocks) are currently shown so the oldest ones can be dropped once
    ``max_lines`` is exceeded.
    """

    def __init__(self, parent=None, max_lines=100, show_timestamps=True, font_family="Courier", font_size=12):
        """
        Initialize transcription display.

        Args:
            parent: Parent widget
            max_lines: Maximum number of lines to keep in display
            show_timestamps: Whether to show timestamps
            font_family: Font family name
            font_size: Font size in points
        """
        super().__init__(parent)
        self.max_lines = max_lines
        self.show_timestamps = show_timestamps
        self.line_count = 0
        self.font_family = font_family
        self.font_size = font_size

        # Configure text widget
        self.setReadOnly(True)
        self.setFont(QFont(font_family, font_size))

        # Set dark theme styling
        self.setStyleSheet("""
            QTextEdit {
                background-color: #2b2b2b;
                color: #ffffff;
                border: 1px solid #3d3d3d;
                border-radius: 5px;
                padding: 10px;
            }
        """)

    @Slot(str, str)
    def add_transcription(self, text: str, user_name: str = "", timestamp: datetime = None):
        """
        Add a new transcription to the display.

        Args:
            text: Transcription text
            user_name: User/speaker name
            timestamp: Timestamp of transcription; the current local time
                is used when omitted
        """
        if timestamp is None:
            timestamp = datetime.now()

        # Build the display line
        line_parts = []
        if self.show_timestamps:
            time_str = timestamp.strftime("%H:%M:%S")
            line_parts.append(f"[{time_str}]")
        if user_name:
            line_parts.append(f"{user_name}:")
        line_parts.append(text)
        line = " ".join(line_parts)

        # Add to display
        self.append(line)

        # Auto-scroll to bottom
        cursor = self.textCursor()
        cursor.movePosition(QTextCursor.End)
        self.setTextCursor(cursor)

        # Track line count. Take it from the document instead of adding 1:
        # text containing line breaks produces several blocks, and a fixed
        # increment made the trimming below remove the wrong amount.
        self.line_count = self.document().blockCount()

        # Remove old lines if exceeding max
        if self.line_count > self.max_lines:
            self._remove_oldest_lines(self.line_count - self.max_lines)

    def _remove_oldest_lines(self, num_lines: int):
        """
        Remove oldest lines from the display.

        Args:
            num_lines: Number of lines to remove; values larger than the
                number of tracked lines are clamped
        """
        # Clamp so line_count can never become negative.
        num_lines = min(num_lines, self.line_count)
        if num_lines <= 0:
            return
        cursor = self.textCursor()
        cursor.movePosition(QTextCursor.Start)
        for _ in range(num_lines):
            cursor.select(QTextCursor.BlockUnderCursor)
            cursor.removeSelectedText()
            cursor.deleteChar()  # Remove the newline
        self.line_count -= num_lines

    def clear(self):
        """Clear the widget and reset the line counter.

        Overrides ``QTextEdit.clear`` so that callers using the inherited
        name do not leave ``line_count`` out of sync with the document.
        """
        super().clear()
        self.line_count = 0

    def clear_all(self):
        """Clear all transcriptions."""
        self.clear()

    def get_all_text(self) -> str:
        """
        Get all transcription text.

        Returns:
            All text in the display
        """
        return self.toPlainText()

    def set_max_lines(self, max_lines: int):
        """Update maximum number of lines to keep."""
        self.max_lines = max_lines
        # Trim if necessary
        if self.line_count > self.max_lines:
            self._remove_oldest_lines(self.line_count - self.max_lines)

    def set_show_timestamps(self, show: bool):
        """Update whether to show timestamps (affects lines added afterwards)."""
        self.show_timestamps = show

    def set_font(self, font_family: str, font_size: int):
        """
        Update font settings.

        Args:
            font_family: Font family name
            font_size: Font size in points
        """
        self.font_family = font_family
        self.font_size = font_size
        super().setFont(QFont(font_family, font_size))

    def save_to_file(self, filepath: str) -> bool:
        """
        Save transcriptions to a file.

        The file is written as UTF-8 so that non-ASCII transcriptions can
        be saved regardless of the platform's default encoding.

        Args:
            filepath: Path to save file

        Returns:
            True if saved successfully, False if the file could not be written
        """
        try:
            with open(filepath, 'w', encoding='utf-8') as f:
                f.write(self.toPlainText())
            return True
        except (OSError, ValueError) as e:
            # OSError: path/permission problems; ValueError: invalid path
            # or unencodable text (UnicodeError is a ValueError subclass).
            print(f"Error saving transcriptions: {e}")
            return False