Added a clickable link in the status bar that opens the web display in the default browser. This makes it easy for users to access the OBS browser source without manually typing the URL.

Features:
- Shows "🌐 Open Web Display" link in green
- Tooltip shows the full URL
- Opens in default browser when clicked
- Reads host/port from config automatically

Location: Status bar, after user name
URL format: http://127.0.0.1:8080 (or configured host:port)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
536 lines
20 KiB
Python
536 lines
20 KiB
Python
"""PySide6 main application window for the local transcription app."""
|
|
|
|
import asyncio
import sys
from pathlib import Path
from threading import Thread
from typing import Optional

from PySide6.QtCore import Qt, QThread, Signal
from PySide6.QtGui import QFont
from PySide6.QtWidgets import (
    QMainWindow, QWidget, QVBoxLayout, QHBoxLayout,
    QPushButton, QLabel, QFileDialog, QMessageBox
)

# Add parent directory to path for imports
sys.path.append(str(Path(__file__).parent.parent))

from client.config import Config
from client.device_utils import DeviceManager
from client.audio_capture import AudioCapture
from client.noise_suppression import NoiseSuppressor
from client.transcription_engine import TranscriptionEngine
from gui.transcription_display_qt import TranscriptionDisplay
from gui.settings_dialog_qt import SettingsDialog
from server.web_display import TranscriptionWebServer
|
|
|
|
|
|
class WebServerThread(Thread):
    """Daemon thread that runs the web server on its own asyncio event loop.

    The loop is created inside :meth:`run` (on this thread) and published as
    ``self.loop`` so that other threads can schedule coroutines onto it with
    ``asyncio.run_coroutine_threadsafe``. ``self.loop`` is ``None`` until the
    thread has actually started running, so callers must check it before use.
    """

    def __init__(self, web_server):
        """Store the server to run.

        Args:
            web_server: Object exposing an async ``start()`` method; this
                thread drives that coroutine until it completes.
        """
        super().__init__(daemon=True)
        self.web_server = web_server
        self.loop = None

    def run(self):
        """Run the web server in async event loop.

        The loop is always closed when ``start()`` returns or raises, so its
        resources are released instead of being left to garbage collection.
        ``self.loop`` keeps referencing the (closed) loop afterwards, which
        lets other threads detect the shutdown via ``loop.is_closed()`` or
        ``loop.is_running()``.
        """
        loop = asyncio.new_event_loop()
        self.loop = loop
        asyncio.set_event_loop(loop)
        try:
            loop.run_until_complete(self.web_server.start())
        finally:
            loop.close()
|
|
|
|
|
|
class ModelLoaderThread(QThread):
    """Background loader so the Whisper model can load without freezing the GUI."""

    # Emitted exactly once per run with (success, message).
    # NOTE(review): QThread already provides a no-argument `finished` signal;
    # this attribute shadows it on this subclass - confirm that is intended.
    finished = Signal(bool, str)

    def __init__(self, transcription_engine):
        """Remember the engine whose ``load_model()`` will be called in ``run``."""
        super().__init__()
        self.transcription_engine = transcription_engine

    def run(self):
        """Call ``load_model()`` on the engine and report the outcome via ``finished``."""
        try:
            if self.transcription_engine.load_model():
                outcome = (True, "Model loaded successfully")
            else:
                outcome = (False, "Failed to load model")
        except Exception as exc:
            # Any loader error is reported to the GUI rather than raised here.
            outcome = (False, f"Error loading model: {exc}")
        self.finished.emit(*outcome)
|
|
|
|
|
|
class MainWindow(QMainWindow):
    """Main application window using PySide6.

    Builds the UI (header, status bar, transcription display, control
    buttons), loads the transcription model on a background thread, starts
    the web display server, and routes captured audio through noise
    suppression and transcription to both the on-screen display and the web
    server.
    """

    def __init__(self):
        """Initialize the main window."""
        super().__init__()

        # Application state
        self.is_transcribing = False
        self.config = Config()
        self.device_manager = DeviceManager()

        # Components (None until created later)
        self.audio_capture: Optional[AudioCapture] = None
        self.noise_suppressor: Optional[NoiseSuppressor] = None
        self.transcription_engine: Optional[TranscriptionEngine] = None
        self.model_loader_thread: Optional[ModelLoaderThread] = None

        # Model/device settings the current engine was built with; compared
        # against the config after a settings save to decide on a reload.
        self.current_model_size: Optional[str] = None
        self.current_device_config: Optional[str] = None

        # Web server components
        self.web_server: Optional[TranscriptionWebServer] = None
        self.web_server_thread: Optional[WebServerThread] = None

        # Configure window
        self.setWindowTitle("Local Transcription")
        self.resize(900, 700)

        # Create UI
        self._create_widgets()

        # Initialize components (model loads in background)
        self._initialize_components()

        # Start web server
        self._start_web_server_if_enabled()

    # ------------------------------------------------------------------
    # Small shared helpers
    # ------------------------------------------------------------------

    def _web_url(self) -> str:
        """Return the web display URL built from the configured host and port."""
        host = self.config.get('web_server.host', '127.0.0.1')
        port = self.config.get('web_server.port', 8080)
        return f"http://{host}:{port}"

    def _show_ready_status(self):
        """Show the ready status (including the web URL) and enable Start."""
        self.status_label.setText(f"✓ Ready | Web: {self._web_url()}")
        self.start_button.setEnabled(True)

    def _create_transcription_engine(self):
        """Build a TranscriptionEngine from the current config.

        Applies the configured compute device to the device manager and
        records the model size / device setting in ``current_model_size`` and
        ``current_device_config``.

        Returns:
            The new (not yet loaded) TranscriptionEngine.
        """
        device_config = self.config.get('transcription.device', 'auto')
        self.device_manager.set_device(device_config)

        model_size = self.config.get('transcription.model', 'base')
        language = self.config.get('transcription.language', 'en')
        device = self.device_manager.get_device_for_whisper()
        compute_type = self.device_manager.get_compute_type()

        # Track current settings
        self.current_model_size = model_size
        self.current_device_config = device_config

        return TranscriptionEngine(
            model_size=model_size,
            device=device,
            compute_type=compute_type,
            language=language,
            min_confidence=self.config.get('processing.min_confidence', 0.5)
        )

    def _start_model_loader(self, on_finished):
        """Load ``self.transcription_engine`` on a background thread.

        Args:
            on_finished: Slot connected to the loader's ``finished`` signal;
                called with ``(success: bool, message: str)``.
        """
        self.model_loader_thread = ModelLoaderThread(self.transcription_engine)
        self.model_loader_thread.finished.connect(on_finished)
        self.model_loader_thread.start()

    def _wait_for_model_loader(self):
        """Block until a still-running model loader thread has finished."""
        if self.model_loader_thread and self.model_loader_thread.isRunning():
            self.model_loader_thread.wait()

    # ------------------------------------------------------------------
    # UI construction
    # ------------------------------------------------------------------

    def _create_widgets(self):
        """Create all UI widgets."""
        central_widget = QWidget()
        self.setCentralWidget(central_widget)

        main_layout = QVBoxLayout()
        central_widget.setLayout(main_layout)

        main_layout.addWidget(self._create_header())
        main_layout.addWidget(self._create_status_bar())
        main_layout.addWidget(self._create_transcription_display())
        main_layout.addWidget(self._create_controls())

    def _create_header(self):
        """Build the header row: title on the left, settings button on the right."""
        header_widget = QWidget()
        header_widget.setFixedHeight(80)
        header_layout = QHBoxLayout()
        header_widget.setLayout(header_layout)

        title_label = QLabel("Local Transcription")
        title_font = QFont()
        title_font.setPointSize(24)
        title_font.setBold(True)
        title_label.setFont(title_font)
        header_layout.addWidget(title_label)

        header_layout.addStretch()

        self.settings_button = QPushButton("⚙ Settings")
        self.settings_button.setFixedSize(120, 40)
        self.settings_button.clicked.connect(self._open_settings)
        header_layout.addWidget(self.settings_button)

        return header_widget

    def _create_status_bar(self):
        """Build the status row: status text, device, user name, web display link."""
        status_widget = QWidget()
        status_widget.setFixedHeight(60)
        status_layout = QHBoxLayout()
        status_widget.setLayout(status_layout)

        self.status_label = QLabel("⚫ Initializing...")
        status_font = QFont()
        status_font.setPointSize(14)
        self.status_label.setFont(status_font)
        status_layout.addWidget(self.status_label)

        # Show the second field of the first device entry, if any.
        device_info = self.device_manager.get_device_info()
        device_text = device_info[0][1] if device_info else "No device"
        self.device_label = QLabel(f"Device: {device_text}")
        status_layout.addWidget(self.device_label)

        user_name = self.config.get('user.name', 'User')
        self.user_label = QLabel(f"User: {user_name}")
        status_layout.addWidget(self.user_label)

        # Web display link; setOpenExternalLinks lets Qt open it itself.
        web_url = self._web_url()
        self.web_link = QLabel(f'<a href="{web_url}">🌐 Open Web Display</a>')
        self.web_link.setOpenExternalLinks(True)
        self.web_link.setToolTip(f"Click to open {web_url} in browser (for OBS)")
        self.web_link.setStyleSheet("QLabel { color: #4CAF50; }")
        status_layout.addWidget(self.web_link)

        status_layout.addStretch()

        return status_widget

    def _create_transcription_display(self):
        """Create the transcription display widget from the display settings."""
        self.transcription_display = TranscriptionDisplay(
            max_lines=self.config.get('display.max_lines', 100),
            show_timestamps=self.config.get('display.show_timestamps', True),
            font_family=self.config.get('display.font_family', 'Courier'),
            font_size=self.config.get('display.font_size', 12)
        )
        return self.transcription_display

    def _create_controls(self):
        """Build the bottom row with the Start/Stop, Clear and Save buttons."""
        control_widget = QWidget()
        control_widget.setFixedHeight(80)
        control_layout = QHBoxLayout()
        control_widget.setLayout(control_layout)

        self.start_button = QPushButton("▶ Start Transcription")
        self.start_button.setFixedSize(240, 50)
        button_font = QFont()
        button_font.setPointSize(14)
        button_font.setBold(True)
        self.start_button.setFont(button_font)
        self.start_button.clicked.connect(self._toggle_transcription)
        self.start_button.setStyleSheet("background-color: #2ecc71; color: white;")
        control_layout.addWidget(self.start_button)

        self.clear_button = QPushButton("Clear")
        self.clear_button.setFixedSize(120, 50)
        self.clear_button.clicked.connect(self._clear_transcriptions)
        control_layout.addWidget(self.clear_button)

        self.save_button = QPushButton("💾 Save")
        self.save_button.setFixedSize(120, 50)
        self.save_button.clicked.connect(self._save_transcriptions)
        control_layout.addWidget(self.save_button)

        control_layout.addStretch()

        return control_widget

    # ------------------------------------------------------------------
    # Model loading
    # ------------------------------------------------------------------

    def _initialize_components(self):
        """Create the transcription engine and start loading its model."""
        self.status_label.setText("⚙ Initializing...")
        # Keep Start disabled until the model has loaded, matching what
        # _reload_model does; _on_model_loaded re-enables it on success.
        self.start_button.setEnabled(False)

        self.transcription_engine = self._create_transcription_engine()
        self._start_model_loader(self._on_model_loaded)

    def _on_model_loaded(self, success: bool, message: str):
        """Handle model loading completion.

        Args:
            success: Whether the loader reported success.
            message: Loader message; shown in an error dialog on failure.
        """
        if success:
            self._show_ready_status()
        else:
            self.status_label.setText("❌ Model loading failed")
            QMessageBox.critical(self, "Error", message)
            self.start_button.setEnabled(False)

    # ------------------------------------------------------------------
    # Web server
    # ------------------------------------------------------------------

    def _start_web_server_if_enabled(self):
        """Start web server.

        NOTE(review): despite the name, no "enabled" setting is consulted
        here - the server is always started.
        """
        host = self.config.get('web_server.host', '127.0.0.1')
        port = self.config.get('web_server.port', 8080)
        show_timestamps = self.config.get('display.show_timestamps', True)
        fade_after_seconds = self.config.get('display.fade_after_seconds', 10)

        print(f"Starting web server at http://{host}:{port}")
        self.web_server = TranscriptionWebServer(
            host=host,
            port=port,
            show_timestamps=show_timestamps,
            fade_after_seconds=fade_after_seconds
        )
        self.web_server_thread = WebServerThread(self.web_server)
        self.web_server_thread.start()

    # ------------------------------------------------------------------
    # Transcription control
    # ------------------------------------------------------------------

    def _toggle_transcription(self):
        """Start or stop transcription."""
        if not self.is_transcribing:
            self._start_transcription()
        else:
            self._stop_transcription()

    def _start_transcription(self):
        """Start transcription.

        Creates the audio capture and noise suppressor from the current
        config and starts recording. Any failure is shown in an error dialog
        and printed; it is not re-raised.
        """
        try:
            # Check if engine is ready
            if not self.transcription_engine or not self.transcription_engine.is_loaded:
                QMessageBox.critical(self, "Error", "Transcription engine not ready")
                return

            # 'default' maps to None; any other value is converted with int().
            audio_device_str = self.config.get('audio.input_device', 'default')
            audio_device = None if audio_device_str == 'default' else int(audio_device_str)

            # Initialize audio capture
            self.audio_capture = AudioCapture(
                sample_rate=self.config.get('audio.sample_rate', 16000),
                chunk_duration=self.config.get('audio.chunk_duration', 3.0),
                overlap_duration=self.config.get('audio.overlap_duration', 0.5),
                device=audio_device
            )

            # Initialize noise suppressor
            self.noise_suppressor = NoiseSuppressor(
                sample_rate=self.config.get('audio.sample_rate', 16000),
                method="noisereduce" if self.config.get('noise_suppression.enabled', True) else "none",
                strength=self.config.get('noise_suppression.strength', 0.7),
                use_vad=self.config.get('processing.use_vad', True)
            )

            # Start recording
            self.audio_capture.start_recording(callback=self._process_audio_chunk)

            # Update UI
            self.is_transcribing = True
            self.start_button.setText("⏸ Stop Transcription")
            self.start_button.setStyleSheet("background-color: #e74c3c; color: white;")
            self.status_label.setText("🔴 Transcribing...")

        except Exception as e:
            QMessageBox.critical(self, "Error", f"Failed to start transcription:\n{e}")
            print(f"Error starting transcription: {e}")

    def _stop_transcription(self):
        """Stop transcription.

        Failures are shown in an error dialog and printed, not re-raised.
        """
        try:
            # Stop recording
            if self.audio_capture:
                self.audio_capture.stop_recording()

            # Update UI
            self.is_transcribing = False
            self.start_button.setText("▶ Start Transcription")
            self.start_button.setStyleSheet("background-color: #2ecc71; color: white;")
            self.status_label.setText("✓ Ready")

        except Exception as e:
            QMessageBox.critical(self, "Error", f"Failed to stop transcription:\n{e}")
            print(f"Error stopping transcription: {e}")

    def _process_audio_chunk(self, audio_chunk):
        """Process an audio chunk (noise suppression + transcription).

        The work runs on a new daemon thread per chunk. A result is handed to
        the display widget through a queued ``invokeMethod`` call and, when
        the web server's event loop is running, broadcast to the web server.

        Args:
            audio_chunk: Audio data passed by the capture callback; forwarded
                unchanged to the noise suppressor.
        """
        from PySide6.QtCore import QMetaObject, Q_ARG

        def process():
            try:
                # Apply noise suppression
                processed_audio = self.noise_suppressor.process(audio_chunk, skip_silent=True)

                # Skip if silent (VAD filtered it out)
                if processed_audio is None:
                    return

                # Transcribe
                user_name = self.config.get('user.name', 'User')
                result = self.transcription_engine.transcribe(
                    processed_audio,
                    sample_rate=self.config.get('audio.sample_rate', 16000),
                    user_name=user_name
                )
                if not result:
                    return

                # The display is updated through a queued call instead of
                # directly from this worker thread.
                # Note: We don't pass timestamp - let the display widget create it
                QMetaObject.invokeMethod(
                    self.transcription_display,
                    "add_transcription",
                    Qt.QueuedConnection,
                    Q_ARG(str, result.text),
                    Q_ARG(str, result.user_name)
                )

                # Broadcast to the web server only when its loop can actually
                # run the coroutine: the loop is None until the server thread
                # has started, and a stopped loop would never execute it.
                web_thread = self.web_server_thread
                loop = web_thread.loop if web_thread else None
                if self.web_server and loop is not None and loop.is_running():
                    asyncio.run_coroutine_threadsafe(
                        self.web_server.broadcast_transcription(
                            result.text,
                            result.user_name,
                            result.timestamp
                        ),
                        loop
                    )

            except Exception as e:
                print(f"Error processing audio: {e}")
                import traceback
                traceback.print_exc()

        # Run in background thread
        Thread(target=process, daemon=True).start()

    # ------------------------------------------------------------------
    # Clear / save
    # ------------------------------------------------------------------

    def _clear_transcriptions(self):
        """Clear all transcriptions after the user confirms."""
        reply = QMessageBox.question(
            self,
            "Clear Transcriptions",
            "Are you sure you want to clear all transcriptions?",
            QMessageBox.Yes | QMessageBox.No
        )

        if reply == QMessageBox.Yes:
            self.transcription_display.clear_all()

    def _save_transcriptions(self):
        """Save transcriptions to a file chosen by the user."""
        filepath, _ = QFileDialog.getSaveFileName(
            self,
            "Save Transcriptions",
            "",
            "Text files (*.txt);;All files (*.*)"
        )

        # An empty path means the dialog was dismissed without a choice.
        if not filepath:
            return

        if self.transcription_display.save_to_file(filepath):
            QMessageBox.information(self, "Saved", f"Transcriptions saved to:\n{filepath}")
        else:
            QMessageBox.critical(self, "Error", "Failed to save transcriptions")

    # ------------------------------------------------------------------
    # Settings
    # ------------------------------------------------------------------

    def _open_settings(self):
        """Open settings dialog."""
        # Get audio devices
        audio_devices = AudioCapture.get_input_devices()
        if not audio_devices:
            audio_devices = [(0, "Default")]

        # Build a new list rather than inserting into the one returned by the
        # device manager, so repeated opens cannot accumulate "auto" entries
        # in a list the manager may still hold.
        compute_devices = [
            ("auto", "Auto-detect"),
            *(self.device_manager.get_device_info() or []),
        ]

        # Open settings dialog
        dialog = SettingsDialog(
            self,
            self.config,
            audio_devices,
            compute_devices,
            on_save=self._on_settings_saved
        )
        dialog.exec()

    def _on_settings_saved(self):
        """Handle settings being saved.

        Applies user, display and web server settings immediately and reloads
        the model when the configured model size or device changed.
        """
        # Update user label
        user_name = self.config.get('user.name', 'User')
        self.user_label.setText(f"User: {user_name}")

        # Update display settings
        show_timestamps = self.config.get('display.show_timestamps', True)
        self.transcription_display.set_max_lines(self.config.get('display.max_lines', 100))
        self.transcription_display.set_show_timestamps(show_timestamps)
        self.transcription_display.set_font(
            self.config.get('display.font_family', 'Courier'),
            self.config.get('display.font_size', 12)
        )

        # Update web server settings
        if self.web_server:
            self.web_server.show_timestamps = show_timestamps
            self.web_server.fade_after_seconds = self.config.get('display.fade_after_seconds', 10)

        # Only reload if model size or device changed
        new_model = self.config.get('transcription.model', 'base')
        new_device_config = self.config.get('transcription.device', 'auto')

        if self.current_model_size != new_model or self.current_device_config != new_device_config:
            self._reload_model()
        else:
            QMessageBox.information(self, "Settings Saved", "Settings have been applied successfully!")

    def _reload_model(self):
        """Reload the transcription model with new settings."""
        # Stop transcription if running
        if self.is_transcribing:
            self._stop_transcription()

        # Update status
        self.status_label.setText("⚙ Reloading model...")
        self.start_button.setEnabled(False)

        # Let any in-flight load finish before touching the engine, so the
        # unload below does not run concurrently with load_model().
        self._wait_for_model_loader()

        # Unload current model
        if self.transcription_engine:
            self.transcription_engine.unload_model()

        # Re-initialize the engine and load it in the background
        self.transcription_engine = self._create_transcription_engine()
        self._start_model_loader(self._on_model_reloaded)

    def _on_model_reloaded(self, success: bool, message: str):
        """Handle model reloading completion.

        Args:
            success: Whether the loader reported success.
            message: Loader message; included in the error dialog on failure.
        """
        if success:
            self._show_ready_status()
            QMessageBox.information(self, "Settings Saved", "Model reloaded successfully with new settings!")
        else:
            self.status_label.setText("❌ Model loading failed")
            QMessageBox.critical(self, "Error", f"Failed to reload model:\n{message}")
            self.start_button.setEnabled(False)

    # ------------------------------------------------------------------
    # Shutdown
    # ------------------------------------------------------------------

    def closeEvent(self, event):
        """Handle window closing."""
        # Stop transcription if running
        if self.is_transcribing:
            self._stop_transcription()

        # Wait for a running loader first so the unload below happens after
        # the load has finished rather than concurrently with it.
        self._wait_for_model_loader()

        # Unload model
        if self.transcription_engine:
            self.transcription_engine.unload_model()

        event.accept()
|