Add unified per-speaker font support and remote transcription service

Font changes:
- Consolidate font settings into single Display Settings section
- Support Web-Safe, Google Fonts, and Custom File uploads for both displays
- Fix Google Fonts URL encoding (use + instead of %2B for spaces)
- Fix per-speaker font inline style quote escaping in Node.js display
- Add font debug logging to help diagnose font issues
- Update web server to sync all font settings on settings change
- Remove deprecated PHP server documentation files

New features:
- Add remote transcription service for GPU offloading
- Add instance lock to prevent multiple app instances
- Add version tracking

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-11 18:56:12 -08:00
parent f035bdb927
commit ff067b3368
23 changed files with 2486 additions and 1160 deletions

View File

@@ -9,16 +9,16 @@ from PySide6.QtGui import QFont
from pathlib import Path
import sys
# Add parent directory to path for imports
sys.path.append(str(Path(__file__).parent.parent))
# Add parent directory to path for imports (resolve symlinks)
sys.path.append(str(Path(__file__).resolve().parent.parent))
from client.config import Config
from client.device_utils import DeviceManager
from client.transcription_engine_realtime import RealtimeTranscriptionEngine, TranscriptionResult
from client.server_sync import ServerSyncClient
from gui.transcription_display_qt import TranscriptionDisplay
from gui.settings_dialog_qt import SettingsDialog
from server.web_display import TranscriptionWebServer
from version import __version__
import asyncio
from threading import Thread
@@ -96,9 +96,13 @@ class MainWindow(QMainWindow):
# Server sync components
self.server_sync_client: ServerSyncClient = None
# Store all transcriptions for saving (separate from display)
self.transcriptions: list = []
# Configure window
self.setWindowTitle("Local Transcription")
self.resize(900, 700)
self.resize(700, 300)
self.setMinimumSize(600, 280)
# Set application icon
# In PyInstaller frozen executables, use _MEIPASS for bundled files
@@ -108,7 +112,7 @@ class MainWindow(QMainWindow):
icon_path = Path(sys._MEIPASS) / "LocalTranscription.png"
else:
# Running in normal Python
icon_path = Path(__file__).parent.parent / "LocalTranscription.png"
icon_path = Path(__file__).resolve().parent.parent / "LocalTranscription.png"
if icon_path.exists():
from PySide6.QtGui import QIcon
@@ -174,13 +178,14 @@ class MainWindow(QMainWindow):
# Status bar
status_widget = QWidget()
status_widget.setFixedHeight(60)
status_widget.setFixedHeight(40)
status_layout = QHBoxLayout()
status_layout.setContentsMargins(0, 0, 0, 0)
status_widget.setLayout(status_layout)
self.status_label = QLabel("⚫ Initializing...")
status_font = QFont()
status_font.setPointSize(14)
status_font.setPointSize(12)
self.status_label.setFont(status_font)
status_layout.addWidget(self.status_label)
@@ -193,28 +198,36 @@ class MainWindow(QMainWindow):
self.user_label = QLabel(f"User: {user_name}")
status_layout.addWidget(self.user_label)
# Web display link
web_host = self.config.get('web_server.host', '127.0.0.1')
web_port = self.config.get('web_server.port', 8080)
web_url = f"http://{web_host}:{web_port}"
self.web_link = QLabel(f'<a href="{web_url}">🌐 Open Web Display</a>')
self.web_link.setOpenExternalLinks(True)
self.web_link.setToolTip(f"Click to open {web_url} in browser (for OBS)")
self.web_link.setStyleSheet("QLabel { color: #4CAF50; }")
status_layout.addWidget(self.web_link)
status_layout.addStretch()
main_layout.addWidget(status_widget)
# Transcription display
self.transcription_display = TranscriptionDisplay(
max_lines=self.config.get('display.max_lines', 100),
show_timestamps=self.config.get('display.show_timestamps', True),
font_family=self.config.get('display.font_family', 'Courier'),
font_size=self.config.get('display.font_size', 12)
)
main_layout.addWidget(self.transcription_display)
# Web display links section
links_widget = QWidget()
links_layout = QVBoxLayout()
links_layout.setContentsMargins(0, 5, 0, 5)
links_layout.setSpacing(5)
links_widget.setLayout(links_layout)
# Local web display link
web_host = self.config.get('web_server.host', '127.0.0.1')
web_port = self.config.get('web_server.port', 8080)
web_url = f"http://{web_host}:{web_port}"
self.web_link = QLabel(f'🌐 Local Web Display: <a href="{web_url}">{web_url}</a>')
self.web_link.setOpenExternalLinks(True)
self.web_link.setToolTip("Click to open in browser (for OBS)")
self.web_link.setStyleSheet("QLabel a { color: #4CAF50; }")
links_layout.addWidget(self.web_link)
# Multi-user sync display link (shown when server sync is enabled)
self.sync_link = QLabel("")
self.sync_link.setOpenExternalLinks(True)
self.sync_link.setStyleSheet("QLabel a { color: #2196F3; }")
self.sync_link.setVisible(False)
links_layout.addWidget(self.sync_link)
self._update_sync_link()
main_layout.addWidget(links_widget)
# Control buttons
control_widget = QWidget()
@@ -232,7 +245,7 @@ class MainWindow(QMainWindow):
self.start_button.setStyleSheet("background-color: #2ecc71; color: white;")
control_layout.addWidget(self.start_button)
self.clear_button = QPushButton("Clear")
self.clear_button = QPushButton("🗑 Clear")
self.clear_button.setFixedSize(120, 50)
self.clear_button.clicked.connect(self._clear_transcriptions)
control_layout.addWidget(self.clear_button)
@@ -246,6 +259,12 @@ class MainWindow(QMainWindow):
main_layout.addWidget(control_widget)
# Version label (bottom right)
version_label = QLabel(f"v{__version__}")
version_label.setStyleSheet("QLabel { color: #666; font-size: 10px; }")
version_label.setAlignment(Qt.AlignRight)
main_layout.addWidget(version_label)
def _initialize_components(self):
"""Initialize RealtimeSTT transcription engine."""
# Update status
@@ -271,6 +290,20 @@ class MainWindow(QMainWindow):
user_name = self.config.get('user.name', 'User')
# Check for continuous/fast speaker mode
continuous_mode = self.config.get('transcription.continuous_mode', False)
# Get timing settings - use faster values if continuous mode is enabled
if continuous_mode:
# Faster settings for speakers who talk without pauses
post_speech_silence = 0.15 # Reduced from default 0.3
min_gap = 0.0 # No gap between recordings
min_recording = 0.3 # Shorter minimum recording
else:
post_speech_silence = self.config.get('transcription.post_speech_silence_duration', 0.3)
min_gap = self.config.get('transcription.min_gap_between_recordings', 0.0)
min_recording = self.config.get('transcription.min_length_of_recording', 0.5)
self.transcription_engine = RealtimeTranscriptionEngine(
model=model,
device=device,
@@ -278,12 +311,13 @@ class MainWindow(QMainWindow):
compute_type=compute_type,
enable_realtime_transcription=self.config.get('transcription.enable_realtime_transcription', False),
realtime_model=self.config.get('transcription.realtime_model', 'tiny.en'),
realtime_processing_pause=self.config.get('transcription.realtime_processing_pause', 0.1),
silero_sensitivity=self.config.get('transcription.silero_sensitivity', 0.4),
silero_use_onnx=self.config.get('transcription.silero_use_onnx', True),
webrtc_sensitivity=self.config.get('transcription.webrtc_sensitivity', 3),
post_speech_silence_duration=self.config.get('transcription.post_speech_silence_duration', 0.3),
min_length_of_recording=self.config.get('transcription.min_length_of_recording', 0.5),
min_gap_between_recordings=self.config.get('transcription.min_gap_between_recordings', 0.0),
post_speech_silence_duration=post_speech_silence,
min_length_of_recording=min_recording,
min_gap_between_recordings=min_gap,
pre_recording_buffer_duration=self.config.get('transcription.pre_recording_buffer_duration', 0.2),
beam_size=self.config.get('transcription.beam_size', 5),
initial_prompt=self.config.get('transcription.initial_prompt', ''),
@@ -332,6 +366,12 @@ class MainWindow(QMainWindow):
max_lines = self.config.get('display.max_lines', 50)
font_family = self.config.get('display.font_family', 'Arial')
font_size = self.config.get('display.font_size', 16)
fonts_dir = self.config.fonts_dir # Custom fonts directory
# Font source settings
font_source = self.config.get('display.font_source', 'System Font')
websafe_font = self.config.get('display.websafe_font', 'Arial')
google_font = self.config.get('display.google_font', 'Roboto')
# Try up to 5 ports if the default is in use
ports_to_try = [port] + [port + i for i in range(1, 5)]
@@ -346,7 +386,11 @@ class MainWindow(QMainWindow):
fade_after_seconds=fade_after_seconds,
max_lines=max_lines,
font_family=font_family,
font_size=font_size
font_size=font_size,
fonts_dir=fonts_dir,
font_source=font_source,
websafe_font=websafe_font,
google_font=google_font
)
self.web_server_thread = WebServerThread(self.web_server)
self.web_server_thread.start()
@@ -450,15 +494,21 @@ class MainWindow(QMainWindow):
return
try:
# Update display with preview (thread-safe Qt call)
from PySide6.QtCore import QMetaObject, Q_ARG
QMetaObject.invokeMethod(
self.transcription_display,
"add_transcription",
Qt.QueuedConnection,
Q_ARG(str, f"[PREVIEW] {result.text}"),
Q_ARG(str, result.user_name)
)
# Broadcast preview to local web server
if self.web_server and self.web_server_thread and self.web_server_thread.loop:
asyncio.run_coroutine_threadsafe(
self.web_server.broadcast_preview(
result.text,
result.user_name,
result.timestamp
),
self.web_server_thread.loop
)
# Send preview to server sync if enabled
if self.server_sync_client:
self.server_sync_client.send_preview(result.text, result.timestamp)
except Exception as e:
print(f"Error handling realtime transcription: {e}")
@@ -468,15 +518,8 @@ class MainWindow(QMainWindow):
return
try:
# Update display (thread-safe Qt call)
from PySide6.QtCore import QMetaObject, Q_ARG
QMetaObject.invokeMethod(
self.transcription_display,
"add_transcription",
Qt.QueuedConnection,
Q_ARG(str, result.text),
Q_ARG(str, result.user_name)
)
# Store transcription for saving
self.transcriptions.append(result)
# Broadcast to web server if enabled
if self.web_server and self.web_server_thread:
@@ -508,18 +551,27 @@ class MainWindow(QMainWindow):
def _clear_transcriptions(self):
"""Clear all transcriptions."""
if not self.transcriptions:
QMessageBox.information(self, "No Transcriptions", "There are no transcriptions to clear.")
return
reply = QMessageBox.question(
self,
"Clear Transcriptions",
"Are you sure you want to clear all transcriptions?",
f"Are you sure you want to clear {len(self.transcriptions)} transcription(s)?",
QMessageBox.Yes | QMessageBox.No
)
if reply == QMessageBox.Yes:
self.transcription_display.clear_all()
self.transcriptions.clear()
QMessageBox.information(self, "Cleared", "All transcriptions have been cleared.")
def _save_transcriptions(self):
"""Save transcriptions to file."""
if not self.transcriptions:
QMessageBox.warning(self, "No Transcriptions", "There are no transcriptions to save.")
return
filepath, _ = QFileDialog.getSaveFileName(
self,
"Save Transcriptions",
@@ -528,10 +580,21 @@ class MainWindow(QMainWindow):
)
if filepath:
if self.transcription_display.save_to_file(filepath):
try:
show_timestamps = self.config.get('display.show_timestamps', True)
with open(filepath, 'w', encoding='utf-8') as f:
for result in self.transcriptions:
line_parts = []
if show_timestamps:
time_str = result.timestamp.strftime("%H:%M:%S")
line_parts.append(f"[{time_str}]")
if result.user_name and result.user_name.strip():
line_parts.append(f"{result.user_name}:")
line_parts.append(result.text)
f.write(" ".join(line_parts) + "\n")
QMessageBox.information(self, "Saved", f"Transcriptions saved to:\n{filepath}")
else:
QMessageBox.critical(self, "Error", "Failed to save transcriptions")
except Exception as e:
QMessageBox.critical(self, "Error", f"Failed to save transcriptions:\n{e}")
def _open_settings(self):
"""Open settings dialog."""
@@ -569,22 +632,20 @@ class MainWindow(QMainWindow):
user_name = self.config.get('user.name', 'User')
self.user_label.setText(f"User: {user_name}")
# Update display settings
show_timestamps = self.config.get('display.show_timestamps', True)
self.transcription_display.set_max_lines(self.config.get('display.max_lines', 100))
self.transcription_display.set_show_timestamps(show_timestamps)
self.transcription_display.set_font(
self.config.get('display.font_family', 'Courier'),
self.config.get('display.font_size', 12)
)
# Update web server settings
if self.web_server:
self.web_server.show_timestamps = show_timestamps
self.web_server.show_timestamps = self.config.get('display.show_timestamps', True)
self.web_server.fade_after_seconds = self.config.get('display.fade_after_seconds', 10)
self.web_server.max_lines = self.config.get('display.max_lines', 50)
self.web_server.font_family = self.config.get('display.font_family', 'Arial')
self.web_server.font_size = self.config.get('display.font_size', 16)
# Update font source settings
self.web_server.font_source = self.config.get('display.font_source', 'System Font')
self.web_server.websafe_font = self.config.get('display.websafe_font', 'Arial')
self.web_server.google_font = self.config.get('display.google_font', 'Roboto')
# Update sync link visibility based on server sync settings
self._update_sync_link()
# Restart server sync if it was running and settings changed
if self.is_transcribing and self.server_sync_client:
@@ -656,18 +717,33 @@ class MainWindow(QMainWindow):
room = self.config.get('server_sync.room', 'default')
passphrase = self.config.get('server_sync.passphrase', '')
user_name = self.config.get('user.name', 'User')
fonts_dir = self.config.fonts_dir # Custom fonts directory
# Font settings (shared with display settings)
# Note: "System Font" only works locally, so we treat it as "None" for server sync
font_source = self.config.get('display.font_source', 'System Font')
if font_source == "System Font":
font_source = "None" # System fonts don't work on remote displays
websafe_font = self.config.get('display.websafe_font', '')
google_font = self.config.get('display.google_font', '')
custom_font_file = self.config.get('display.custom_font_file', '')
if not url:
print("Server sync enabled but no URL configured")
return
print(f"Starting server sync: {url}, room: {room}, user: {user_name}")
print(f"Starting server sync: {url}, room: {room}, user: {user_name}, font: {font_source}")
self.server_sync_client = ServerSyncClient(
url=url,
room=room,
passphrase=passphrase,
user_name=user_name
user_name=user_name,
fonts_dir=fonts_dir,
font_source=font_source,
websafe_font=websafe_font if websafe_font else None,
google_font=google_font if google_font else None,
custom_font_file=custom_font_file if custom_font_file else None
)
self.server_sync_client.start()
@@ -679,6 +755,40 @@ class MainWindow(QMainWindow):
f"Failed to start server sync:\n{e}\n\nTranscription will continue locally."
)
def _update_sync_link(self):
    """Refresh the multi-user sync link label from the current config.

    Reads ``server_sync.enabled``, ``server_sync.url`` and
    ``server_sync.room`` together with the display settings. When sync is
    enabled and the configured URL has both a scheme and a host, the label
    is pointed at ``<scheme>://<host>/display`` with the display settings
    passed as query parameters, and is made visible. In every other case
    the label is hidden.
    """
    from html import escape
    from urllib.parse import urlencode, urlparse

    cfg = self.config
    server_url = cfg.get('server_sync.url', '')
    room = cfg.get('server_sync.room', 'default')

    if not (cfg.get('server_sync.enabled', False) and server_url):
        self.sync_link.setVisible(False)
        return

    # Extract base URL from the API endpoint
    # (e.g., http://server:3000/api/send -> http://server:3000)
    try:
        parsed = urlparse(server_url)
    except ValueError as e:
        print(f"Error parsing server URL: {e}")
        self.sync_link.setVisible(False)
        return

    # "server:3000/api/send" parses with an empty netloc; building a link
    # from it would yield a useless "server://" address, so hide instead.
    if not parsed.scheme or not parsed.netloc:
        print(f"Error parsing server URL: missing scheme or host in {server_url!r}")
        self.sync_link.setVisible(False)
        return

    base_url = f"{parsed.scheme}://{parsed.netloc}"

    # Display settings are forwarded to the remote page as URL parameters.
    params = {
        'room': room,
        'fontfamily': cfg.get('display.font_family', 'Arial'),
        'fontsize': cfg.get('display.font_size', 16),
        'fade': cfg.get('display.fade_after_seconds', 10),
        'timestamps': 'true' if cfg.get('display.show_timestamps', True) else 'false',
        'maxlines': cfg.get('display.max_lines', 50),
    }
    display_url = f"{base_url}/display?{urlencode(params)}"

    # Show shorter text with just address and room
    display_text = f"{base_url} (room: {room})"

    # The label text is rich text: escape both the attribute value and the
    # visible text so '&', '<', '>' and quotes cannot break the markup.
    href = escape(display_url, quote=True)
    caption = escape(display_text, quote=False)
    self.sync_link.setText(f'🔗 Multi-User Display: <a href="{href}">{caption}</a>')
    self.sync_link.setToolTip(f"Click to open: {display_url}")
    self.sync_link.setVisible(True)
def closeEvent(self, event):
"""Handle window closing."""
# Stop transcription if running

View File

@@ -3,10 +3,11 @@
from PySide6.QtWidgets import (
QDialog, QVBoxLayout, QHBoxLayout, QFormLayout,
QLabel, QLineEdit, QComboBox, QCheckBox, QSlider,
QPushButton, QMessageBox, QGroupBox, QScrollArea, QWidget
QPushButton, QMessageBox, QGroupBox, QScrollArea, QWidget,
QFileDialog
)
from PySide6.QtCore import Qt
from PySide6.QtGui import QScreen
from PySide6.QtGui import QScreen, QFontDatabase
from typing import Callable, List, Tuple
@@ -179,6 +180,16 @@ class SettingsDialog(QDialog):
self.realtime_model_combo.addItems(["tiny", "tiny.en", "base", "base.en"])
realtime_layout.addRow("Preview Model:", self.realtime_model_combo)
self.realtime_pause_input = QLineEdit()
self.realtime_pause_input.setToolTip(
"Seconds between preview updates:\n"
"• Lower values = More responsive, more frequent updates\n"
"• Higher values = Less CPU usage, updates less often\n"
"• 0.1 is recommended for real-time streaming\n"
"• Try 0.05 for even faster updates"
)
realtime_layout.addRow("Preview Update Interval (s):", self.realtime_pause_input)
realtime_group.setLayout(realtime_layout)
content_layout.addWidget(realtime_group)
@@ -261,6 +272,16 @@ class SettingsDialog(QDialog):
)
timing_layout.addRow("Pre-Recording Buffer (s):", self.pre_buffer_input)
self.continuous_mode_check = QCheckBox()
self.continuous_mode_check.setToolTip(
"Fast Speaker Mode:\n"
"• For speakers who talk quickly without pauses\n"
"• Reduces silence detection thresholds\n"
"• Produces more frequent transcription outputs\n"
"• May result in more fragmented sentences"
)
timing_layout.addRow("Fast Speaker Mode:", self.continuous_mode_check)
timing_group.setLayout(timing_layout)
content_layout.addWidget(timing_group)
@@ -281,10 +302,79 @@ class SettingsDialog(QDialog):
)
display_layout.addRow("Max Lines:", self.maxlines_input)
# Font source selector (shared for local display and server sync)
self.display_font_source_combo = QComboBox()
self.display_font_source_combo.addItems(["System Font", "Web-Safe", "Google Font", "Custom File"])
self.display_font_source_combo.setToolTip(
"Choose font for local display and server sync:\n"
"• System Font - Local only (won't work with server sync)\n"
"• Web-Safe - Universal fonts (Arial, Comic Sans, etc.)\n"
"• Google Font - Free fonts from fonts.google.com\n"
"• Custom File - Upload your own font file"
)
self.display_font_source_combo.currentTextChanged.connect(self._on_display_font_source_changed)
display_layout.addRow("Font Source:", self.display_font_source_combo)
# System font selector
self.font_family_combo = QComboBox()
self.font_family_combo.setToolTip("Font family for transcription display")
self.font_family_combo.addItems(["Courier", "Arial", "Times New Roman", "Consolas", "Monaco", "Monospace"])
display_layout.addRow("Font Family:", self.font_family_combo)
self.font_family_combo.setToolTip("Font family for transcription display (system fonts)")
self.font_family_combo.setEditable(True)
self.font_family_combo.setMaxVisibleItems(20)
system_fonts = QFontDatabase.families()
common_fonts = ["Courier", "Arial", "Times New Roman", "Consolas", "Monaco", "Monospace"]
ordered_fonts = []
for font in common_fonts:
if font in system_fonts:
ordered_fonts.append(font)
for font in sorted(system_fonts):
if font not in ordered_fonts:
ordered_fonts.append(font)
self.font_family_combo.addItems(ordered_fonts)
display_layout.addRow("System Font:", self.font_family_combo)
# Web-safe font selector for display
self.display_websafe_combo = QComboBox()
display_websafe_fonts = [
"Arial", "Arial Black", "Comic Sans MS", "Courier New",
"Georgia", "Impact", "Lucida Console", "Lucida Sans Unicode",
"Palatino Linotype", "Tahoma", "Times New Roman", "Trebuchet MS", "Verdana"
]
self.display_websafe_combo.addItems(display_websafe_fonts)
self.display_websafe_combo.setToolTip("Web-safe fonts work on all systems")
display_layout.addRow("Web-Safe Font:", self.display_websafe_combo)
# Google Font selector for display
self.display_google_font_combo = QComboBox()
display_google_fonts = [
"Roboto", "Open Sans", "Lato", "Montserrat", "Poppins",
"Nunito", "Raleway", "Ubuntu", "Rubik", "Work Sans",
"Inter", "Outfit", "Quicksand", "Comfortaa", "Varela Round",
"Playfair Display", "Merriweather", "Lora", "PT Serif", "Crimson Text",
"Roboto Mono", "Source Code Pro", "Fira Code", "JetBrains Mono", "IBM Plex Mono",
"Bebas Neue", "Oswald", "Righteous", "Bangers", "Permanent Marker",
"Pacifico", "Lobster", "Dancing Script", "Caveat", "Satisfy"
]
self.display_google_font_combo.addItems(display_google_fonts)
self.display_google_font_combo.setToolTip("Select a Google Font for display")
display_layout.addRow("Google Font:", self.display_google_font_combo)
# Custom font file picker (for server sync upload)
custom_font_layout = QHBoxLayout()
self.display_custom_font_input = QLineEdit()
self.display_custom_font_input.setPlaceholderText("No file selected")
self.display_custom_font_input.setReadOnly(True)
self.display_custom_font_input.setToolTip(
"Select a font file to use:\n"
"• Supports .ttf, .otf, .woff, .woff2 files\n"
"• Font is uploaded to server automatically when using Server Sync"
)
custom_font_layout.addWidget(self.display_custom_font_input)
self.display_custom_font_browse = QPushButton("Browse...")
self.display_custom_font_browse.clicked.connect(self._browse_display_custom_font)
custom_font_layout.addWidget(self.display_custom_font_browse)
display_layout.addRow("Custom Font File:", custom_font_layout)
self.font_size_input = QLineEdit()
self.font_size_input.setToolTip("Font size in pixels (12-20 recommended)")
@@ -301,6 +391,9 @@ class SettingsDialog(QDialog):
display_group.setLayout(display_layout)
content_layout.addWidget(display_group)
# Initially show only System Font (default)
self._on_display_font_source_changed("System Font")
# Server Sync Group
server_group = QGroupBox("Multi-User Server Sync (Optional)")
server_layout = QFormLayout()
@@ -339,9 +432,55 @@ class SettingsDialog(QDialog):
)
server_layout.addRow("Passphrase:", self.server_passphrase_input)
# Note about font settings
font_note = QLabel("Font settings are in Display Settings above")
font_note.setStyleSheet("color: #666; font-style: italic;")
server_layout.addRow("", font_note)
server_group.setLayout(server_layout)
content_layout.addWidget(server_group)
# Remote Processing Group
remote_group = QGroupBox("Remote Processing (GPU Offload)")
remote_layout = QFormLayout()
remote_layout.setSpacing(10)
self.remote_enabled_check = QCheckBox()
self.remote_enabled_check.setToolTip(
"Enable remote transcription processing:\n"
"• Offload transcription to a GPU-equipped server\n"
"• Reduces local CPU/GPU usage\n"
"• Requires running the remote transcription service"
)
remote_layout.addRow("Enable Remote Processing:", self.remote_enabled_check)
self.remote_url_input = QLineEdit()
self.remote_url_input.setPlaceholderText("ws://your-server:8765/ws/transcribe")
self.remote_url_input.setToolTip(
"WebSocket URL of the remote transcription service:\n"
"• Format: ws://host:port/ws/transcribe\n"
"• Use wss:// for secure connections"
)
remote_layout.addRow("Server URL:", self.remote_url_input)
self.remote_api_key_input = QLineEdit()
self.remote_api_key_input.setEchoMode(QLineEdit.Password)
self.remote_api_key_input.setPlaceholderText("your-api-key")
self.remote_api_key_input.setToolTip(
"API key for authentication with the remote service"
)
remote_layout.addRow("API Key:", self.remote_api_key_input)
self.remote_fallback_check = QCheckBox("Enable")
self.remote_fallback_check.setChecked(True)
self.remote_fallback_check.setToolTip(
"Fall back to local transcription if remote service is unavailable"
)
remote_layout.addRow("Fallback to Local:", self.remote_fallback_check)
remote_group.setLayout(remote_layout)
content_layout.addWidget(remote_group)
# Add stretch to push everything to the top
content_layout.addStretch()
@@ -367,6 +506,77 @@ class SettingsDialog(QDialog):
"""Update the Silero sensitivity label."""
self.silero_label.setText(f"{value / 100:.2f}")
def _open_fonts_folder(self):
    """Open the custom fonts folder in the system file manager.

    The folder (``self.config.fonts_dir``) is created first if it does not
    exist. The launcher depends on the platform: ``explorer`` on Windows,
    ``open`` on macOS and ``xdg-open`` everywhere else. A failure to create
    the folder or to start the launcher is printed instead of raised, so a
    missing ``xdg-open`` does not surface as an unhandled exception in the
    UI handler.
    """
    import subprocess
    import sys

    fonts_dir = self.config.fonts_dir

    # Anything that is neither Windows nor macOS is treated as Linux/XDG.
    launchers = {'win32': 'explorer', 'darwin': 'open'}
    launcher = launchers.get(sys.platform, 'xdg-open')

    try:
        # Ensure the folder exists before asking the file manager to show it
        fonts_dir.mkdir(parents=True, exist_ok=True)
        subprocess.run([launcher, str(fonts_dir)])
    except OSError as e:
        # Covers permission errors from mkdir and a launcher that is not installed.
        print(f"Could not open fonts folder {fonts_dir}: {e}")
def _on_display_font_source_changed(self, source: str):
    """Show only the font input row that belongs to the selected source.

    Args:
        source: Text of the chosen font source ("System Font", "Web-Safe",
            "Google Font" or "Custom File"). Any other value leaves every
            font-specific input hidden.
    """
    # Input widgets that belong to each font source.
    inputs_by_source = {
        "System Font": (self.font_family_combo,),
        "Web-Safe": (self.display_websafe_combo,),
        "Google Font": (self.display_google_font_combo,),
        "Custom File": (self.display_custom_font_input, self.display_custom_font_browse),
    }
    # Form-row label text -> the font source that row belongs to.
    source_by_label = {
        "System Font:": "System Font",
        "Web-Safe Font:": "Web-Safe",
        "Google Font:": "Google Font",
        "Custom Font File:": "Custom File",
    }

    # Start from a clean slate: every font-specific input hidden.
    for widgets in inputs_by_source.values():
        for widget in widgets:
            widget.setVisible(False)

    # The row labels are not stored on self, so they are looked up through
    # the layout of the combo's parent and matched by their text.
    owner = self.display_font_source_combo.parent()
    form = owner.layout() if owner else None
    if form and hasattr(form, 'rowCount'):
        for row in range(form.rowCount()):
            label_item = form.itemAt(row, QFormLayout.LabelRole)
            field_item = form.itemAt(row, QFormLayout.FieldRole)
            if not (label_item and field_item):
                continue
            caption = label_item.widget()
            if not caption:
                continue
            wanted = source_by_label.get(caption.text())
            if wanted is not None:
                caption.setVisible(source == wanted)

    # Finally reveal the input(s) for the chosen source.
    for widget in inputs_by_source.get(source, ()):
        widget.setVisible(True)
def _browse_display_custom_font(self):
    """Let the user pick a font file and copy its path into the custom-font field.

    Cancelling the dialog (empty path) leaves the field untouched.
    """
    name_filters = "Font Files (*.ttf *.otf *.woff *.woff2);;All Files (*)"
    chosen_path, _selected_filter = QFileDialog.getOpenFileName(
        self, "Select Font File", "", name_filters
    )
    if not chosen_path:
        return
    self.display_custom_font_input.setText(chosen_path)
def _load_current_settings(self):
"""Load current settings from config."""
# User settings
@@ -402,6 +612,7 @@ class SettingsDialog(QDialog):
self.realtime_enabled_check.setChecked(self.config.get('transcription.enable_realtime_transcription', False))
realtime_model = self.config.get('transcription.realtime_model', 'tiny.en')
self.realtime_model_combo.setCurrentText(realtime_model)
self.realtime_pause_input.setText(str(self.config.get('transcription.realtime_processing_pause', 0.1)))
# VAD settings
silero_sens = self.config.get('transcription.silero_sensitivity', 0.4)
@@ -417,13 +628,23 @@ class SettingsDialog(QDialog):
self.post_silence_input.setText(str(self.config.get('transcription.post_speech_silence_duration', 0.3)))
self.min_recording_input.setText(str(self.config.get('transcription.min_length_of_recording', 0.5)))
self.pre_buffer_input.setText(str(self.config.get('transcription.pre_recording_buffer_duration', 0.2)))
self.continuous_mode_check.setChecked(self.config.get('transcription.continuous_mode', False))
# Display settings
self.timestamps_check.setChecked(self.config.get('display.show_timestamps', True))
self.maxlines_input.setText(str(self.config.get('display.max_lines', 100)))
# Display font settings
display_font_source = self.config.get('display.font_source', 'System Font')
self.display_font_source_combo.setCurrentText(display_font_source)
font_family = self.config.get('display.font_family', 'Courier')
self.font_family_combo.setCurrentText(font_family)
self.display_websafe_combo.setCurrentText(self.config.get('display.websafe_font', 'Arial'))
display_google_font = self.config.get('display.google_font', 'Roboto')
if display_google_font:
self.display_google_font_combo.setCurrentText(display_google_font)
self.display_custom_font_input.setText(self.config.get('display.custom_font_file', ''))
self._on_display_font_source_changed(display_font_source)
self.font_size_input.setText(str(self.config.get('display.font_size', 12)))
self.fade_seconds_input.setText(str(self.config.get('display.fade_after_seconds', 10)))
@@ -434,6 +655,12 @@ class SettingsDialog(QDialog):
self.server_room_input.setText(self.config.get('server_sync.room', 'default'))
self.server_passphrase_input.setText(self.config.get('server_sync.passphrase', ''))
# Remote processing settings
self.remote_enabled_check.setChecked(self.config.get('remote_processing.enabled', False))
self.remote_url_input.setText(self.config.get('remote_processing.server_url', ''))
self.remote_api_key_input.setText(self.config.get('remote_processing.api_key', ''))
self.remote_fallback_check.setChecked(self.config.get('remote_processing.fallback_to_local', True))
def _save_settings(self):
"""Save settings to config."""
try:
@@ -459,6 +686,7 @@ class SettingsDialog(QDialog):
# Realtime preview
self.config.set('transcription.enable_realtime_transcription', self.realtime_enabled_check.isChecked())
self.config.set('transcription.realtime_model', self.realtime_model_combo.currentText())
self.config.set('transcription.realtime_processing_pause', float(self.realtime_pause_input.text()))
# VAD settings
self.config.set('transcription.silero_sensitivity', self.silero_slider.value() / 100.0)
@@ -469,12 +697,20 @@ class SettingsDialog(QDialog):
self.config.set('transcription.post_speech_silence_duration', float(self.post_silence_input.text()))
self.config.set('transcription.min_length_of_recording', float(self.min_recording_input.text()))
self.config.set('transcription.pre_recording_buffer_duration', float(self.pre_buffer_input.text()))
self.config.set('transcription.continuous_mode', self.continuous_mode_check.isChecked())
# Display settings
self.config.set('display.show_timestamps', self.timestamps_check.isChecked())
max_lines = int(self.maxlines_input.text())
self.config.set('display.max_lines', max_lines)
# Display font settings (also used for server sync)
self.config.set('display.font_source', self.display_font_source_combo.currentText())
self.config.set('display.font_family', self.font_family_combo.currentText())
self.config.set('display.websafe_font', self.display_websafe_combo.currentText())
self.config.set('display.google_font', self.display_google_font_combo.currentText())
self.config.set('display.custom_font_file', self.display_custom_font_input.text())
font_size = int(self.font_size_input.text())
self.config.set('display.font_size', font_size)
fade_seconds = int(self.fade_seconds_input.text())
@@ -486,6 +722,12 @@ class SettingsDialog(QDialog):
self.config.set('server_sync.room', self.server_room_input.text())
self.config.set('server_sync.passphrase', self.server_passphrase_input.text())
# Remote processing settings
self.config.set('remote_processing.enabled', self.remote_enabled_check.isChecked())
self.config.set('remote_processing.server_url', self.remote_url_input.text())
self.config.set('remote_processing.api_key', self.remote_api_key_input.text())
self.config.set('remote_processing.fallback_to_local', self.remote_fallback_check.isChecked())
# Call save callback (which will show the success message)
if self.on_save:
self.on_save()

View File

@@ -1,7 +1,7 @@
"""PySide6 transcription display widget for showing real-time transcriptions."""
from PySide6.QtWidgets import QTextEdit
from PySide6.QtGui import QFont, QTextCursor
from PySide6.QtGui import QFont, QTextCursor, QTextCharFormat, QColor
from PySide6.QtCore import Qt, Slot
from datetime import datetime
@@ -28,6 +28,10 @@ class TranscriptionDisplay(QTextEdit):
self.font_family = font_family
self.font_size = font_size
# Track the current preview line for two-stage transcription
self.preview_line_index = -1 # -1 means no active preview
self.preview_block_number = -1 # Block number for the preview line
# Configure text widget
self.setReadOnly(True)
self.setFont(QFont(font_family, font_size))
@@ -43,6 +47,36 @@ class TranscriptionDisplay(QTextEdit):
}
""")
def _format_line(self, text: str, user_name: str, timestamp: datetime, is_preview: bool = False) -> str:
    """
    Build the single display string for one transcription entry.

    The pieces are joined with single spaces in this order: an optional
    "[HH:MM:SS]" stamp (only when self.show_timestamps is truthy), an
    optional "<user_name>:" label (only when user_name is non-empty after
    stripping whitespace), and finally the text itself.

    Args:
        text: Transcription text
        user_name: User/speaker name; emitted as given (not stripped)
        timestamp: Timestamp of transcription
        is_preview: When True the text is prefixed with "[...] " so a
            preview is visually distinct from a final line

    Returns:
        Formatted line string
    """
    stamp = [f"[{timestamp.strftime('%H:%M:%S')}]"] if self.show_timestamps else []
    speaker = [f"{user_name}:"] if user_name and user_name.strip() else []
    body = f"[...] {text}" if is_preview else text
    return " ".join([*stamp, *speaker, body])
@Slot(str, str)
def add_transcription(self, text: str, user_name: str = "", timestamp: datetime = None):
"""
@@ -56,35 +90,130 @@ class TranscriptionDisplay(QTextEdit):
if timestamp is None:
timestamp = datetime.now()
# Build the display line
line_parts = []
line = self._format_line(text, user_name, timestamp, is_preview=False)
if self.show_timestamps:
time_str = timestamp.strftime("%H:%M:%S")
line_parts.append(f"[{time_str}]")
if user_name:
line_parts.append(f"{user_name}:")
line_parts.append(text)
line = " ".join(line_parts)
# Add to display
self.append(line)
# If there's an active preview, replace it instead of appending
if self.preview_line_index >= 0:
self._replace_preview_with_final(line)
else:
# Add to display normally
self.append(line)
self.line_count += 1
# Auto-scroll to bottom
cursor = self.textCursor()
cursor.movePosition(QTextCursor.End)
self.setTextCursor(cursor)
# Track line count
self.line_count += 1
# Remove old lines if exceeding max
if self.line_count > self.max_lines:
self._remove_oldest_lines(self.line_count - self.max_lines)
@Slot(str, str)
def add_preview(self, text: str, user_name: str = "", timestamp: datetime = None):
    """
    Show an in-progress (preview) transcription line.

    Only one preview is tracked at a time. If one is already active
    (preview_line_index >= 0) its text is swapped via
    _replace_preview_line; otherwise a new italic line is written at the
    end of the document and its position is recorded so a later call can
    replace it.

    Args:
        text: Preview transcription text
        user_name: User/speaker name
        timestamp: Timestamp of transcription; defaults to datetime.now()
    """
    when = datetime.now() if timestamp is None else timestamp
    preview_text = self._format_line(text, user_name, when, is_preview=True)

    if self.preview_line_index < 0:
        # No preview on screen yet: write a fresh italic line at the end
        italic = QTextCharFormat()
        italic.setFontItalic(True)

        writer = self.textCursor()
        writer.movePosition(QTextCursor.End)
        if self.line_count > 0:
            # Start a new line only when something is already displayed
            writer.insertText("\n")
        writer.insertText(preview_text, italic)

        # Record where the preview lives: last document block / next line slot
        self.preview_block_number = self.document().blockCount() - 1
        self.preview_line_index = self.line_count
        self.line_count += 1
    else:
        # A preview is already showing: update it in place
        self._replace_preview_line(preview_text)

    # Keep the newest text in view
    tail = self.textCursor()
    tail.movePosition(QTextCursor.End)
    self.setTextCursor(tail)
def _replace_preview_line(self, new_text: str):
    """
    Replace the text of the tracked preview line with new preview text.

    Does nothing when no preview block is tracked or when the tracked
    block number no longer resolves to a valid block.

    Args:
        new_text: Already-formatted preview line to display (italic)
    """
    if self.preview_block_number < 0:
        return

    block = self.document().findBlockByNumber(self.preview_block_number)
    if not block.isValid():
        return

    # QTextCursor(block) starts at the beginning of the block. Extend the
    # selection to the end of the block only. select(BlockUnderCursor) is
    # avoided because, for a non-first block, it also selects the paragraph
    # separator in front of the block; removing that would glue the preview
    # onto the previous line and shift the block numbering.
    cursor = QTextCursor(block)
    cursor.movePosition(QTextCursor.EndOfBlock, QTextCursor.KeepAnchor)

    # Apply italic formatting for preview
    fmt = QTextCharFormat()
    fmt.setFontItalic(True)

    cursor.removeSelectedText()
    cursor.insertText(new_text, fmt)
def _replace_preview_with_final(self, final_text: str):
    """
    Replace the tracked preview line with the final transcription.

    When the preview block can be located, its text is swapped in place
    for final_text using non-italic, white formatting. When no block is
    tracked, or the tracked block number no longer resolves to a valid
    block, the final text is appended as a new line instead so it is
    never lost. Preview tracking is cleared in every case.

    Args:
        final_text: Already-formatted final line to display
    """
    block = None
    if self.preview_block_number >= 0:
        block = self.document().findBlockByNumber(self.preview_block_number)

    if block is None or not block.isValid():
        # Nothing to replace: add the final line normally
        self.append(final_text)
        self.line_count += 1
    else:
        # QTextCursor(block) starts at the beginning of the block. Select
        # up to the end of the block only. select(BlockUnderCursor) would
        # also take the paragraph separator in front of a non-first block,
        # merging the final text into the previous line.
        cursor = QTextCursor(block)
        cursor.movePosition(QTextCursor.EndOfBlock, QTextCursor.KeepAnchor)

        # Apply normal formatting for final text
        fmt = QTextCharFormat()
        fmt.setFontItalic(False)
        fmt.setForeground(QColor(255, 255, 255))  # White for final

        cursor.removeSelectedText()
        cursor.insertText(final_text, fmt)

    # Clear preview tracking
    self.preview_line_index = -1
    self.preview_block_number = -1
def clear_preview(self):
    """
    Remove the tracked preview line without adding a final transcription.

    When the tracked block is still valid, its text and exactly one
    paragraph separator are removed and line_count is decremented.
    Preview tracking is reset whether or not a block was removed.
    """
    if self.preview_block_number >= 0:
        block = self.document().findBlockByNumber(self.preview_block_number)
        if block.isValid():
            # Decide which separator to drop before the document changes
            has_previous = block.previous().isValid()

            # Select just this block's text (cursor starts at block start)
            cursor = QTextCursor(block)
            cursor.movePosition(QTextCursor.EndOfBlock, QTextCursor.KeepAnchor)
            cursor.removeSelectedText()

            # Remove one newline so no empty line is left behind. Prefer the
            # separator in front of the preview; deleting the one after it
            # as well would merge the neighbouring lines together.
            if has_previous:
                cursor.deletePreviousChar()
            else:
                cursor.deleteChar()
            self.line_count -= 1

    self.preview_line_index = -1
    self.preview_block_number = -1
def _remove_oldest_lines(self, num_lines: int):
"""
Remove oldest lines from the display.
@@ -102,10 +231,20 @@ class TranscriptionDisplay(QTextEdit):
self.line_count -= num_lines
# Adjust preview tracking if lines were removed
if self.preview_line_index >= 0:
self.preview_line_index -= num_lines
self.preview_block_number -= num_lines
if self.preview_line_index < 0:
self.preview_line_index = -1
self.preview_block_number = -1
def clear_all(self):
    """Empty the display and reset the line counter and preview tracking."""
    self.clear()
    self.line_count = 0
    # -1 marks "no active preview" for both tracking fields
    self.preview_line_index = self.preview_block_number = -1
def get_all_text(self) -> str:
"""