Add unified per-speaker font support and remote transcription service

Font changes:
- Consolidate font settings into single Display Settings section
- Support Web-Safe, Google Fonts, and Custom File uploads for both displays
- Fix Google Fonts URL encoding (spaces in font names are sent as a literal + rather than the percent-encoded %2B)
- Fix per-speaker font inline style quote escaping in Node.js display
- Add font debug logging to help diagnose font issues
- Update web server to sync all font settings on settings change
- Remove deprecated PHP server documentation files

New features:
- Add remote transcription service for GPU offloading
- Add instance lock to prevent multiple app instances
- Add version tracking

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-11 18:56:12 -08:00
parent f035bdb927
commit ff067b3368
23 changed files with 2486 additions and 1160 deletions

View File

@@ -3,10 +3,11 @@
from PySide6.QtWidgets import (
QDialog, QVBoxLayout, QHBoxLayout, QFormLayout,
QLabel, QLineEdit, QComboBox, QCheckBox, QSlider,
QPushButton, QMessageBox, QGroupBox, QScrollArea, QWidget
QPushButton, QMessageBox, QGroupBox, QScrollArea, QWidget,
QFileDialog
)
from PySide6.QtCore import Qt
from PySide6.QtGui import QScreen
from PySide6.QtGui import QScreen, QFontDatabase
from typing import Callable, List, Tuple
@@ -179,6 +180,16 @@ class SettingsDialog(QDialog):
self.realtime_model_combo.addItems(["tiny", "tiny.en", "base", "base.en"])
realtime_layout.addRow("Preview Model:", self.realtime_model_combo)
self.realtime_pause_input = QLineEdit()
self.realtime_pause_input.setToolTip(
"Seconds between preview updates:\n"
"• Lower values = More responsive, more frequent updates\n"
"• Higher values = Less CPU usage, updates less often\n"
"• 0.1 is recommended for real-time streaming\n"
"• Try 0.05 for even faster updates"
)
realtime_layout.addRow("Preview Update Interval (s):", self.realtime_pause_input)
realtime_group.setLayout(realtime_layout)
content_layout.addWidget(realtime_group)
@@ -261,6 +272,16 @@ class SettingsDialog(QDialog):
)
timing_layout.addRow("Pre-Recording Buffer (s):", self.pre_buffer_input)
self.continuous_mode_check = QCheckBox()
self.continuous_mode_check.setToolTip(
"Fast Speaker Mode:\n"
"• For speakers who talk quickly without pauses\n"
"• Reduces silence detection thresholds\n"
"• Produces more frequent transcription outputs\n"
"• May result in more fragmented sentences"
)
timing_layout.addRow("Fast Speaker Mode:", self.continuous_mode_check)
timing_group.setLayout(timing_layout)
content_layout.addWidget(timing_group)
@@ -281,10 +302,79 @@ class SettingsDialog(QDialog):
)
display_layout.addRow("Max Lines:", self.maxlines_input)
# Font source selector (shared for local display and server sync)
self.display_font_source_combo = QComboBox()
self.display_font_source_combo.addItems(["System Font", "Web-Safe", "Google Font", "Custom File"])
self.display_font_source_combo.setToolTip(
"Choose font for local display and server sync:\n"
"• System Font - Local only (won't work with server sync)\n"
"• Web-Safe - Universal fonts (Arial, Comic Sans, etc.)\n"
"• Google Font - Free fonts from fonts.google.com\n"
"• Custom File - Upload your own font file"
)
self.display_font_source_combo.currentTextChanged.connect(self._on_display_font_source_changed)
display_layout.addRow("Font Source:", self.display_font_source_combo)
# System font selector
self.font_family_combo = QComboBox()
self.font_family_combo.setToolTip("Font family for transcription display")
self.font_family_combo.addItems(["Courier", "Arial", "Times New Roman", "Consolas", "Monaco", "Monospace"])
display_layout.addRow("Font Family:", self.font_family_combo)
self.font_family_combo.setToolTip("Font family for transcription display (system fonts)")
self.font_family_combo.setEditable(True)
self.font_family_combo.setMaxVisibleItems(20)
system_fonts = QFontDatabase.families()
common_fonts = ["Courier", "Arial", "Times New Roman", "Consolas", "Monaco", "Monospace"]
ordered_fonts = []
for font in common_fonts:
if font in system_fonts:
ordered_fonts.append(font)
for font in sorted(system_fonts):
if font not in ordered_fonts:
ordered_fonts.append(font)
self.font_family_combo.addItems(ordered_fonts)
display_layout.addRow("System Font:", self.font_family_combo)
# Web-safe font selector for display
self.display_websafe_combo = QComboBox()
display_websafe_fonts = [
"Arial", "Arial Black", "Comic Sans MS", "Courier New",
"Georgia", "Impact", "Lucida Console", "Lucida Sans Unicode",
"Palatino Linotype", "Tahoma", "Times New Roman", "Trebuchet MS", "Verdana"
]
self.display_websafe_combo.addItems(display_websafe_fonts)
self.display_websafe_combo.setToolTip("Web-safe fonts work on all systems")
display_layout.addRow("Web-Safe Font:", self.display_websafe_combo)
# Google Font selector for display
self.display_google_font_combo = QComboBox()
display_google_fonts = [
"Roboto", "Open Sans", "Lato", "Montserrat", "Poppins",
"Nunito", "Raleway", "Ubuntu", "Rubik", "Work Sans",
"Inter", "Outfit", "Quicksand", "Comfortaa", "Varela Round",
"Playfair Display", "Merriweather", "Lora", "PT Serif", "Crimson Text",
"Roboto Mono", "Source Code Pro", "Fira Code", "JetBrains Mono", "IBM Plex Mono",
"Bebas Neue", "Oswald", "Righteous", "Bangers", "Permanent Marker",
"Pacifico", "Lobster", "Dancing Script", "Caveat", "Satisfy"
]
self.display_google_font_combo.addItems(display_google_fonts)
self.display_google_font_combo.setToolTip("Select a Google Font for display")
display_layout.addRow("Google Font:", self.display_google_font_combo)
# Custom font file picker (for server sync upload)
custom_font_layout = QHBoxLayout()
self.display_custom_font_input = QLineEdit()
self.display_custom_font_input.setPlaceholderText("No file selected")
self.display_custom_font_input.setReadOnly(True)
self.display_custom_font_input.setToolTip(
"Select a font file to use:\n"
"• Supports .ttf, .otf, .woff, .woff2 files\n"
"• Font is uploaded to server automatically when using Server Sync"
)
custom_font_layout.addWidget(self.display_custom_font_input)
self.display_custom_font_browse = QPushButton("Browse...")
self.display_custom_font_browse.clicked.connect(self._browse_display_custom_font)
custom_font_layout.addWidget(self.display_custom_font_browse)
display_layout.addRow("Custom Font File:", custom_font_layout)
self.font_size_input = QLineEdit()
self.font_size_input.setToolTip("Font size in pixels (12-20 recommended)")
@@ -301,6 +391,9 @@ class SettingsDialog(QDialog):
display_group.setLayout(display_layout)
content_layout.addWidget(display_group)
# Initially show only System Font (default)
self._on_display_font_source_changed("System Font")
# Server Sync Group
server_group = QGroupBox("Multi-User Server Sync (Optional)")
server_layout = QFormLayout()
@@ -339,9 +432,55 @@ class SettingsDialog(QDialog):
)
server_layout.addRow("Passphrase:", self.server_passphrase_input)
# Note about font settings
font_note = QLabel("Font settings are in Display Settings above")
font_note.setStyleSheet("color: #666; font-style: italic;")
server_layout.addRow("", font_note)
server_group.setLayout(server_layout)
content_layout.addWidget(server_group)
# Remote Processing Group
remote_group = QGroupBox("Remote Processing (GPU Offload)")
remote_layout = QFormLayout()
remote_layout.setSpacing(10)
self.remote_enabled_check = QCheckBox()
self.remote_enabled_check.setToolTip(
"Enable remote transcription processing:\n"
"• Offload transcription to a GPU-equipped server\n"
"• Reduces local CPU/GPU usage\n"
"• Requires running the remote transcription service"
)
remote_layout.addRow("Enable Remote Processing:", self.remote_enabled_check)
self.remote_url_input = QLineEdit()
self.remote_url_input.setPlaceholderText("ws://your-server:8765/ws/transcribe")
self.remote_url_input.setToolTip(
"WebSocket URL of the remote transcription service:\n"
"• Format: ws://host:port/ws/transcribe\n"
"• Use wss:// for secure connections"
)
remote_layout.addRow("Server URL:", self.remote_url_input)
self.remote_api_key_input = QLineEdit()
self.remote_api_key_input.setEchoMode(QLineEdit.Password)
self.remote_api_key_input.setPlaceholderText("your-api-key")
self.remote_api_key_input.setToolTip(
"API key for authentication with the remote service"
)
remote_layout.addRow("API Key:", self.remote_api_key_input)
self.remote_fallback_check = QCheckBox("Enable")
self.remote_fallback_check.setChecked(True)
self.remote_fallback_check.setToolTip(
"Fall back to local transcription if remote service is unavailable"
)
remote_layout.addRow("Fallback to Local:", self.remote_fallback_check)
remote_group.setLayout(remote_layout)
content_layout.addWidget(remote_group)
# Add stretch to push everything to the top
content_layout.addStretch()
@@ -367,6 +506,77 @@ class SettingsDialog(QDialog):
"""Update the Silero sensitivity label."""
self.silero_label.setText(f"{value / 100:.2f}")
def _open_fonts_folder(self):
    """Open the custom fonts folder in the system file manager.

    The folder (``self.config.fonts_dir``) is created first if it does not
    exist, then handed to the platform's opener command: ``explorer`` on
    Windows, ``open`` on macOS and ``xdg-open`` everywhere else.

    If the folder cannot be created or the opener command cannot be
    launched (for example ``xdg-open`` is not installed), a warning dialog
    is shown instead of letting the ``OSError`` propagate to the caller.
    """
    import subprocess
    import sys

    # NOTE(review): fonts_dir is used like a pathlib.Path (mkdir) — confirm
    # against the config class.
    fonts_dir = self.config.fonts_dir

    # Any platform that is not Windows or macOS is treated as Linux/XDG.
    if sys.platform == 'win32':
        opener = 'explorer'
    elif sys.platform == 'darwin':
        opener = 'open'
    else:
        opener = 'xdg-open'

    try:
        # Ensure the folder exists before asking the file manager to show it
        fonts_dir.mkdir(parents=True, exist_ok=True)
        # The exit status is deliberately not checked, as in the original:
        # only a failure to launch the command is treated as an error.
        subprocess.run([opener, str(fonts_dir)])
    except OSError as exc:
        QMessageBox.warning(
            self,
            "Open Fonts Folder",
            f"Could not open the fonts folder:\n{fonts_dir}\n\n{exc}"
        )
def _on_display_font_source_changed(self, source: str):
    """Show only the font inputs (and form labels) for the selected source.

    Connected to the Font Source combo's ``currentTextChanged`` signal and
    also called directly with a source string when the dialog is built and
    when settings are loaded.

    Args:
        source: One of "System Font", "Web-Safe", "Google Font" or
            "Custom File". Any other value (e.g. a stale string read from
            config that the combo could not select) falls back to the
            combo's current selection, so the visible inputs always match
            what the combo shows instead of every font row being hidden.
    """
    # Input widgets that belong to each font source.
    inputs_by_source = {
        "System Font": (self.font_family_combo,),
        "Web-Safe": (self.display_websafe_combo,),
        "Google Font": (self.display_google_font_combo,),
        "Custom File": (
            self.display_custom_font_input,
            self.display_custom_font_browse,
        ),
    }
    # Form-row label text -> the font source that row belongs to.
    source_by_label = {
        "System Font:": "System Font",
        "Web-Safe Font:": "Web-Safe",
        "Google Font:": "Google Font",
        "Custom Font File:": "Custom File",
    }

    # Unknown source: trust what the combo box is actually displaying.
    if source not in inputs_by_source:
        source = self.display_font_source_combo.currentText()

    # Hide all font-specific inputs first
    for widgets in inputs_by_source.values():
        for widget in widgets:
            widget.setVisible(False)

    # The row labels are created implicitly by addRow(), so they are found
    # by walking the form layout that owns the source combo and matching
    # on label text.
    parent = self.display_font_source_combo.parent()
    display_layout = parent.layout() if parent else None
    if display_layout and hasattr(display_layout, 'rowCount'):
        for row in range(display_layout.rowCount()):
            label_item = display_layout.itemAt(row, QFormLayout.LabelRole)
            field_item = display_layout.itemAt(row, QFormLayout.FieldRole)
            if not (label_item and field_item):
                continue
            label_widget = label_item.widget()
            if not label_widget:
                continue
            row_source = source_by_label.get(label_widget.text())
            if row_source is not None:
                label_widget.setVisible(source == row_source)

    # Show the relevant input(s)
    for widget in inputs_by_source.get(source, ()):
        widget.setVisible(True)
def _browse_display_custom_font(self):
    """Let the user pick a font file and put its path in the custom-font field.

    If the dialog returns an empty path, the field's current text is left
    unchanged.
    """
    dialog_title = "Select Font File"
    name_filters = "Font Files (*.ttf *.otf *.woff *.woff2);;All Files (*)"

    # getOpenFileName returns (path, selected_filter); only the path is used.
    chosen_path, _selected_filter = QFileDialog.getOpenFileName(
        self, dialog_title, "", name_filters
    )
    if not chosen_path:
        return
    self.display_custom_font_input.setText(chosen_path)
def _load_current_settings(self):
"""Load current settings from config."""
# User settings
@@ -402,6 +612,7 @@ class SettingsDialog(QDialog):
self.realtime_enabled_check.setChecked(self.config.get('transcription.enable_realtime_transcription', False))
realtime_model = self.config.get('transcription.realtime_model', 'tiny.en')
self.realtime_model_combo.setCurrentText(realtime_model)
self.realtime_pause_input.setText(str(self.config.get('transcription.realtime_processing_pause', 0.1)))
# VAD settings
silero_sens = self.config.get('transcription.silero_sensitivity', 0.4)
@@ -417,13 +628,23 @@ class SettingsDialog(QDialog):
self.post_silence_input.setText(str(self.config.get('transcription.post_speech_silence_duration', 0.3)))
self.min_recording_input.setText(str(self.config.get('transcription.min_length_of_recording', 0.5)))
self.pre_buffer_input.setText(str(self.config.get('transcription.pre_recording_buffer_duration', 0.2)))
self.continuous_mode_check.setChecked(self.config.get('transcription.continuous_mode', False))
# Display settings
self.timestamps_check.setChecked(self.config.get('display.show_timestamps', True))
self.maxlines_input.setText(str(self.config.get('display.max_lines', 100)))
# Display font settings
display_font_source = self.config.get('display.font_source', 'System Font')
self.display_font_source_combo.setCurrentText(display_font_source)
font_family = self.config.get('display.font_family', 'Courier')
self.font_family_combo.setCurrentText(font_family)
self.display_websafe_combo.setCurrentText(self.config.get('display.websafe_font', 'Arial'))
display_google_font = self.config.get('display.google_font', 'Roboto')
if display_google_font:
self.display_google_font_combo.setCurrentText(display_google_font)
self.display_custom_font_input.setText(self.config.get('display.custom_font_file', ''))
self._on_display_font_source_changed(display_font_source)
self.font_size_input.setText(str(self.config.get('display.font_size', 12)))
self.fade_seconds_input.setText(str(self.config.get('display.fade_after_seconds', 10)))
@@ -434,6 +655,12 @@ class SettingsDialog(QDialog):
self.server_room_input.setText(self.config.get('server_sync.room', 'default'))
self.server_passphrase_input.setText(self.config.get('server_sync.passphrase', ''))
# Remote processing settings
self.remote_enabled_check.setChecked(self.config.get('remote_processing.enabled', False))
self.remote_url_input.setText(self.config.get('remote_processing.server_url', ''))
self.remote_api_key_input.setText(self.config.get('remote_processing.api_key', ''))
self.remote_fallback_check.setChecked(self.config.get('remote_processing.fallback_to_local', True))
def _save_settings(self):
"""Save settings to config."""
try:
@@ -459,6 +686,7 @@ class SettingsDialog(QDialog):
# Realtime preview
self.config.set('transcription.enable_realtime_transcription', self.realtime_enabled_check.isChecked())
self.config.set('transcription.realtime_model', self.realtime_model_combo.currentText())
self.config.set('transcription.realtime_processing_pause', float(self.realtime_pause_input.text()))
# VAD settings
self.config.set('transcription.silero_sensitivity', self.silero_slider.value() / 100.0)
@@ -469,12 +697,20 @@ class SettingsDialog(QDialog):
self.config.set('transcription.post_speech_silence_duration', float(self.post_silence_input.text()))
self.config.set('transcription.min_length_of_recording', float(self.min_recording_input.text()))
self.config.set('transcription.pre_recording_buffer_duration', float(self.pre_buffer_input.text()))
self.config.set('transcription.continuous_mode', self.continuous_mode_check.isChecked())
# Display settings
self.config.set('display.show_timestamps', self.timestamps_check.isChecked())
max_lines = int(self.maxlines_input.text())
self.config.set('display.max_lines', max_lines)
# Display font settings (also used for server sync)
self.config.set('display.font_source', self.display_font_source_combo.currentText())
self.config.set('display.font_family', self.font_family_combo.currentText())
self.config.set('display.websafe_font', self.display_websafe_combo.currentText())
self.config.set('display.google_font', self.display_google_font_combo.currentText())
self.config.set('display.custom_font_file', self.display_custom_font_input.text())
font_size = int(self.font_size_input.text())
self.config.set('display.font_size', font_size)
fade_seconds = int(self.fade_seconds_input.text())
@@ -486,6 +722,12 @@ class SettingsDialog(QDialog):
self.config.set('server_sync.room', self.server_room_input.text())
self.config.set('server_sync.passphrase', self.server_passphrase_input.text())
# Remote processing settings
self.config.set('remote_processing.enabled', self.remote_enabled_check.isChecked())
self.config.set('remote_processing.server_url', self.remote_url_input.text())
self.config.set('remote_processing.api_key', self.remote_api_key_input.text())
self.config.set('remote_processing.fallback_to_local', self.remote_fallback_check.isChecked())
# Call save callback (which will show the success message)
if self.on_save:
self.on_save()