Add unified per-speaker font support and remote transcription service

Font changes:
- Consolidate font settings into single Display Settings section
- Support Web-Safe, Google Fonts, and Custom File uploads for both displays
- Fix Google Fonts URL encoding (use + instead of %2B for spaces)
- Fix per-speaker font inline style quote escaping in Node.js display
- Add font debug logging to help diagnose font issues
- Update web server to sync all font settings on settings change
- Remove deprecated PHP server documentation files

New features:
- Add remote transcription service for GPU offloading
- Add instance lock to prevent multiple app instances
- Add version tracking

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-11 18:56:12 -08:00
parent f035bdb927
commit ff067b3368
23 changed files with 2486 additions and 1160 deletions

View File

@@ -9,16 +9,16 @@ from PySide6.QtGui import QFont
from pathlib import Path
import sys
# Add parent directory to path for imports
sys.path.append(str(Path(__file__).parent.parent))
# Add parent directory to path for imports (resolve symlinks)
sys.path.append(str(Path(__file__).resolve().parent.parent))
from client.config import Config
from client.device_utils import DeviceManager
from client.transcription_engine_realtime import RealtimeTranscriptionEngine, TranscriptionResult
from client.server_sync import ServerSyncClient
from gui.transcription_display_qt import TranscriptionDisplay
from gui.settings_dialog_qt import SettingsDialog
from server.web_display import TranscriptionWebServer
from version import __version__
import asyncio
from threading import Thread
@@ -96,9 +96,13 @@ class MainWindow(QMainWindow):
# Server sync components
self.server_sync_client: ServerSyncClient = None
# Store all transcriptions for saving (separate from display)
self.transcriptions: list = []
# Configure window
self.setWindowTitle("Local Transcription")
self.resize(900, 700)
self.resize(700, 300)
self.setMinimumSize(600, 280)
# Set application icon
# In PyInstaller frozen executables, use _MEIPASS for bundled files
@@ -108,7 +112,7 @@ class MainWindow(QMainWindow):
icon_path = Path(sys._MEIPASS) / "LocalTranscription.png"
else:
# Running in normal Python
icon_path = Path(__file__).parent.parent / "LocalTranscription.png"
icon_path = Path(__file__).resolve().parent.parent / "LocalTranscription.png"
if icon_path.exists():
from PySide6.QtGui import QIcon
@@ -174,13 +178,14 @@ class MainWindow(QMainWindow):
# Status bar
status_widget = QWidget()
status_widget.setFixedHeight(60)
status_widget.setFixedHeight(40)
status_layout = QHBoxLayout()
status_layout.setContentsMargins(0, 0, 0, 0)
status_widget.setLayout(status_layout)
self.status_label = QLabel("⚫ Initializing...")
status_font = QFont()
status_font.setPointSize(14)
status_font.setPointSize(12)
self.status_label.setFont(status_font)
status_layout.addWidget(self.status_label)
@@ -193,28 +198,36 @@ class MainWindow(QMainWindow):
self.user_label = QLabel(f"User: {user_name}")
status_layout.addWidget(self.user_label)
# Web display link
web_host = self.config.get('web_server.host', '127.0.0.1')
web_port = self.config.get('web_server.port', 8080)
web_url = f"http://{web_host}:{web_port}"
self.web_link = QLabel(f'<a href="{web_url}">🌐 Open Web Display</a>')
self.web_link.setOpenExternalLinks(True)
self.web_link.setToolTip(f"Click to open {web_url} in browser (for OBS)")
self.web_link.setStyleSheet("QLabel { color: #4CAF50; }")
status_layout.addWidget(self.web_link)
status_layout.addStretch()
main_layout.addWidget(status_widget)
# Transcription display
self.transcription_display = TranscriptionDisplay(
max_lines=self.config.get('display.max_lines', 100),
show_timestamps=self.config.get('display.show_timestamps', True),
font_family=self.config.get('display.font_family', 'Courier'),
font_size=self.config.get('display.font_size', 12)
)
main_layout.addWidget(self.transcription_display)
# Web display links section
links_widget = QWidget()
links_layout = QVBoxLayout()
links_layout.setContentsMargins(0, 5, 0, 5)
links_layout.setSpacing(5)
links_widget.setLayout(links_layout)
# Local web display link
web_host = self.config.get('web_server.host', '127.0.0.1')
web_port = self.config.get('web_server.port', 8080)
web_url = f"http://{web_host}:{web_port}"
self.web_link = QLabel(f'🌐 Local Web Display: <a href="{web_url}">{web_url}</a>')
self.web_link.setOpenExternalLinks(True)
self.web_link.setToolTip("Click to open in browser (for OBS)")
self.web_link.setStyleSheet("QLabel a { color: #4CAF50; }")
links_layout.addWidget(self.web_link)
# Multi-user sync display link (shown when server sync is enabled)
self.sync_link = QLabel("")
self.sync_link.setOpenExternalLinks(True)
self.sync_link.setStyleSheet("QLabel a { color: #2196F3; }")
self.sync_link.setVisible(False)
links_layout.addWidget(self.sync_link)
self._update_sync_link()
main_layout.addWidget(links_widget)
# Control buttons
control_widget = QWidget()
@@ -232,7 +245,7 @@ class MainWindow(QMainWindow):
self.start_button.setStyleSheet("background-color: #2ecc71; color: white;")
control_layout.addWidget(self.start_button)
self.clear_button = QPushButton("Clear")
self.clear_button = QPushButton("🗑 Clear")
self.clear_button.setFixedSize(120, 50)
self.clear_button.clicked.connect(self._clear_transcriptions)
control_layout.addWidget(self.clear_button)
@@ -246,6 +259,12 @@ class MainWindow(QMainWindow):
main_layout.addWidget(control_widget)
# Version label (bottom right)
version_label = QLabel(f"v{__version__}")
version_label.setStyleSheet("QLabel { color: #666; font-size: 10px; }")
version_label.setAlignment(Qt.AlignRight)
main_layout.addWidget(version_label)
def _initialize_components(self):
"""Initialize RealtimeSTT transcription engine."""
# Update status
@@ -271,6 +290,20 @@ class MainWindow(QMainWindow):
user_name = self.config.get('user.name', 'User')
# Check for continuous/fast speaker mode
continuous_mode = self.config.get('transcription.continuous_mode', False)
# Get timing settings - use faster values if continuous mode is enabled
if continuous_mode:
# Faster settings for speakers who talk without pauses
post_speech_silence = 0.15 # Reduced from default 0.3
min_gap = 0.0 # No gap between recordings
min_recording = 0.3 # Shorter minimum recording
else:
post_speech_silence = self.config.get('transcription.post_speech_silence_duration', 0.3)
min_gap = self.config.get('transcription.min_gap_between_recordings', 0.0)
min_recording = self.config.get('transcription.min_length_of_recording', 0.5)
self.transcription_engine = RealtimeTranscriptionEngine(
model=model,
device=device,
@@ -278,12 +311,13 @@ class MainWindow(QMainWindow):
compute_type=compute_type,
enable_realtime_transcription=self.config.get('transcription.enable_realtime_transcription', False),
realtime_model=self.config.get('transcription.realtime_model', 'tiny.en'),
realtime_processing_pause=self.config.get('transcription.realtime_processing_pause', 0.1),
silero_sensitivity=self.config.get('transcription.silero_sensitivity', 0.4),
silero_use_onnx=self.config.get('transcription.silero_use_onnx', True),
webrtc_sensitivity=self.config.get('transcription.webrtc_sensitivity', 3),
post_speech_silence_duration=self.config.get('transcription.post_speech_silence_duration', 0.3),
min_length_of_recording=self.config.get('transcription.min_length_of_recording', 0.5),
min_gap_between_recordings=self.config.get('transcription.min_gap_between_recordings', 0.0),
post_speech_silence_duration=post_speech_silence,
min_length_of_recording=min_recording,
min_gap_between_recordings=min_gap,
pre_recording_buffer_duration=self.config.get('transcription.pre_recording_buffer_duration', 0.2),
beam_size=self.config.get('transcription.beam_size', 5),
initial_prompt=self.config.get('transcription.initial_prompt', ''),
@@ -332,6 +366,12 @@ class MainWindow(QMainWindow):
max_lines = self.config.get('display.max_lines', 50)
font_family = self.config.get('display.font_family', 'Arial')
font_size = self.config.get('display.font_size', 16)
fonts_dir = self.config.fonts_dir # Custom fonts directory
# Font source settings
font_source = self.config.get('display.font_source', 'System Font')
websafe_font = self.config.get('display.websafe_font', 'Arial')
google_font = self.config.get('display.google_font', 'Roboto')
# Try up to 5 ports if the default is in use
ports_to_try = [port] + [port + i for i in range(1, 5)]
@@ -346,7 +386,11 @@ class MainWindow(QMainWindow):
fade_after_seconds=fade_after_seconds,
max_lines=max_lines,
font_family=font_family,
font_size=font_size
font_size=font_size,
fonts_dir=fonts_dir,
font_source=font_source,
websafe_font=websafe_font,
google_font=google_font
)
self.web_server_thread = WebServerThread(self.web_server)
self.web_server_thread.start()
@@ -450,15 +494,21 @@ class MainWindow(QMainWindow):
return
try:
# Update display with preview (thread-safe Qt call)
from PySide6.QtCore import QMetaObject, Q_ARG
QMetaObject.invokeMethod(
self.transcription_display,
"add_transcription",
Qt.QueuedConnection,
Q_ARG(str, f"[PREVIEW] {result.text}"),
Q_ARG(str, result.user_name)
)
# Broadcast preview to local web server
if self.web_server and self.web_server_thread and self.web_server_thread.loop:
asyncio.run_coroutine_threadsafe(
self.web_server.broadcast_preview(
result.text,
result.user_name,
result.timestamp
),
self.web_server_thread.loop
)
# Send preview to server sync if enabled
if self.server_sync_client:
self.server_sync_client.send_preview(result.text, result.timestamp)
except Exception as e:
print(f"Error handling realtime transcription: {e}")
@@ -468,15 +518,8 @@ class MainWindow(QMainWindow):
return
try:
# Update display (thread-safe Qt call)
from PySide6.QtCore import QMetaObject, Q_ARG
QMetaObject.invokeMethod(
self.transcription_display,
"add_transcription",
Qt.QueuedConnection,
Q_ARG(str, result.text),
Q_ARG(str, result.user_name)
)
# Store transcription for saving
self.transcriptions.append(result)
# Broadcast to web server if enabled
if self.web_server and self.web_server_thread:
@@ -508,18 +551,27 @@ class MainWindow(QMainWindow):
def _clear_transcriptions(self):
"""Clear all transcriptions."""
if not self.transcriptions:
QMessageBox.information(self, "No Transcriptions", "There are no transcriptions to clear.")
return
reply = QMessageBox.question(
self,
"Clear Transcriptions",
"Are you sure you want to clear all transcriptions?",
f"Are you sure you want to clear {len(self.transcriptions)} transcription(s)?",
QMessageBox.Yes | QMessageBox.No
)
if reply == QMessageBox.Yes:
self.transcription_display.clear_all()
self.transcriptions.clear()
QMessageBox.information(self, "Cleared", "All transcriptions have been cleared.")
def _save_transcriptions(self):
"""Save transcriptions to file."""
if not self.transcriptions:
QMessageBox.warning(self, "No Transcriptions", "There are no transcriptions to save.")
return
filepath, _ = QFileDialog.getSaveFileName(
self,
"Save Transcriptions",
@@ -528,10 +580,21 @@ class MainWindow(QMainWindow):
)
if filepath:
if self.transcription_display.save_to_file(filepath):
try:
show_timestamps = self.config.get('display.show_timestamps', True)
with open(filepath, 'w', encoding='utf-8') as f:
for result in self.transcriptions:
line_parts = []
if show_timestamps:
time_str = result.timestamp.strftime("%H:%M:%S")
line_parts.append(f"[{time_str}]")
if result.user_name and result.user_name.strip():
line_parts.append(f"{result.user_name}:")
line_parts.append(result.text)
f.write(" ".join(line_parts) + "\n")
QMessageBox.information(self, "Saved", f"Transcriptions saved to:\n{filepath}")
else:
QMessageBox.critical(self, "Error", "Failed to save transcriptions")
except Exception as e:
QMessageBox.critical(self, "Error", f"Failed to save transcriptions:\n{e}")
def _open_settings(self):
"""Open settings dialog."""
@@ -569,22 +632,20 @@ class MainWindow(QMainWindow):
user_name = self.config.get('user.name', 'User')
self.user_label.setText(f"User: {user_name}")
# Update display settings
show_timestamps = self.config.get('display.show_timestamps', True)
self.transcription_display.set_max_lines(self.config.get('display.max_lines', 100))
self.transcription_display.set_show_timestamps(show_timestamps)
self.transcription_display.set_font(
self.config.get('display.font_family', 'Courier'),
self.config.get('display.font_size', 12)
)
# Update web server settings
if self.web_server:
self.web_server.show_timestamps = show_timestamps
self.web_server.show_timestamps = self.config.get('display.show_timestamps', True)
self.web_server.fade_after_seconds = self.config.get('display.fade_after_seconds', 10)
self.web_server.max_lines = self.config.get('display.max_lines', 50)
self.web_server.font_family = self.config.get('display.font_family', 'Arial')
self.web_server.font_size = self.config.get('display.font_size', 16)
# Update font source settings
self.web_server.font_source = self.config.get('display.font_source', 'System Font')
self.web_server.websafe_font = self.config.get('display.websafe_font', 'Arial')
self.web_server.google_font = self.config.get('display.google_font', 'Roboto')
# Update sync link visibility based on server sync settings
self._update_sync_link()
# Restart server sync if it was running and settings changed
if self.is_transcribing and self.server_sync_client:
@@ -656,18 +717,33 @@ class MainWindow(QMainWindow):
room = self.config.get('server_sync.room', 'default')
passphrase = self.config.get('server_sync.passphrase', '')
user_name = self.config.get('user.name', 'User')
fonts_dir = self.config.fonts_dir # Custom fonts directory
# Font settings (shared with display settings)
# Note: "System Font" only works locally, so we treat it as "None" for server sync
font_source = self.config.get('display.font_source', 'System Font')
if font_source == "System Font":
font_source = "None" # System fonts don't work on remote displays
websafe_font = self.config.get('display.websafe_font', '')
google_font = self.config.get('display.google_font', '')
custom_font_file = self.config.get('display.custom_font_file', '')
if not url:
print("Server sync enabled but no URL configured")
return
print(f"Starting server sync: {url}, room: {room}, user: {user_name}")
print(f"Starting server sync: {url}, room: {room}, user: {user_name}, font: {font_source}")
self.server_sync_client = ServerSyncClient(
url=url,
room=room,
passphrase=passphrase,
user_name=user_name
user_name=user_name,
fonts_dir=fonts_dir,
font_source=font_source,
websafe_font=websafe_font if websafe_font else None,
google_font=google_font if google_font else None,
custom_font_file=custom_font_file if custom_font_file else None
)
self.server_sync_client.start()
@@ -679,6 +755,40 @@ class MainWindow(QMainWindow):
f"Failed to start server sync:\n{e}\n\nTranscription will continue locally."
)
def _update_sync_link(self):
    """Refresh the multi-user sync link label from the current configuration.

    Reads ``server_sync.enabled``, ``server_sync.url`` and
    ``server_sync.room`` from ``self.config``. When sync is enabled and the
    URL has both a scheme and a host, ``self.sync_link`` is shown with a
    link to ``<scheme>://<host>/display`` that carries the room and the
    display settings as query parameters. In every other case (sync
    disabled, empty URL, URL that cannot be parsed or lacks scheme/host)
    the label is hidden.
    """
    # Function-scope stdlib imports so the method is self-contained.
    from html import escape
    from urllib.parse import urlencode, urlparse

    server_sync_enabled = self.config.get('server_sync.enabled', False)
    server_url = self.config.get('server_sync.url', '')
    room = self.config.get('server_sync.room', 'default')

    # Guard clause: nothing to link to unless sync is on and a URL is set.
    if not (server_sync_enabled and server_url):
        self.sync_link.setVisible(False)
        return

    try:
        # Extract base URL from the API endpoint
        # (e.g., http://server:3000/api/send -> http://server:3000)
        parsed = urlparse(server_url)
        if not parsed.scheme or not parsed.netloc:
            # e.g. "server:3000/api/send" parses as scheme="server" with an
            # empty netloc, which would otherwise produce a dead
            # "server://" link.
            raise ValueError(f"missing scheme or host in {server_url!r}")
        base_url = f"{parsed.scheme}://{parsed.netloc}"

        # Display settings are passed to the remote page as URL parameters;
        # urlencode percent-encodes each value.
        params = {
            'room': room,
            'fontfamily': self.config.get('display.font_family', 'Arial'),
            'fontsize': self.config.get('display.font_size', 16),
            'fade': self.config.get('display.fade_after_seconds', 10),
            'timestamps': 'true' if self.config.get('display.show_timestamps', True) else 'false',
            'maxlines': self.config.get('display.max_lines', 50),
        }
        display_url = f"{base_url}/display?{urlencode(params)}"

        # Show shorter text with just address and room
        display_text = f"{base_url} (room: {room})"

        # The label text is HTML markup, so escape everything interpolated
        # into it: the room name is free-form config text and the query
        # string contains "&" separators.
        href = escape(display_url, quote=True)
        link_text = escape(display_text)
        self.sync_link.setText(
            f'🔗 Multi-User Display: <a href="{href}">{link_text}</a>'
        )
        # The tooltip gets the raw (unescaped) URL, as before.
        # NOTE(review): presumably rendered as plain text by Qt — confirm.
        self.sync_link.setToolTip(f"Click to open: {display_url}")
        self.sync_link.setVisible(True)
    except Exception as e:
        # Best-effort UI helper: a bad URL must never break the window, so
        # log the problem and hide the link.
        print(f"Error parsing server URL: {e}")
        self.sync_link.setVisible(False)
def closeEvent(self, event):
"""Handle window closing."""
# Stop transcription if running