Add unified per-speaker font support and remote transcription service
Font changes:
- Consolidate font settings into single Display Settings section
- Support Web-Safe, Google Fonts, and Custom File uploads for both displays
- Fix Google Fonts URL encoding (use + instead of %2B for spaces)
- Fix per-speaker font inline style quote escaping in Node.js display
- Add font debug logging to help diagnose font issues
- Update web server to sync all font settings on settings change
- Remove deprecated PHP server documentation files

New features:
- Add remote transcription service for GPU offloading
- Add instance lock to prevent multiple app instances
- Add version tracking

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -9,16 +9,16 @@ from PySide6.QtGui import QFont
|
||||
from pathlib import Path
|
||||
import sys
|
||||
|
||||
# Add parent directory to path for imports
|
||||
sys.path.append(str(Path(__file__).parent.parent))
|
||||
# Add parent directory to path for imports (resolve symlinks)
|
||||
sys.path.append(str(Path(__file__).resolve().parent.parent))
|
||||
|
||||
from client.config import Config
|
||||
from client.device_utils import DeviceManager
|
||||
from client.transcription_engine_realtime import RealtimeTranscriptionEngine, TranscriptionResult
|
||||
from client.server_sync import ServerSyncClient
|
||||
from gui.transcription_display_qt import TranscriptionDisplay
|
||||
from gui.settings_dialog_qt import SettingsDialog
|
||||
from server.web_display import TranscriptionWebServer
|
||||
from version import __version__
|
||||
import asyncio
|
||||
from threading import Thread
|
||||
|
||||
@@ -96,9 +96,13 @@ class MainWindow(QMainWindow):
|
||||
# Server sync components
|
||||
self.server_sync_client: ServerSyncClient = None
|
||||
|
||||
# Store all transcriptions for saving (separate from display)
|
||||
self.transcriptions: list = []
|
||||
|
||||
# Configure window
|
||||
self.setWindowTitle("Local Transcription")
|
||||
self.resize(900, 700)
|
||||
self.resize(700, 300)
|
||||
self.setMinimumSize(600, 280)
|
||||
|
||||
# Set application icon
|
||||
# In PyInstaller frozen executables, use _MEIPASS for bundled files
|
||||
@@ -108,7 +112,7 @@ class MainWindow(QMainWindow):
|
||||
icon_path = Path(sys._MEIPASS) / "LocalTranscription.png"
|
||||
else:
|
||||
# Running in normal Python
|
||||
icon_path = Path(__file__).parent.parent / "LocalTranscription.png"
|
||||
icon_path = Path(__file__).resolve().parent.parent / "LocalTranscription.png"
|
||||
|
||||
if icon_path.exists():
|
||||
from PySide6.QtGui import QIcon
|
||||
@@ -174,13 +178,14 @@ class MainWindow(QMainWindow):
|
||||
|
||||
# Status bar
|
||||
status_widget = QWidget()
|
||||
status_widget.setFixedHeight(60)
|
||||
status_widget.setFixedHeight(40)
|
||||
status_layout = QHBoxLayout()
|
||||
status_layout.setContentsMargins(0, 0, 0, 0)
|
||||
status_widget.setLayout(status_layout)
|
||||
|
||||
self.status_label = QLabel("⚫ Initializing...")
|
||||
status_font = QFont()
|
||||
status_font.setPointSize(14)
|
||||
status_font.setPointSize(12)
|
||||
self.status_label.setFont(status_font)
|
||||
status_layout.addWidget(self.status_label)
|
||||
|
||||
@@ -193,28 +198,36 @@ class MainWindow(QMainWindow):
|
||||
self.user_label = QLabel(f"User: {user_name}")
|
||||
status_layout.addWidget(self.user_label)
|
||||
|
||||
# Web display link
|
||||
web_host = self.config.get('web_server.host', '127.0.0.1')
|
||||
web_port = self.config.get('web_server.port', 8080)
|
||||
web_url = f"http://{web_host}:{web_port}"
|
||||
self.web_link = QLabel(f'<a href="{web_url}">🌐 Open Web Display</a>')
|
||||
self.web_link.setOpenExternalLinks(True)
|
||||
self.web_link.setToolTip(f"Click to open {web_url} in browser (for OBS)")
|
||||
self.web_link.setStyleSheet("QLabel { color: #4CAF50; }")
|
||||
status_layout.addWidget(self.web_link)
|
||||
|
||||
status_layout.addStretch()
|
||||
|
||||
main_layout.addWidget(status_widget)
|
||||
|
||||
# Transcription display
|
||||
self.transcription_display = TranscriptionDisplay(
|
||||
max_lines=self.config.get('display.max_lines', 100),
|
||||
show_timestamps=self.config.get('display.show_timestamps', True),
|
||||
font_family=self.config.get('display.font_family', 'Courier'),
|
||||
font_size=self.config.get('display.font_size', 12)
|
||||
)
|
||||
main_layout.addWidget(self.transcription_display)
|
||||
# Web display links section
|
||||
links_widget = QWidget()
|
||||
links_layout = QVBoxLayout()
|
||||
links_layout.setContentsMargins(0, 5, 0, 5)
|
||||
links_layout.setSpacing(5)
|
||||
links_widget.setLayout(links_layout)
|
||||
|
||||
# Local web display link
|
||||
web_host = self.config.get('web_server.host', '127.0.0.1')
|
||||
web_port = self.config.get('web_server.port', 8080)
|
||||
web_url = f"http://{web_host}:{web_port}"
|
||||
self.web_link = QLabel(f'🌐 Local Web Display: <a href="{web_url}">{web_url}</a>')
|
||||
self.web_link.setOpenExternalLinks(True)
|
||||
self.web_link.setToolTip("Click to open in browser (for OBS)")
|
||||
self.web_link.setStyleSheet("QLabel a { color: #4CAF50; }")
|
||||
links_layout.addWidget(self.web_link)
|
||||
|
||||
# Multi-user sync display link (shown when server sync is enabled)
|
||||
self.sync_link = QLabel("")
|
||||
self.sync_link.setOpenExternalLinks(True)
|
||||
self.sync_link.setStyleSheet("QLabel a { color: #2196F3; }")
|
||||
self.sync_link.setVisible(False)
|
||||
links_layout.addWidget(self.sync_link)
|
||||
self._update_sync_link()
|
||||
|
||||
main_layout.addWidget(links_widget)
|
||||
|
||||
# Control buttons
|
||||
control_widget = QWidget()
|
||||
@@ -232,7 +245,7 @@ class MainWindow(QMainWindow):
|
||||
self.start_button.setStyleSheet("background-color: #2ecc71; color: white;")
|
||||
control_layout.addWidget(self.start_button)
|
||||
|
||||
self.clear_button = QPushButton("Clear")
|
||||
self.clear_button = QPushButton("🗑 Clear")
|
||||
self.clear_button.setFixedSize(120, 50)
|
||||
self.clear_button.clicked.connect(self._clear_transcriptions)
|
||||
control_layout.addWidget(self.clear_button)
|
||||
@@ -246,6 +259,12 @@ class MainWindow(QMainWindow):
|
||||
|
||||
main_layout.addWidget(control_widget)
|
||||
|
||||
# Version label (bottom right)
|
||||
version_label = QLabel(f"v{__version__}")
|
||||
version_label.setStyleSheet("QLabel { color: #666; font-size: 10px; }")
|
||||
version_label.setAlignment(Qt.AlignRight)
|
||||
main_layout.addWidget(version_label)
|
||||
|
||||
def _initialize_components(self):
|
||||
"""Initialize RealtimeSTT transcription engine."""
|
||||
# Update status
|
||||
@@ -271,6 +290,20 @@ class MainWindow(QMainWindow):
|
||||
|
||||
user_name = self.config.get('user.name', 'User')
|
||||
|
||||
# Check for continuous/fast speaker mode
|
||||
continuous_mode = self.config.get('transcription.continuous_mode', False)
|
||||
|
||||
# Get timing settings - use faster values if continuous mode is enabled
|
||||
if continuous_mode:
|
||||
# Faster settings for speakers who talk without pauses
|
||||
post_speech_silence = 0.15 # Reduced from default 0.3
|
||||
min_gap = 0.0 # No gap between recordings
|
||||
min_recording = 0.3 # Shorter minimum recording
|
||||
else:
|
||||
post_speech_silence = self.config.get('transcription.post_speech_silence_duration', 0.3)
|
||||
min_gap = self.config.get('transcription.min_gap_between_recordings', 0.0)
|
||||
min_recording = self.config.get('transcription.min_length_of_recording', 0.5)
|
||||
|
||||
self.transcription_engine = RealtimeTranscriptionEngine(
|
||||
model=model,
|
||||
device=device,
|
||||
@@ -278,12 +311,13 @@ class MainWindow(QMainWindow):
|
||||
compute_type=compute_type,
|
||||
enable_realtime_transcription=self.config.get('transcription.enable_realtime_transcription', False),
|
||||
realtime_model=self.config.get('transcription.realtime_model', 'tiny.en'),
|
||||
realtime_processing_pause=self.config.get('transcription.realtime_processing_pause', 0.1),
|
||||
silero_sensitivity=self.config.get('transcription.silero_sensitivity', 0.4),
|
||||
silero_use_onnx=self.config.get('transcription.silero_use_onnx', True),
|
||||
webrtc_sensitivity=self.config.get('transcription.webrtc_sensitivity', 3),
|
||||
post_speech_silence_duration=self.config.get('transcription.post_speech_silence_duration', 0.3),
|
||||
min_length_of_recording=self.config.get('transcription.min_length_of_recording', 0.5),
|
||||
min_gap_between_recordings=self.config.get('transcription.min_gap_between_recordings', 0.0),
|
||||
post_speech_silence_duration=post_speech_silence,
|
||||
min_length_of_recording=min_recording,
|
||||
min_gap_between_recordings=min_gap,
|
||||
pre_recording_buffer_duration=self.config.get('transcription.pre_recording_buffer_duration', 0.2),
|
||||
beam_size=self.config.get('transcription.beam_size', 5),
|
||||
initial_prompt=self.config.get('transcription.initial_prompt', ''),
|
||||
@@ -332,6 +366,12 @@ class MainWindow(QMainWindow):
|
||||
max_lines = self.config.get('display.max_lines', 50)
|
||||
font_family = self.config.get('display.font_family', 'Arial')
|
||||
font_size = self.config.get('display.font_size', 16)
|
||||
fonts_dir = self.config.fonts_dir # Custom fonts directory
|
||||
|
||||
# Font source settings
|
||||
font_source = self.config.get('display.font_source', 'System Font')
|
||||
websafe_font = self.config.get('display.websafe_font', 'Arial')
|
||||
google_font = self.config.get('display.google_font', 'Roboto')
|
||||
|
||||
# Try up to 5 ports if the default is in use
|
||||
ports_to_try = [port] + [port + i for i in range(1, 5)]
|
||||
@@ -346,7 +386,11 @@ class MainWindow(QMainWindow):
|
||||
fade_after_seconds=fade_after_seconds,
|
||||
max_lines=max_lines,
|
||||
font_family=font_family,
|
||||
font_size=font_size
|
||||
font_size=font_size,
|
||||
fonts_dir=fonts_dir,
|
||||
font_source=font_source,
|
||||
websafe_font=websafe_font,
|
||||
google_font=google_font
|
||||
)
|
||||
self.web_server_thread = WebServerThread(self.web_server)
|
||||
self.web_server_thread.start()
|
||||
@@ -450,15 +494,21 @@ class MainWindow(QMainWindow):
|
||||
return
|
||||
|
||||
try:
|
||||
# Update display with preview (thread-safe Qt call)
|
||||
from PySide6.QtCore import QMetaObject, Q_ARG
|
||||
QMetaObject.invokeMethod(
|
||||
self.transcription_display,
|
||||
"add_transcription",
|
||||
Qt.QueuedConnection,
|
||||
Q_ARG(str, f"[PREVIEW] {result.text}"),
|
||||
Q_ARG(str, result.user_name)
|
||||
)
|
||||
# Broadcast preview to local web server
|
||||
if self.web_server and self.web_server_thread and self.web_server_thread.loop:
|
||||
asyncio.run_coroutine_threadsafe(
|
||||
self.web_server.broadcast_preview(
|
||||
result.text,
|
||||
result.user_name,
|
||||
result.timestamp
|
||||
),
|
||||
self.web_server_thread.loop
|
||||
)
|
||||
|
||||
# Send preview to server sync if enabled
|
||||
if self.server_sync_client:
|
||||
self.server_sync_client.send_preview(result.text, result.timestamp)
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error handling realtime transcription: {e}")
|
||||
|
||||
@@ -468,15 +518,8 @@ class MainWindow(QMainWindow):
|
||||
return
|
||||
|
||||
try:
|
||||
# Update display (thread-safe Qt call)
|
||||
from PySide6.QtCore import QMetaObject, Q_ARG
|
||||
QMetaObject.invokeMethod(
|
||||
self.transcription_display,
|
||||
"add_transcription",
|
||||
Qt.QueuedConnection,
|
||||
Q_ARG(str, result.text),
|
||||
Q_ARG(str, result.user_name)
|
||||
)
|
||||
# Store transcription for saving
|
||||
self.transcriptions.append(result)
|
||||
|
||||
# Broadcast to web server if enabled
|
||||
if self.web_server and self.web_server_thread:
|
||||
@@ -508,18 +551,27 @@ class MainWindow(QMainWindow):
|
||||
|
||||
def _clear_transcriptions(self):
|
||||
"""Clear all transcriptions."""
|
||||
if not self.transcriptions:
|
||||
QMessageBox.information(self, "No Transcriptions", "There are no transcriptions to clear.")
|
||||
return
|
||||
|
||||
reply = QMessageBox.question(
|
||||
self,
|
||||
"Clear Transcriptions",
|
||||
"Are you sure you want to clear all transcriptions?",
|
||||
f"Are you sure you want to clear {len(self.transcriptions)} transcription(s)?",
|
||||
QMessageBox.Yes | QMessageBox.No
|
||||
)
|
||||
|
||||
if reply == QMessageBox.Yes:
|
||||
self.transcription_display.clear_all()
|
||||
self.transcriptions.clear()
|
||||
QMessageBox.information(self, "Cleared", "All transcriptions have been cleared.")
|
||||
|
||||
def _save_transcriptions(self):
|
||||
"""Save transcriptions to file."""
|
||||
if not self.transcriptions:
|
||||
QMessageBox.warning(self, "No Transcriptions", "There are no transcriptions to save.")
|
||||
return
|
||||
|
||||
filepath, _ = QFileDialog.getSaveFileName(
|
||||
self,
|
||||
"Save Transcriptions",
|
||||
@@ -528,10 +580,21 @@ class MainWindow(QMainWindow):
|
||||
)
|
||||
|
||||
if filepath:
|
||||
if self.transcription_display.save_to_file(filepath):
|
||||
try:
|
||||
show_timestamps = self.config.get('display.show_timestamps', True)
|
||||
with open(filepath, 'w', encoding='utf-8') as f:
|
||||
for result in self.transcriptions:
|
||||
line_parts = []
|
||||
if show_timestamps:
|
||||
time_str = result.timestamp.strftime("%H:%M:%S")
|
||||
line_parts.append(f"[{time_str}]")
|
||||
if result.user_name and result.user_name.strip():
|
||||
line_parts.append(f"{result.user_name}:")
|
||||
line_parts.append(result.text)
|
||||
f.write(" ".join(line_parts) + "\n")
|
||||
QMessageBox.information(self, "Saved", f"Transcriptions saved to:\n{filepath}")
|
||||
else:
|
||||
QMessageBox.critical(self, "Error", "Failed to save transcriptions")
|
||||
except Exception as e:
|
||||
QMessageBox.critical(self, "Error", f"Failed to save transcriptions:\n{e}")
|
||||
|
||||
def _open_settings(self):
|
||||
"""Open settings dialog."""
|
||||
@@ -569,22 +632,20 @@ class MainWindow(QMainWindow):
|
||||
user_name = self.config.get('user.name', 'User')
|
||||
self.user_label.setText(f"User: {user_name}")
|
||||
|
||||
# Update display settings
|
||||
show_timestamps = self.config.get('display.show_timestamps', True)
|
||||
self.transcription_display.set_max_lines(self.config.get('display.max_lines', 100))
|
||||
self.transcription_display.set_show_timestamps(show_timestamps)
|
||||
self.transcription_display.set_font(
|
||||
self.config.get('display.font_family', 'Courier'),
|
||||
self.config.get('display.font_size', 12)
|
||||
)
|
||||
|
||||
# Update web server settings
|
||||
if self.web_server:
|
||||
self.web_server.show_timestamps = show_timestamps
|
||||
self.web_server.show_timestamps = self.config.get('display.show_timestamps', True)
|
||||
self.web_server.fade_after_seconds = self.config.get('display.fade_after_seconds', 10)
|
||||
self.web_server.max_lines = self.config.get('display.max_lines', 50)
|
||||
self.web_server.font_family = self.config.get('display.font_family', 'Arial')
|
||||
self.web_server.font_size = self.config.get('display.font_size', 16)
|
||||
# Update font source settings
|
||||
self.web_server.font_source = self.config.get('display.font_source', 'System Font')
|
||||
self.web_server.websafe_font = self.config.get('display.websafe_font', 'Arial')
|
||||
self.web_server.google_font = self.config.get('display.google_font', 'Roboto')
|
||||
|
||||
# Update sync link visibility based on server sync settings
|
||||
self._update_sync_link()
|
||||
|
||||
# Restart server sync if it was running and settings changed
|
||||
if self.is_transcribing and self.server_sync_client:
|
||||
@@ -656,18 +717,33 @@ class MainWindow(QMainWindow):
|
||||
room = self.config.get('server_sync.room', 'default')
|
||||
passphrase = self.config.get('server_sync.passphrase', '')
|
||||
user_name = self.config.get('user.name', 'User')
|
||||
fonts_dir = self.config.fonts_dir # Custom fonts directory
|
||||
|
||||
# Font settings (shared with display settings)
|
||||
# Note: "System Font" only works locally, so we treat it as "None" for server sync
|
||||
font_source = self.config.get('display.font_source', 'System Font')
|
||||
if font_source == "System Font":
|
||||
font_source = "None" # System fonts don't work on remote displays
|
||||
websafe_font = self.config.get('display.websafe_font', '')
|
||||
google_font = self.config.get('display.google_font', '')
|
||||
custom_font_file = self.config.get('display.custom_font_file', '')
|
||||
|
||||
if not url:
|
||||
print("Server sync enabled but no URL configured")
|
||||
return
|
||||
|
||||
print(f"Starting server sync: {url}, room: {room}, user: {user_name}")
|
||||
print(f"Starting server sync: {url}, room: {room}, user: {user_name}, font: {font_source}")
|
||||
|
||||
self.server_sync_client = ServerSyncClient(
|
||||
url=url,
|
||||
room=room,
|
||||
passphrase=passphrase,
|
||||
user_name=user_name
|
||||
user_name=user_name,
|
||||
fonts_dir=fonts_dir,
|
||||
font_source=font_source,
|
||||
websafe_font=websafe_font if websafe_font else None,
|
||||
google_font=google_font if google_font else None,
|
||||
custom_font_file=custom_font_file if custom_font_file else None
|
||||
)
|
||||
self.server_sync_client.start()
|
||||
|
||||
@@ -679,6 +755,40 @@ class MainWindow(QMainWindow):
|
||||
f"Failed to start server sync:\n{e}\n\nTranscription will continue locally."
|
||||
)
|
||||
|
||||
def _update_sync_link(self):
    """Update the multi-user sync link visibility and URL.

    Reads ``server_sync.enabled``, ``server_sync.url`` and
    ``server_sync.room`` from ``self.config``. When sync is enabled and a
    usable URL is configured, ``self.sync_link`` is given a rich-text
    anchor pointing at ``<scheme>://<host>/display`` with the current
    display settings passed as query parameters, and is made visible.
    In every other case (sync disabled, no URL, or a URL without a scheme
    or host) the label is hidden.
    """
    import html
    from urllib.parse import urlparse, urlencode

    server_sync_enabled = self.config.get('server_sync.enabled', False)
    server_url = self.config.get('server_sync.url', '')
    room = self.config.get('server_sync.room', 'default')

    if not (server_sync_enabled and server_url):
        self.sync_link.setVisible(False)
        return

    # Best effort: a malformed URL must never break the settings UI, so any
    # failure just hides the link.
    try:
        # Extract base URL from the API endpoint
        # (e.g., http://server:3000/api/send -> http://server:3000)
        parsed = urlparse(server_url)
        if not parsed.scheme or not parsed.netloc:
            # Without both parts the result would be a dead "://" link.
            raise ValueError(f"missing scheme or host in {server_url!r}")
        base_url = f"{parsed.scheme}://{parsed.netloc}"

        # Get display settings to pass as URL parameters
        params = {
            'room': room,
            'fontfamily': self.config.get('display.font_family', 'Arial'),
            'fontsize': self.config.get('display.font_size', 16),
            'fade': self.config.get('display.fade_after_seconds', 10),
            'timestamps': 'true' if self.config.get('display.show_timestamps', True) else 'false',
            'maxlines': self.config.get('display.max_lines', 50),
        }
        display_url = f"{base_url}/display?{urlencode(params)}"
        # Show shorter text with just address and room
        display_text = f"{base_url} (room: {room})"

        # The label text is rich text, so escape everything interpolated
        # into the markup; the room name is free-form user input.
        href = html.escape(display_url, quote=True)
        caption = html.escape(display_text)
        self.sync_link.setText(f'🔗 Multi-User Display: <a href="{href}">{caption}</a>')
        self.sync_link.setToolTip(f"Click to open: {display_url}")
        self.sync_link.setVisible(True)
    except Exception as e:
        print(f"Error parsing server URL: {e}")
        self.sync_link.setVisible(False)
|
||||
|
||||
def closeEvent(self, event):
|
||||
"""Handle window closing."""
|
||||
# Stop transcription if running
|
||||
|
||||
Reference in New Issue
Block a user