Add unified per-speaker font support and remote transcription service

Font changes:
- Consolidate font settings into single Display Settings section
- Support Web-Safe, Google Fonts, and Custom File uploads for both displays
- Fix Google Fonts URL encoding (use + instead of %2B for spaces)
- Fix per-speaker font inline style quote escaping in Node.js display
- Add font debug logging to help diagnose font issues
- Update web server to sync all font settings on settings change
- Remove deprecated PHP server documentation files

New features:
- Add remote transcription service for GPU offloading
- Add instance lock to prevent multiple app instances
- Add version tracking

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-11 18:56:12 -08:00
parent f035bdb927
commit ff067b3368
23 changed files with 2486 additions and 1160 deletions

View File

@@ -1,8 +1,9 @@
"""Web server for displaying transcriptions in a browser (for OBS browser source)."""
import asyncio
from pathlib import Path
from fastapi import FastAPI, WebSocket
from fastapi.responses import HTMLResponse
from fastapi.responses import HTMLResponse, FileResponse
from typing import List, Optional
import json
from datetime import datetime
@@ -11,7 +12,11 @@ from datetime import datetime
class TranscriptionWebServer:
"""Web server for displaying transcriptions."""
def __init__(self, host: str = "127.0.0.1", port: int = 8080, show_timestamps: bool = True, fade_after_seconds: int = 10, max_lines: int = 50, font_family: str = "Arial", font_size: int = 16):
def __init__(self, host: str = "127.0.0.1", port: int = 8080, show_timestamps: bool = True,
fade_after_seconds: int = 10, max_lines: int = 50, font_family: str = "Arial",
font_size: int = 16, fonts_dir: Optional[Path] = None,
font_source: str = "System Font", websafe_font: str = "Arial",
google_font: str = "Roboto"):
"""
Initialize web server.
@@ -21,8 +26,12 @@ class TranscriptionWebServer:
show_timestamps: Whether to show timestamps in transcriptions
fade_after_seconds: Time in seconds before transcriptions fade out (0 = never fade)
max_lines: Maximum number of lines to display at once
font_family: Font family for display
font_family: Font family for display (system font)
font_size: Font size in pixels
fonts_dir: Directory containing custom font files
font_source: Font source type ("System Font", "Web-Safe", "Google Font")
websafe_font: Web-safe font name
google_font: Google Font name
"""
self.host = host
self.port = port
@@ -31,6 +40,10 @@ class TranscriptionWebServer:
self.max_lines = max_lines
self.font_family = font_family
self.font_size = font_size
self.fonts_dir = fonts_dir
self.font_source = font_source
self.websafe_font = websafe_font
self.google_font = google_font
self.app = FastAPI()
self.active_connections: List[WebSocket] = []
self.transcriptions = [] # Store recent transcriptions
@@ -46,6 +59,23 @@ class TranscriptionWebServer:
"""Serve the transcription display page."""
return self._get_html()
@self.app.get("/fonts/{font_file}")
async def serve_font(font_file: str):
    """Serve a custom font file from ``self.fonts_dir``.

    Args:
        font_file: Bare file name of the requested font, taken from the URL.

    Returns:
        A ``FileResponse`` with the matching font MIME type when the file
        exists directly inside ``self.fonts_dir`` and has a supported
        extension; otherwise a 404 ``HTMLResponse``.
    """
    mime_types = {
        '.ttf': 'font/ttf',
        '.otf': 'font/otf',
        '.woff': 'font/woff',
        '.woff2': 'font/woff2'
    }
    if self.fonts_dir:
        # The name comes from the client: only accept a plain file name so
        # separators or drive prefixes cannot point outside fonts_dir.
        if Path(font_file).name == font_file:
            font_path = self.fonts_dir / font_file
            media_type = mime_types.get(font_path.suffix.lower())
            # is_file() (not exists()) so a directory named "x.ttf" is a 404
            # rather than an error while streaming the response.
            if media_type is not None and font_path.is_file():
                return FileResponse(font_path, media_type=media_type)
    return HTMLResponse(status_code=404, content="Font not found")
@self.app.websocket("/ws")
async def websocket_endpoint(websocket: WebSocket):
"""WebSocket endpoint for real-time updates."""
@@ -64,19 +94,70 @@ class TranscriptionWebServer:
except:
self.active_connections.remove(websocket)
def _get_font_face_css(self) -> str:
"""Generate @font-face CSS rules for custom fonts."""
if not self.fonts_dir or not self.fonts_dir.exists():
return ""
css_rules = []
font_extensions = {'.ttf', '.otf', '.woff', '.woff2'}
format_map = {
'.ttf': 'truetype',
'.otf': 'opentype',
'.woff': 'woff',
'.woff2': 'woff2'
}
for font_file in self.fonts_dir.iterdir():
if font_file.suffix.lower() in font_extensions:
font_name = font_file.stem
font_format = format_map.get(font_file.suffix.lower(), 'truetype')
css_rules.append(f"""
@font-face {{
font-family: '{font_name}';
src: url('/fonts/{font_file.name}') format('{font_format}');
font-weight: normal;
font-style: normal;
}}""")
return "\n".join(css_rules)
def _get_effective_font(self) -> str:
"""Get the effective font family based on font_source setting."""
if self.font_source == "Google Font" and self.google_font:
return self.google_font
elif self.font_source == "Web-Safe" and self.websafe_font:
return self.websafe_font
else:
return self.font_family
def _get_google_font_link(self) -> str:
"""Generate Google Fonts link tag if using Google Font."""
if self.font_source == "Google Font" and self.google_font:
font_name = self.google_font.replace(' ', '+')
return f'<link rel="stylesheet" href="https://fonts.googleapis.com/css2?family={font_name}&display=swap">'
return ""
def _get_html(self) -> str:
"""Generate HTML for transcription display."""
# Generate custom font CSS
font_face_css = self._get_font_face_css()
google_font_link = self._get_google_font_link()
effective_font = self._get_effective_font()
return f"""
<!DOCTYPE html>
<html>
<head>
<title>Transcription Display</title>
{google_font_link}
<style>
{font_face_css}
body {{
margin: 0;
padding: 20px;
background: transparent;
font-family: {self.font_family}, sans-serif;
font-family: '{effective_font}', sans-serif;
font-size: {self.font_size}px;
color: white;
overflow: hidden;
@@ -108,6 +189,14 @@ class TranscriptionWebServer:
.text {{
color: white;
}}
.transcription.preview {{
font-style: italic;
}}
.preview-indicator {{
color: #888;
font-size: 0.85em;
margin-right: 5px;
}}
@keyframes slideIn {{
from {{
opacity: 0;
@@ -129,9 +218,15 @@ class TranscriptionWebServer:
const fadeAfterSeconds = {self.fade_after_seconds};
const maxLines = {self.max_lines};
let currentPreviewElement = null;
ws.onmessage = (event) => {{
const data = JSON.parse(event.data);
addTranscription(data);
if (data.is_preview) {{
handlePreview(data);
}} else {{
addTranscription(data);
}}
}};
ws.onclose = () => {{
@@ -146,35 +241,86 @@ class TranscriptionWebServer:
}}
}}, 30000);
function addTranscription(data) {{
function handlePreview(data) {{
// If there's already a preview, update it
if (currentPreviewElement) {{
updatePreviewContent(currentPreviewElement, data);
}} else {{
// Create new preview element
currentPreviewElement = createTranscriptionElement(data, true);
container.appendChild(currentPreviewElement);
}}
// Enforce max lines limit
while (container.children.length > maxLines) {{
const first = container.firstChild;
if (first === currentPreviewElement) break; // Don't remove current preview
container.removeChild(first);
}}
}}
function updatePreviewContent(element, data) {{
let html = '';
if (data.timestamp) {{
html += `<span class="timestamp">[${{data.timestamp}}]</span>`;
}}
if (data.user_name && data.user_name.trim()) {{
html += `<span class="user">${{data.user_name}}:</span>`;
}}
html += `<span class="preview-indicator">[...]</span>`;
html += `<span class="text">${{data.text}}</span>`;
element.innerHTML = html;
}}
function createTranscriptionElement(data, isPreview) {{
const div = document.createElement('div');
div.className = 'transcription';
div.className = isPreview ? 'transcription preview' : 'transcription';
let html = '';
if (data.timestamp) {{
html += `<span class="timestamp">[${{data.timestamp}}]</span>`;
}}
if (data.user_name) {{
if (data.user_name && data.user_name.trim()) {{
html += `<span class="user">${{data.user_name}}:</span>`;
}}
if (isPreview) {{
html += `<span class="preview-indicator">[...]</span>`;
}}
html += `<span class="text">${{data.text}}</span>`;
div.innerHTML = html;
container.appendChild(div);
return div;
}}
// Set up fade-out if enabled
if (fadeAfterSeconds > 0) {{
setTimeout(() => {{
// Start fade animation
div.classList.add('fading');
function addTranscription(data) {{
// If there's a preview, replace it with final transcription
if (currentPreviewElement) {{
currentPreviewElement.className = 'transcription';
let html = '';
if (data.timestamp) {{
html += `<span class="timestamp">[${{data.timestamp}}]</span>`;
}}
if (data.user_name && data.user_name.trim()) {{
html += `<span class="user">${{data.user_name}}:</span>`;
}}
html += `<span class="text">${{data.text}}</span>`;
currentPreviewElement.innerHTML = html;
// Remove element after fade completes
setTimeout(() => {{
if (div.parentNode === container) {{
container.removeChild(div);
}}
}}, 1000); // Match the CSS transition duration
}}, fadeAfterSeconds * 1000);
// Set up fade-out for the final transcription
if (fadeAfterSeconds > 0) {{
setupFadeOut(currentPreviewElement);
}}
currentPreviewElement = null;
}} else {{
// No preview to replace, add new element
const div = createTranscriptionElement(data, false);
container.appendChild(div);
// Set up fade-out if enabled
if (fadeAfterSeconds > 0) {{
setupFadeOut(div);
}}
}}
// Enforce max lines limit
@@ -182,6 +328,20 @@ class TranscriptionWebServer:
container.removeChild(container.firstChild);
}}
}}
function setupFadeOut(element) {{
setTimeout(() => {{
// Start fade animation
element.classList.add('fading');
// Remove element after fade completes
setTimeout(() => {{
if (element.parentNode === container) {{
container.removeChild(element);
}}
}}, 1000); // Match the CSS transition duration
}}, fadeAfterSeconds * 1000);
}}
</script>
</body>
</html>
@@ -225,6 +385,43 @@ class TranscriptionWebServer:
for conn in disconnected:
self.active_connections.remove(conn)
async def broadcast_preview(self, text: str, user_name: str = "", timestamp: Optional[datetime] = None):
    """
    Broadcast a preview transcription to all connected clients.

    The payload carries ``is_preview: True`` so the page can tell it apart
    from a final transcription. Previews are not stored in
    ``self.transcriptions``.

    Args:
        text: Preview transcription text
        user_name: User/speaker name
        timestamp: Timestamp of transcription (defaults to now)
    """
    if timestamp is None:
        timestamp = datetime.now()

    trans_data = {
        "text": text,
        "user_name": user_name,
        "is_preview": True,  # Flag to indicate this is a preview
    }

    # Only include timestamp if enabled
    if self.show_timestamps:
        trans_data["timestamp"] = timestamp.strftime("%H:%M:%S")

    # Iterate over a snapshot: each await yields to the event loop, and
    # other coroutines may add or remove connections in the meantime.
    disconnected = []
    for connection in list(self.active_connections):
        try:
            await connection.send_json(trans_data)
        except Exception:
            # A failed send marks the client as gone. Cancellation and
            # interpreter-exit signals are not Exception subclasses and
            # propagate normally.
            disconnected.append(connection)

    # Remove disconnected clients; one may already have been removed
    # elsewhere while we were awaiting, so check before removing.
    for conn in disconnected:
        if conn in self.active_connections:
            self.active_connections.remove(conn)
async def start(self):
"""Start the web server."""
import uvicorn