Add unified per-speaker font support and remote transcription service

Font changes:
- Consolidate font settings into single Display Settings section
- Support Web-Safe, Google Fonts, and Custom File uploads for both displays
- Fix Google Fonts URL encoding (use + instead of %2B for spaces)
- Fix per-speaker font inline style quote escaping in Node.js display
- Add font debug logging to help diagnose font issues
- Update web server to sync all font settings on settings change
- Remove deprecated PHP server documentation files

New features:
- Add remote transcription service for GPU offloading
- Add instance lock to prevent multiple app instances
- Add version tracking

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-11 18:56:12 -08:00
parent f035bdb927
commit ff067b3368
23 changed files with 2486 additions and 1160 deletions
--- a/server/web_display.py
+++ b/server/web_display.py
@@ -1,8 +1,9 @@
 """Web server for displaying transcriptions in a browser (for OBS browser source)."""

 import asyncio
+from pathlib import Path
 from fastapi import FastAPI, WebSocket
-from fastapi.responses import HTMLResponse
+from fastapi.responses import HTMLResponse, FileResponse
 from typing import List, Optional
 import json
 from datetime import datetime
@@ -11,7 +12,11 @@ from datetime import datetime
 class TranscriptionWebServer:
    """Web server for displaying transcriptions."""

-    def __init__(self, host: str = "127.0.0.1", port: int = 8080, show_timestamps: bool = True, fade_after_seconds: int = 10, max_lines: int = 50, font_family: str = "Arial", font_size: int = 16):
+    def __init__(self, host: str = "127.0.0.1", port: int = 8080, show_timestamps: bool = True,
+                 fade_after_seconds: int = 10, max_lines: int = 50, font_family: str = "Arial",
+                 font_size: int = 16, fonts_dir: Optional[Path] = None,
+                 font_source: str = "System Font", websafe_font: str = "Arial",
+                 google_font: str = "Roboto"):
        """
        Initialize web server.

@@ -21,8 +26,12 @@ class TranscriptionWebServer:
            show_timestamps: Whether to show timestamps in transcriptions
            fade_after_seconds: Time in seconds before transcriptions fade out (0 = never fade)
            max_lines: Maximum number of lines to display at once
-            font_family: Font family for display
+            font_family: Font family for display (system font)
            font_size: Font size in pixels
+            fonts_dir: Directory containing custom font files
+            font_source: Font source type ("System Font", "Web-Safe", "Google Font")
+            websafe_font: Web-safe font name
+            google_font: Google Font name
        """
        self.host = host
        self.port = port
@@ -31,6 +40,10 @@ class TranscriptionWebServer:
        self.max_lines = max_lines
        self.font_family = font_family
        self.font_size = font_size
+        self.fonts_dir = fonts_dir
+        self.font_source = font_source
+        self.websafe_font = websafe_font
+        self.google_font = google_font
        self.app = FastAPI()
        self.active_connections: List[WebSocket] = []
        self.transcriptions = []  # Store recent transcriptions
@@ -46,6 +59,23 @@ class TranscriptionWebServer:
            """Serve the transcription display page."""
            return self._get_html()

+        @self.app.get("/fonts/{font_file}")
+        async def serve_font(font_file: str):
+            """Serve a custom font file from fonts_dir; reply 404 for anything else."""
+            mime_types = {
+                '.ttf': 'font/ttf',
+                '.otf': 'font/otf',
+                '.woff': 'font/woff',
+                '.woff2': 'font/woff2',
+            }
+            # Accept bare file names only, so a crafted name such as
+            # "..\x.ttf" (a separator on Windows) cannot leave fonts_dir.
+            if self.fonts_dir and Path(font_file.replace('\\', '/')).name == font_file:
+                font_path = self.fonts_dir / font_file
+                if font_path.is_file() and font_path.suffix.lower() in mime_types:
+                    return FileResponse(font_path, media_type=mime_types[font_path.suffix.lower()])
+            return HTMLResponse(status_code=404, content="Font not found")
+
        @self.app.websocket("/ws")
        async def websocket_endpoint(websocket: WebSocket):
            """WebSocket endpoint for real-time updates."""
@@ -64,19 +94,70 @@ class TranscriptionWebServer:
            except:
                self.active_connections.remove(websocket)

+    def _get_font_face_css(self) -> str:
+        """Generate @font-face CSS rules for the font files found in fonts_dir."""
+        from urllib.parse import quote
+
+        if not self.fonts_dir or not self.fonts_dir.exists():
+            return ""
+
+        format_map = {'.ttf': 'truetype', '.otf': 'opentype', '.woff': 'woff', '.woff2': 'woff2'}
+        css_rules = []
+
+        # iterdir() yields entries in arbitrary order; sort so the CSS is stable.
+        for font_file in sorted(self.fonts_dir.iterdir(), key=lambda p: p.name):
+            font_format = format_map.get(font_file.suffix.lower())
+            if font_format is None:
+                continue  # not a supported font file
+            # Escape the family name for a single-quoted CSS string and
+            # percent-encode the file name so quotes/spaces cannot break url().
+            font_name = font_file.stem.replace('\\', '\\\\').replace("'", "\\'")
+            css_rules.append(f"""
+        @font-face {{
+            font-family: '{font_name}';
+            src: url('/fonts/{quote(font_file.name)}') format('{font_format}');
+            font-weight: normal;
+            font-style: normal;
+        }}""")
+
+        return "\n".join(css_rules)
+
+    def _get_effective_font(self) -> str:
+        """Resolve the font family to render with for the current font_source."""
+        source = self.font_source
+        if source == "Google Font" and self.google_font:
+            return self.google_font
+        use_websafe = source == "Web-Safe" and bool(self.websafe_font)
+        # Any other source (or an empty chosen font) falls back to font_family.
+        return self.websafe_font if use_websafe else self.font_family
+
+    def _get_google_font_link(self) -> str:
+        """Return the Google Fonts <link> tag, or "" when another font source is active."""
+        from urllib.parse import quote_plus  # spaces -> '+'; quotes, '&', '<' get percent-encoded
+        if self.font_source != "Google Font" or not self.google_font:
+            return ""
+        return f'<link rel="stylesheet" href="https://fonts.googleapis.com/css2?family={quote_plus(self.google_font, safe=":@;,")}&display=swap">'
+
    def _get_html(self) -> str:
        """Generate HTML for transcription display."""
+        # Generate custom font CSS
+        font_face_css = self._get_font_face_css()
+        google_font_link = self._get_google_font_link()
+        effective_font = self._get_effective_font()
+
        return f"""
 <!DOCTYPE html>
 <html>
 <head>
    <title>Transcription Display</title>
+    {google_font_link}
    <style>
+        {font_face_css}
        body {{
            margin: 0;
            padding: 20px;
            background: transparent;
-            font-family: {self.font_family}, sans-serif;
+            font-family: '{effective_font}', sans-serif;
            font-size: {self.font_size}px;
            color: white;
            overflow: hidden;
@@ -108,6 +189,14 @@ class TranscriptionWebServer:
        .text {{
            color: white;
        }}
+        .transcription.preview {{
+            font-style: italic;
+        }}
+        .preview-indicator {{
+            color: #888;
+            font-size: 0.85em;
+            margin-right: 5px;
+        }}
        @keyframes slideIn {{
            from {{
                opacity: 0;
@@ -129,9 +218,15 @@ class TranscriptionWebServer:
        const fadeAfterSeconds = {self.fade_after_seconds};
        const maxLines = {self.max_lines};

+        let currentPreviewElement = null;
+
        ws.onmessage = (event) => {{
            const data = JSON.parse(event.data);
-            addTranscription(data);
+            if (data.is_preview) {{
+                handlePreview(data);
+            }} else {{
+                addTranscription(data);
+            }}
        }};

        ws.onclose = () => {{
@@ -146,35 +241,86 @@ class TranscriptionWebServer:
            }}
        }}, 30000);

-        function addTranscription(data) {{
+        function handlePreview(data) {{
+            // If there's already a preview, update it
+            if (currentPreviewElement) {{
+                updatePreviewContent(currentPreviewElement, data);
+            }} else {{
+                // Create new preview element
+                currentPreviewElement = createTranscriptionElement(data, true);
+                container.appendChild(currentPreviewElement);
+            }}
+
+            // Enforce max lines limit
+            while (container.children.length > maxLines) {{
+                const first = container.firstChild;
+                if (first === currentPreviewElement) break; // Don't remove current preview
+                container.removeChild(first);
+            }}
+        }}
+
+        function updatePreviewContent(element, data) {{
+            let html = '';
+            if (data.timestamp) {{
+                html += `<span class="timestamp">[${{data.timestamp}}]</span>`;
+            }}
+            if (data.user_name && data.user_name.trim()) {{
+                html += `<span class="user">${{data.user_name}}:</span>`;
+            }}
+            html += `<span class="preview-indicator">[...]</span>`;
+            html += `<span class="text">${{data.text}}</span>`;
+            element.innerHTML = html;
+        }}
+
+        function createTranscriptionElement(data, isPreview) {{
            const div = document.createElement('div');
-            div.className = 'transcription';
+            div.className = isPreview ? 'transcription preview' : 'transcription';

            let html = '';
            if (data.timestamp) {{
                html += `<span class="timestamp">[${{data.timestamp}}]</span>`;
            }}
-            if (data.user_name) {{
+            if (data.user_name && data.user_name.trim()) {{
                html += `<span class="user">${{data.user_name}}:</span>`;
            }}
+            if (isPreview) {{
+                html += `<span class="preview-indicator">[...]</span>`;
+            }}
            html += `<span class="text">${{data.text}}</span>`;

            div.innerHTML = html;
-            container.appendChild(div);
+            return div;
+        }}

-            // Set up fade-out if enabled
-            if (fadeAfterSeconds > 0) {{
-                setTimeout(() => {{
-                    // Start fade animation
-                    div.classList.add('fading');
+        function addTranscription(data) {{
+            // If there's a preview, replace it with final transcription
+            if (currentPreviewElement) {{
+                currentPreviewElement.className = 'transcription';
+                let html = '';
+                if (data.timestamp) {{
+                    html += `<span class="timestamp">[${{data.timestamp}}]</span>`;
+                }}
+                if (data.user_name && data.user_name.trim()) {{
+                    html += `<span class="user">${{data.user_name}}:</span>`;
+                }}
+                html += `<span class="text">${{data.text}}</span>`;
+                currentPreviewElement.innerHTML = html;

-                    // Remove element after fade completes
-                    setTimeout(() => {{
-                        if (div.parentNode === container) {{
-                            container.removeChild(div);
-                        }}
-                    }}, 1000); // Match the CSS transition duration
-                }}, fadeAfterSeconds * 1000);
+                // Set up fade-out for the final transcription
+                if (fadeAfterSeconds > 0) {{
+                    setupFadeOut(currentPreviewElement);
+                }}
+
+                currentPreviewElement = null;
+            }} else {{
+                // No preview to replace, add new element
+                const div = createTranscriptionElement(data, false);
+                container.appendChild(div);
+
+                // Set up fade-out if enabled
+                if (fadeAfterSeconds > 0) {{
+                    setupFadeOut(div);
+                }}
            }}

            // Enforce max lines limit
@@ -182,6 +328,20 @@ class TranscriptionWebServer:
                container.removeChild(container.firstChild);
            }}
        }}
+
+        function setupFadeOut(element) {{
+            setTimeout(() => {{
+                // Start fade animation
+                element.classList.add('fading');
+
+                // Remove element after fade completes
+                setTimeout(() => {{
+                    if (element.parentNode === container) {{
+                        container.removeChild(element);
+                    }}
+                }}, 1000); // Match the CSS transition duration
+            }}, fadeAfterSeconds * 1000);
+        }}
    </script>
 </body>
 </html>
@@ -225,6 +385,43 @@ class TranscriptionWebServer:
        for conn in disconnected:
            self.active_connections.remove(conn)

+    async def broadcast_preview(self, text: str, user_name: str = "", timestamp: Optional[datetime] = None):
+        """
+        Broadcast a preview transcription to all connected clients.
+        Preview transcriptions are shown in italics and will be replaced by final.
+
+        Args:
+            text: Preview transcription text
+            user_name: User/speaker name
+            timestamp: Timestamp of transcription (defaults to the current time)
+        """
+        if timestamp is None:
+            timestamp = datetime.now()
+
+        trans_data = {
+            "text": text,
+            "user_name": user_name,
+            "is_preview": True,  # Flag to indicate this is a preview
+        }
+
+        # Only include timestamp if enabled
+        if self.show_timestamps:
+            trans_data["timestamp"] = timestamp.strftime("%H:%M:%S")
+
+        # Don't store previews in transcriptions list (they're temporary).
+        # Iterate over a copy: other handlers may edit the list while a send is awaited.
+        disconnected = []
+        for connection in list(self.active_connections):
+            try:
+                await connection.send_json(trans_data)
+            except Exception:  # not a bare except: task cancellation must propagate
+                disconnected.append(connection)
+
+        # Remove disconnected clients (the /ws handler may have removed them already)
+        for conn in disconnected:
+            if conn in self.active_connections:
+                self.active_connections.remove(conn)
+
    async def start(self):
        """Start the web server."""
        import uvicorn