Add unified per-speaker font support and remote transcription service

Font changes: - Consolidate font settings into single Display Settings section - Support Web-Safe, Google Fonts, and Custom File uploads for both displays - Fix Google Fonts URL encoding (use + instead of %2B for spaces) - Fix per-speaker font inline style quote escaping in Node.js display - Add font debug logging to help diagnose font issues - Update web server to sync all font settings on settings change - Remove deprecated PHP server documentation files New features: - Add remote transcription service for GPU offloading - Add instance lock to prevent multiple app instances - Add version tracking Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-11 18:56:12 -08:00
parent f035bdb927
commit ff067b3368
23 changed files with 2486 additions and 1160 deletions
--- a/client/remote_transcription.py
+++ b/client/remote_transcription.py
@@ -0,0 +1,346 @@
+"""
+Remote Transcription Client
+
+Handles streaming audio to a remote transcription service and receiving transcriptions.
+Provides fallback to local transcription if the remote service is unavailable.
+"""
+
+import asyncio
+import base64
+import json
+import logging
+import numpy as np
+from datetime import datetime
+from threading import Thread, Lock
+from typing import Optional, Callable
+from queue import Queue, Empty
+
+logger = logging.getLogger(__name__)
+
+
+class RemoteTranscriptionClient:
+    """
+    Client for remote transcription service.
+
+    Streams audio to a remote server and receives transcriptions.
+    """
+
+    def __init__(
+        self,
+        server_url: str,
+        api_key: str,
+        on_transcription: Optional[Callable[[str, bool], None]] = None,
+        on_error: Optional[Callable[[str], None]] = None,
+        on_connection_change: Optional[Callable[[bool], None]] = None,
+        sample_rate: int = 16000
+    ):
+        """
+        Initialize remote transcription client.
+
+        Args:
+            server_url: WebSocket URL of the transcription service
+            api_key: API key for authentication
+            on_transcription: Callback for transcriptions (text, is_preview)
+            on_error: Callback for errors
+            on_connection_change: Callback for connection status changes
+            sample_rate: Audio sample rate
+        """
+        self.server_url = server_url
+        self.api_key = api_key
+        self.sample_rate = sample_rate
+        self.on_transcription = on_transcription
+        self.on_error = on_error
+        self.on_connection_change = on_connection_change
+
+        self.websocket = None
+        self.is_connected = False
+        self.is_authenticated = False
+        self.is_running = False
+
+        self.audio_queue: Queue = Queue()
+        self.send_thread: Optional[Thread] = None
+        self.receive_thread: Optional[Thread] = None
+        self.loop: Optional[asyncio.AbstractEventLoop] = None
+
+        self._lock = Lock()
+
+    async def _connect(self):
+        """Establish WebSocket connection and authenticate."""
+        try:
+            import websockets
+
+            logger.info(f"Connecting to {self.server_url}")
+            self.websocket = await websockets.connect(
+                self.server_url,
+                ping_interval=30,
+                ping_timeout=10
+            )
+
+            # Authenticate
+            auth_message = {
+                "type": "auth",
+                "api_key": self.api_key
+            }
+            await self.websocket.send(json.dumps(auth_message))
+
+            # Wait for auth response
+            response = await asyncio.wait_for(
+                self.websocket.recv(),
+                timeout=10.0
+            )
+            auth_result = json.loads(response)
+
+            if auth_result.get("type") == "auth_result" and auth_result.get("success"):
+                self.is_connected = True
+                self.is_authenticated = True
+                logger.info("Connected and authenticated to remote transcription service")
+                if self.on_connection_change:
+                    self.on_connection_change(True)
+                return True
+            else:
+                error_msg = auth_result.get("message", "Authentication failed")
+                logger.error(f"Authentication failed: {error_msg}")
+                if self.on_error:
+                    self.on_error(f"Authentication failed: {error_msg}")
+                return False
+
+        except Exception as e:
+            logger.error(f"Connection failed: {e}")
+            if self.on_error:
+                self.on_error(f"Connection failed: {e}")
+            return False
+
+    async def _send_loop(self):
+        """Send audio chunks from the queue."""
+        while self.is_running and self.websocket:
+            try:
+                # Get audio from queue with timeout
+                try:
+                    audio_data = self.audio_queue.get(timeout=0.1)
+                except Empty:
+                    continue
+
+                if audio_data is None:
+                    continue
+
+                # Encode audio as base64
+                audio_bytes = audio_data.astype(np.float32).tobytes()
+                audio_b64 = base64.b64encode(audio_bytes).decode('utf-8')
+
+                # Send to server
+                message = {
+                    "type": "audio",
+                    "data": audio_b64,
+                    "sample_rate": self.sample_rate
+                }
+                await self.websocket.send(json.dumps(message))
+
+            except Exception as e:
+                if self.is_running:
+                    logger.error(f"Send error: {e}")
+                break
+
+    async def _receive_loop(self):
+        """Receive transcriptions from the server."""
+        while self.is_running and self.websocket:
+            try:
+                message = await asyncio.wait_for(
+                    self.websocket.recv(),
+                    timeout=1.0
+                )
+                data = json.loads(message)
+                msg_type = data.get("type", "")
+
+                if msg_type == "transcription":
+                    text = data.get("text", "")
+                    is_preview = data.get("is_preview", False)
+                    if text and self.on_transcription:
+                        self.on_transcription(text, is_preview)
+
+                elif msg_type == "error":
+                    error_msg = data.get("message", "Unknown error")
+                    logger.error(f"Server error: {error_msg}")
+                    if self.on_error:
+                        self.on_error(error_msg)
+
+                elif msg_type == "pong":
+                    pass  # Keep-alive response
+
+            except asyncio.TimeoutError:
+                continue
+            except Exception as e:
+                if self.is_running:
+                    logger.error(f"Receive error: {e}")
+                break
+
+        # Connection lost
+        self.is_connected = False
+        self.is_authenticated = False
+        if self.on_connection_change:
+            self.on_connection_change(False)
+
+    def _run_async(self):
+        """Run the async event loop in a thread."""
+        self.loop = asyncio.new_event_loop()
+        asyncio.set_event_loop(self.loop)
+
+        try:
+            # Connect
+            connected = self.loop.run_until_complete(self._connect())
+            if not connected:
+                return
+
+            # Run send and receive loops
+            tasks = [
+                self._send_loop(),
+                self._receive_loop()
+            ]
+            self.loop.run_until_complete(asyncio.gather(*tasks))
+
+        except Exception as e:
+            logger.error(f"Async loop error: {e}")
+        finally:
+            if self.websocket:
+                try:
+                    self.loop.run_until_complete(self.websocket.close())
+                except:
+                    pass
+            self.loop.close()
+
+    def start(self):
+        """Start the remote transcription client."""
+        with self._lock:
+            if self.is_running:
+                return
+
+            self.is_running = True
+
+            # Start async loop in background thread
+            self.send_thread = Thread(target=self._run_async, daemon=True)
+            self.send_thread.start()
+
+    def stop(self):
+        """Stop the remote transcription client."""
+        with self._lock:
+            self.is_running = False
+
+            # Signal end to server
+            if self.websocket and self.loop:
+                try:
+                    asyncio.run_coroutine_threadsafe(
+                        self.websocket.send(json.dumps({"type": "end"})),
+                        self.loop
+                    )
+                except:
+                    pass
+
+            self.is_connected = False
+            self.is_authenticated = False
+
+    def send_audio(self, audio_data: np.ndarray):
+        """
+        Send audio data for transcription.
+
+        Args:
+            audio_data: Audio data as numpy array (float32, mono, sample_rate)
+        """
+        if self.is_connected and self.is_authenticated:
+            self.audio_queue.put(audio_data)
+
+    @property
+    def connected(self) -> bool:
+        """Check if connected and authenticated."""
+        return self.is_connected and self.is_authenticated
+
+
+class RemoteTranscriptionManager:
+    """
+    Manages remote transcription with fallback to local processing.
+    """
+
+    def __init__(
+        self,
+        server_url: str,
+        api_key: str,
+        local_engine=None,
+        on_transcription: Optional[Callable] = None,
+        on_preview: Optional[Callable] = None
+    ):
+        """
+        Initialize the remote transcription manager.
+
+        Args:
+            server_url: Remote transcription service URL
+            api_key: API key for authentication
+            local_engine: Local transcription engine for fallback
+            on_transcription: Callback for final transcriptions
+            on_preview: Callback for preview transcriptions
+        """
+        self.server_url = server_url
+        self.api_key = api_key
+        self.local_engine = local_engine
+        self.on_transcription = on_transcription
+        self.on_preview = on_preview
+
+        self.client: Optional[RemoteTranscriptionClient] = None
+        self.use_remote = True
+        self.is_running = False
+
+    def _handle_transcription(self, text: str, is_preview: bool):
+        """Handle transcription from remote service."""
+        if is_preview:
+            if self.on_preview:
+                self.on_preview(text)
+        else:
+            if self.on_transcription:
+                self.on_transcription(text)
+
+    def _handle_error(self, error: str):
+        """Handle error from remote service."""
+        logger.error(f"Remote transcription error: {error}")
+        # Could switch to local fallback here
+
+    def _handle_connection_change(self, connected: bool):
+        """Handle connection status change."""
+        if connected:
+            logger.info("Remote transcription connected")
+        else:
+            logger.warning("Remote transcription disconnected")
+            # Could switch to local fallback here
+
+    def start(self):
+        """Start remote transcription."""
+        if self.is_running:
+            return
+
+        self.is_running = True
+
+        if self.use_remote and self.server_url and self.api_key:
+            self.client = RemoteTranscriptionClient(
+                server_url=self.server_url,
+                api_key=self.api_key,
+                on_transcription=self._handle_transcription,
+                on_error=self._handle_error,
+                on_connection_change=self._handle_connection_change
+            )
+            self.client.start()
+
+    def stop(self):
+        """Stop remote transcription."""
+        self.is_running = False
+        if self.client:
+            self.client.stop()
+            self.client = None
+
+    def send_audio(self, audio_data: np.ndarray):
+        """Send audio for transcription."""
+        if self.client and self.client.connected:
+            self.client.send_audio(audio_data)
+        elif self.local_engine:
+            # Fallback to local processing
+            pass  # Local engine handles its own audio capture
+
+    @property
+    def is_connected(self) -> bool:
+        """Check if remote service is connected."""
+        return self.client is not None and self.client.connected