Add unified per-speaker font support and remote transcription service

Font changes:
- Consolidate font settings into a single Display Settings section
- Support Web-Safe, Google Fonts, and Custom File uploads for both displays
- Fix Google Fonts URL encoding (use + instead of %2B for spaces)
- Fix per-speaker font inline style quote escaping in Node.js display
- Add font debug logging to help diagnose font issues
- Update web server to sync all font settings on settings change
- Remove deprecated PHP server documentation files

New features:
- Add remote transcription service for GPU offloading
- Add instance lock to prevent multiple app instances
- Add version tracking

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-11 18:56:12 -08:00
parent f035bdb927
commit ff067b3368
23 changed files with 2486 additions and 1160 deletions
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -174,8 +174,9 @@ See [server/nodejs/README.md](server/nodejs/README.md) for deployment instructio
 - [client/server_sync.py](client/server_sync.py) handles server communication
 - Toggle in Settings: "Enable Server Sync"
- Sends transcriptions to PHP server via POST
+- Sends transcriptions to Node.js server via HTTP POST
- Separate web display shows merged transcriptions from all users
+- Real-time updates via WebSocket to display page
 - Per-speaker font support (Web-Safe, Google Fonts, Custom uploads)
 - Falls back gracefully if server unavailable
 ## Common Patterns
@@ -191,8 +192,8 @@ See [server/nodejs/README.md](server/nodejs/README.md) for deployment instructio
 ### Modifying Transcription Display
 - Local GUI: [gui/transcription_display_qt.py](gui/transcription_display_qt.py)
- Web display (OBS): [server/web_display.py](server/web_display.py) (HTML in `_get_html()`)
+- Local web display (OBS): [server/web_display.py](server/web_display.py) (HTML in `_get_html()`)
- Multi-user display: [server/php/display.php](server/php/display.php)
+- Multi-user display: [server/nodejs/server.js](server/nodejs/server.js) (display page in `/display` route)
 ### Adding a New Model Size
--- a/client/config.py
+++ b/client/config.py
@@ -19,6 +19,10 @@ class Config:
        self.app_dir = Path.home() / ".local-transcription"
        self.app_dir.mkdir(parents=True, exist_ok=True)
        # Fonts directory for custom font files
        self.fonts_dir = self.app_dir / "fonts"
        self.fonts_dir.mkdir(parents=True, exist_ok=True)
        if config_path is None:
            self.config_path = self.app_dir / "config.yaml"
        else:
@@ -34,7 +38,7 @@ class Config:
                self.config = yaml.safe_load(f) or {}
        else:
            # Load default configuration
-            default_config_path = Path(__file__).parent.parent / "config" / "default_config.yaml"
+            default_config_path = Path(__file__).resolve().parent.parent / "config" / "default_config.yaml"
            if default_config_path.exists():
                with open(default_config_path, 'r') as f:
                    self.config = yaml.safe_load(f) or {}
@@ -137,5 +141,24 @@ class Config:
        self.config = self._get_default_config()
        self.save()
    def get_custom_fonts(self) -> list:
        """
        Get list of custom font files in the fonts directory.

        Only regular files whose extension is .ttf, .otf, .woff or .woff2
        (case-insensitive) are reported. Directories are skipped even when
        their name ends in a font extension.

        Returns:
            List of (font_name, font_path) tuples sorted case-insensitively by
            font name, where font_name is the filename without its extension.
            Empty when the fonts directory does not exist.
        """
        font_extensions = {'.ttf', '.otf', '.woff', '.woff2'}
        if not self.fonts_dir.exists():
            return []
        fonts = [
            # Use filename without extension as font name
            (font_file.stem, font_file)
            for font_file in self.fonts_dir.iterdir()
            if font_file.is_file() and font_file.suffix.lower() in font_extensions
        ]
        return sorted(fonts, key=lambda entry: entry[0].lower())
    def __repr__(self) -> str:
        return f"Config(path={self.config_path})"
--- a/client/instance_lock.py
+++ b/client/instance_lock.py
@@ -0,0 +1,94 @@
 """Single instance lock management for Local Transcription application."""
 import os
 import sys
 from pathlib import Path
 class InstanceLock:
    """Manages single instance lock using a PID file.

    The lock is the file ``~/.local-transcription/app.lock`` containing the
    PID of the process that holds it. A lock whose PID no longer belongs to a
    running process (or cannot be parsed) is considered stale and is taken
    over.

    NOTE: checking the existing lock and writing the new one are separate
    steps, so two instances starting at the same instant can both succeed.
    """

    def __init__(self):
        """Initialize the instance lock (does not touch the filesystem)."""
        self.lock_dir = Path.home() / '.local-transcription'
        self.lock_file = self.lock_dir / 'app.lock'

    def acquire(self) -> bool:
        """
        Try to acquire the instance lock.

        Returns:
            True if lock acquired (no other instance running),
            False if another instance is already running or the lock file
            could not be written.
        """
        # Ensure lock directory exists
        self.lock_dir.mkdir(parents=True, exist_ok=True)

        try:
            pid_str = self.lock_file.read_text().strip()
            owner_pid = int(pid_str) if pid_str else None
        except (ValueError, OSError):
            # Missing, unreadable or corrupt lock file: safe to overwrite.
            owner_pid = None

        # A lock carrying our own PID is either ours already or a stale file
        # whose PID was recycled for this process; in both cases it must not
        # block us.
        if (
            owner_pid is not None
            and owner_pid != os.getpid()
            and self._is_process_running(owner_pid)
        ):
            return False

        # Write our PID to the lock file
        try:
            self.lock_file.write_text(str(os.getpid()))
            return True
        except OSError:
            return False

    def release(self):
        """Release the instance lock if (and only if) this process owns it."""
        try:
            if self.lock_file.exists():
                # Only remove if it contains our PID
                pid_str = self.lock_file.read_text().strip()
                if pid_str and int(pid_str) == os.getpid():
                    self.lock_file.unlink()
        except (ValueError, OSError):
            # Best effort: a lock we cannot read or delete is left in place.
            pass

    def _is_process_running(self, pid: int) -> bool:
        """
        Check if a process with the given PID is running.

        Args:
            pid: Process ID to check

        Returns:
            True if process is running, False otherwise
        """
        # PID 0 and negative PIDs address process groups in os.kill(), so a
        # corrupt lock file must never be probed with them.
        if pid <= 0:
            return False

        if sys.platform == 'win32':
            # Windows: a process that can be opened exists.
            try:
                import ctypes
                kernel32 = ctypes.windll.kernel32
                SYNCHRONIZE = 0x00100000
                process = kernel32.OpenProcess(SYNCHRONIZE, False, pid)
                if process:
                    kernel32.CloseHandle(process)
                    return True
                return False
            except Exception:
                return False

        # Unix/Linux/macOS: signal 0 performs the existence and permission
        # checks without delivering a signal.
        try:
            os.kill(pid, 0)
        except PermissionError:
            # The process exists but belongs to another user.
            return True
        except (OSError, OverflowError):
            # No such process, or a PID too large to be valid.
            return False
        return True

    def __enter__(self):
        """Context manager entry; returns the result of acquire()."""
        return self.acquire()

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit; releases the lock and never suppresses exceptions."""
        self.release()
        return False
--- a/client/remote_transcription.py
+++ b/client/remote_transcription.py
@@ -0,0 +1,346 @@
 """
 Remote Transcription Client
 Handles streaming audio to a remote transcription service and receiving transcriptions.
 Local fallback is not performed here: when the remote service is unavailable, audio is simply not sent and any local engine is expected to handle its own audio capture.
 """
 import asyncio
 import base64
 import json
 import logging
 import numpy as np
 from datetime import datetime
 from threading import Thread, Lock
 from typing import Optional, Callable
 from queue import Queue, Empty
 logger = logging.getLogger(__name__)
 class RemoteTranscriptionClient:
    """
    Client for remote transcription service.

    Streams audio to a remote server over a WebSocket and receives
    transcriptions. All network traffic runs on an asyncio event loop owned by
    one daemon thread started by start(); the callbacks are invoked from that
    thread.

    Messages are JSON objects with a "type" field:
      * sent: "auth" (api_key), "audio" (base64 float32 data, sample_rate),
        "end"
      * received: "auth_result" (success, message), "transcription"
        (text, is_preview), "error" (message), "pong"
    """

    # Seconds to yield to the event loop when no audio is queued. The queue is
    # a thread-safe queue.Queue, so it is polled rather than awaited.
    _QUEUE_POLL_INTERVAL = 0.01

    def __init__(
        self,
        server_url: str,
        api_key: str,
        on_transcription: Optional[Callable[[str, bool], None]] = None,
        on_error: Optional[Callable[[str], None]] = None,
        on_connection_change: Optional[Callable[[bool], None]] = None,
        sample_rate: int = 16000
    ):
        """
        Initialize remote transcription client.

        Args:
            server_url: WebSocket URL of the transcription service
            api_key: API key for authentication
            on_transcription: Callback for transcriptions (text, is_preview)
            on_error: Callback for errors
            on_connection_change: Callback for connection status changes
            sample_rate: Audio sample rate
        """
        self.server_url = server_url
        self.api_key = api_key
        self.sample_rate = sample_rate
        self.on_transcription = on_transcription
        self.on_error = on_error
        self.on_connection_change = on_connection_change

        self.websocket = None
        self.is_connected = False
        self.is_authenticated = False
        self.is_running = False

        # Audio chunks handed over by send_audio() and drained by _send_loop().
        self.audio_queue: Queue = Queue()
        # Thread that owns the event loop (runs both send and receive loops).
        self.send_thread: Optional[Thread] = None
        self.receive_thread: Optional[Thread] = None
        self.loop: Optional[asyncio.AbstractEventLoop] = None
        self._lock = Lock()

    async def _connect(self):
        """
        Establish WebSocket connection and authenticate.

        Returns:
            True when the server accepted the API key, False on any
            connection or authentication failure (reported via on_error).
        """
        try:
            import websockets

            logger.info(f"Connecting to {self.server_url}")
            self.websocket = await websockets.connect(
                self.server_url,
                ping_interval=30,
                ping_timeout=10
            )

            # Authenticate
            auth_message = {
                "type": "auth",
                "api_key": self.api_key
            }
            await self.websocket.send(json.dumps(auth_message))

            # Wait for auth response
            response = await asyncio.wait_for(
                self.websocket.recv(),
                timeout=10.0
            )
            auth_result = json.loads(response)

            if auth_result.get("type") == "auth_result" and auth_result.get("success"):
                self.is_connected = True
                self.is_authenticated = True
                logger.info("Connected and authenticated to remote transcription service")
                if self.on_connection_change:
                    self.on_connection_change(True)
                return True
            else:
                error_msg = auth_result.get("message", "Authentication failed")
                logger.error(f"Authentication failed: {error_msg}")
                if self.on_error:
                    self.on_error(f"Authentication failed: {error_msg}")
                return False
        except Exception as e:
            logger.error(f"Connection failed: {e}")
            if self.on_error:
                self.on_error(f"Connection failed: {e}")
            return False

    async def _send_loop(self):
        """Send audio chunks from the queue until stopped or a send fails."""
        while self.is_running and self.websocket:
            try:
                try:
                    audio_data = self.audio_queue.get_nowait()
                except Empty:
                    # Never block inside a coroutine: a blocking Queue.get()
                    # would freeze the whole event loop (receive loop and
                    # keep-alive pings) while waiting for audio.
                    await asyncio.sleep(self._QUEUE_POLL_INTERVAL)
                    continue

                if audio_data is None:
                    continue

                # Encode audio as base64
                audio_bytes = audio_data.astype(np.float32).tobytes()
                audio_b64 = base64.b64encode(audio_bytes).decode('utf-8')

                # Send to server
                message = {
                    "type": "audio",
                    "data": audio_b64,
                    "sample_rate": self.sample_rate
                }
                await self.websocket.send(json.dumps(message))
            except Exception as e:
                if self.is_running:
                    logger.error(f"Send error: {e}")
                break

    async def _receive_loop(self):
        """Receive transcriptions from the server and dispatch callbacks."""
        while self.is_running and self.websocket:
            try:
                # Short timeout so the loop re-checks is_running regularly.
                message = await asyncio.wait_for(
                    self.websocket.recv(),
                    timeout=1.0
                )
                data = json.loads(message)
                msg_type = data.get("type", "")

                if msg_type == "transcription":
                    text = data.get("text", "")
                    is_preview = data.get("is_preview", False)
                    if text and self.on_transcription:
                        self.on_transcription(text, is_preview)
                elif msg_type == "error":
                    error_msg = data.get("message", "Unknown error")
                    logger.error(f"Server error: {error_msg}")
                    if self.on_error:
                        self.on_error(error_msg)
                elif msg_type == "pong":
                    pass  # Keep-alive response
            except asyncio.TimeoutError:
                continue
            except Exception as e:
                if self.is_running:
                    logger.error(f"Receive error: {e}")
                break

        # Connection lost
        self.is_connected = False
        self.is_authenticated = False
        if self.on_connection_change:
            self.on_connection_change(False)

    def _run_async(self):
        """Run the async event loop in a thread (thread target of start())."""
        self.loop = asyncio.new_event_loop()
        asyncio.set_event_loop(self.loop)
        try:
            # Connect
            connected = self.loop.run_until_complete(self._connect())
            if not connected:
                return

            # Run send and receive loops
            tasks = [
                self._send_loop(),
                self._receive_loop()
            ]
            self.loop.run_until_complete(asyncio.gather(*tasks))
        except Exception as e:
            logger.error(f"Async loop error: {e}")
        finally:
            if self.websocket:
                try:
                    self.loop.run_until_complete(self.websocket.close())
                except Exception as e:
                    # Closing is best effort; the socket may already be gone.
                    logger.debug(f"Error closing websocket: {e}")
            self.loop.close()

    def start(self):
        """
        Start the remote transcription client.

        Does nothing while a worker thread is already running. If a previous
        worker has exited (for example after a failed connection), a new one
        is started.
        """
        with self._lock:
            worker = self.send_thread
            if self.is_running and worker is not None and worker.is_alive():
                return
            self.is_running = True

            # Start async loop in background thread
            self.send_thread = Thread(target=self._run_async, daemon=True)
            self.send_thread.start()

    def stop(self):
        """Stop the remote transcription client."""
        with self._lock:
            self.is_running = False

            # Signal end to server
            websocket, loop = self.websocket, self.loop
            if websocket and loop and not loop.is_closed():
                try:
                    asyncio.run_coroutine_threadsafe(
                        websocket.send(json.dumps({"type": "end"})),
                        loop
                    )
                except Exception as e:
                    # Best effort: the loop may be shutting down concurrently.
                    logger.debug(f"Could not send end signal: {e}")

            self.is_connected = False
            self.is_authenticated = False

    def send_audio(self, audio_data: np.ndarray):
        """
        Send audio data for transcription.

        The chunk is dropped when the client is not connected and
        authenticated.

        Args:
            audio_data: Audio data as numpy array (float32, mono, sample_rate)
        """
        if self.is_connected and self.is_authenticated:
            self.audio_queue.put(audio_data)

    @property
    def connected(self) -> bool:
        """Check if connected and authenticated."""
        return self.is_connected and self.is_authenticated
 class RemoteTranscriptionManager:
    """
    Owns a RemoteTranscriptionClient and routes its results to callbacks.

    A local engine can be supplied, but this class never drives it: when the
    remote client is not connected, audio passed to send_audio() is not
    forwarded anywhere.
    """

    def __init__(
        self,
        server_url: str,
        api_key: str,
        local_engine=None,
        on_transcription: Optional[Callable] = None,
        on_preview: Optional[Callable] = None
    ):
        """
        Store configuration; no connection is made until start().

        Args:
            server_url: Remote transcription service URL
            api_key: API key for authentication
            local_engine: Local transcription engine kept for fallback
            on_transcription: Called with the text of each final transcription
            on_preview: Called with the text of each preview transcription
        """
        # Runtime state
        self.client: Optional[RemoteTranscriptionClient] = None
        self.use_remote = True
        self.is_running = False

        # Configuration
        self.server_url = server_url
        self.api_key = api_key
        self.local_engine = local_engine

        # Result callbacks
        self.on_transcription = on_transcription
        self.on_preview = on_preview

    def _handle_transcription(self, text: str, is_preview: bool):
        """Forward a remote result to the preview or final callback, if set."""
        callback = self.on_preview if is_preview else self.on_transcription
        if callback:
            callback(text)

    def _handle_error(self, error: str):
        """Log an error reported by the remote client."""
        logger.error(f"Remote transcription error: {error}")
        # Could switch to local fallback here

    def _handle_connection_change(self, connected: bool):
        """Log a change in the remote connection state."""
        if not connected:
            logger.warning("Remote transcription disconnected")
            # Could switch to local fallback here
            return
        logger.info("Remote transcription connected")

    def start(self):
        """Mark the manager running and start a client if remote use is configured."""
        if self.is_running:
            return
        self.is_running = True

        remote_configured = self.use_remote and self.server_url and self.api_key
        if not remote_configured:
            return

        self.client = RemoteTranscriptionClient(
            server_url=self.server_url,
            api_key=self.api_key,
            on_transcription=self._handle_transcription,
            on_error=self._handle_error,
            on_connection_change=self._handle_connection_change
        )
        self.client.start()

    def stop(self):
        """Mark the manager stopped and shut down the client, if any."""
        self.is_running = False
        if not self.client:
            return
        self.client.stop()
        self.client = None

    def send_audio(self, audio_data: np.ndarray):
        """Forward audio to the remote client when it is connected."""
        if self.client and self.client.connected:
            self.client.send_audio(audio_data)
        elif self.local_engine:
            # Nothing to forward: the local engine handles its own audio capture.
            pass

    @property
    def is_connected(self) -> bool:
        """True when a client exists and reports itself connected."""
        if self.client is None:
            return False
        return self.client.connected
--- a/client/server_sync.py
+++ b/client/server_sync.py
@@ -2,7 +2,9 @@
 import requests
 import json
-from typing import Optional
+import base64
 from pathlib import Path
 from typing import Optional, List
 from datetime import datetime
 import threading
 import queue
@@ -10,22 +12,41 @@ from concurrent.futures import ThreadPoolExecutor
 class ServerSyncClient:
-    """Client for syncing transcriptions to a PHP server."""
+    """Client for syncing transcriptions to a multi-user server."""
-    def __init__(self, url: str, room: str, passphrase: str, user_name: str):
+    def __init__(self, url: str, room: str, passphrase: str, user_name: str,
                 fonts_dir: Optional[Path] = None,
                 font_source: str = "None",
                 websafe_font: Optional[str] = None,
                 google_font: Optional[str] = None,
                 custom_font_file: Optional[str] = None):
        """
        Initialize server sync client.
        Args:
-            url: Server URL (e.g., http://example.com/transcription/server.php)
+            url: Server URL (e.g., http://example.com/api/send)
            room: Room name
            passphrase: Room passphrase
            user_name: User's display name
            fonts_dir: Optional directory containing custom fonts to upload
            font_source: Font source type ("None", "Web-Safe", "Google Font", "Custom File")
            websafe_font: Web-safe font name (e.g., "Arial", "Times New Roman")
            google_font: Google Font name (e.g., "Roboto", "Open Sans")
            custom_font_file: Path to a custom font file for this speaker
        """
        self.url = url
        self.room = room
        self.passphrase = passphrase
        self.user_name = user_name
        self.fonts_dir = fonts_dir
        self.font_source = font_source
        self.websafe_font = websafe_font
        self.google_font = google_font
        self.custom_font_file = custom_font_file
        # Font info to send with transcriptions
        self.font_family: Optional[str] = None
        self.font_type: Optional[str] = None  # "websafe", "google", "custom"
        # Queue for sending transcriptions asynchronously
        self.send_queue = queue.Queue()
@@ -50,6 +71,153 @@ class ServerSyncClient:
        self.send_thread.start()
        print(f"Server sync started: room={self.room}")
        # Set up font based on source type
        if self.font_source == "Web-Safe" and self.websafe_font:
            self.font_family = self.websafe_font
            self.font_type = "websafe"
            print(f"Using web-safe font: {self.font_family}")
        elif self.font_source == "Google Font" and self.google_font:
            self.font_family = self.google_font
            self.font_type = "google"
            print(f"Using Google Font: {self.font_family}")
        elif self.font_source == "Custom File" and self.custom_font_file:
            self._upload_custom_font()
        # Legacy fallback: upload all fonts from fonts_dir if available
        elif self.fonts_dir:
            self._upload_fonts()
    def _upload_custom_font(self):
        """
        Upload this speaker's custom font file to the server's /api/fonts endpoint.

        On success font_family is the file name without extension and
        font_type is "custom". On an upload failure or any exception both are
        reset to None. A missing path, missing file or unsupported extension
        returns early without touching either attribute.
        """
        if not self.custom_font_file:
            return

        source = Path(self.custom_font_file)
        if not source.exists():
            print(f"Custom font file not found: {self.custom_font_file}")
            return

        # Supported extensions and the MIME type reported for each.
        mime_by_suffix = {
            '.ttf': 'font/ttf',
            '.otf': 'font/otf',
            '.woff': 'font/woff',
            '.woff2': 'font/woff2'
        }
        suffix = source.suffix.lower()
        if suffix not in mime_by_suffix:
            print(f"Invalid font file type: {source.suffix}")
            return

        try:
            encoded = base64.b64encode(source.read_bytes()).decode('utf-8')

            # Font family name is filename without extension
            self.font_family = source.stem
            print(f"Uploading custom font: {source.name} (family: {self.font_family})")

            # The fonts endpoint lives at the root of the sync server's host.
            from urllib.parse import urlparse
            target = urlparse(self.url)
            fonts_url = f"{target.scheme}://{target.netloc}/api/fonts"

            body = {
                'room': self.room,
                'passphrase': self.passphrase,
                'fonts': [{
                    'name': source.name,
                    'data': encoded,
                    'mime': mime_by_suffix[suffix]
                }]
            }
            response = requests.post(fonts_url, json=body, timeout=30.0)

            if response.status_code != 200:
                print(f"Custom font upload failed: {response.status_code}")
                self.font_family = None
                self.font_type = None
            else:
                # The body is parsed but not otherwise used; a non-JSON body
                # raises here and is handled as a failure below.
                response.json()
                self.font_type = "custom"
                print(f"Custom font uploaded: {self.font_family}")
        except Exception as e:
            print(f"Error uploading custom font: {e}")
            self.font_family = None
            self.font_type = None
    def _upload_fonts(self):
        """
        Upload every font file found in fonts_dir to the server's /api/fonts endpoint.

        Files that cannot be read are reported and skipped. Nothing is sent
        when fonts_dir is unset or missing, or when no font could be
        prepared. Upload errors are printed, never raised.
        """
        if not (self.fonts_dir and self.fonts_dir.exists()):
            return

        # Supported extensions and the MIME type reported for each.
        mime_by_suffix = {
            '.ttf': 'font/ttf',
            '.otf': 'font/otf',
            '.woff': 'font/woff',
            '.woff2': 'font/woff2'
        }

        # Find font files
        candidates = []
        for entry in self.fonts_dir.iterdir():
            if entry.is_file() and entry.suffix.lower() in mime_by_suffix:
                candidates.append(entry)
        if not candidates:
            return

        # Prepare font data
        prepared = []
        for candidate in candidates:
            try:
                encoded = base64.b64encode(candidate.read_bytes()).decode('utf-8')
                prepared.append({
                    'name': candidate.name,
                    'data': encoded,
                    'mime': mime_by_suffix[candidate.suffix.lower()]
                })
                print(f"Prepared font for upload: {candidate.name}")
            except Exception as e:
                print(f"Error reading font file {candidate}: {e}")
        if not prepared:
            return

        # Upload to server
        try:
            # The fonts endpoint lives at the root of the sync server's host.
            from urllib.parse import urlparse
            target = urlparse(self.url)
            fonts_url = f"{target.scheme}://{target.netloc}/api/fonts"

            body = {
                'room': self.room,
                'passphrase': self.passphrase,
                'fonts': prepared
            }
            # Longer timeout for font uploads
            response = requests.post(fonts_url, json=body, timeout=30.0)

            if response.status_code != 200:
                print(f"Font upload failed: {response.status_code}")
            else:
                reply = response.json()
                print(f"Fonts uploaded successfully: {reply.get('message', '')}")
        except Exception as e:
            print(f"Error uploading fonts: {e}")
    def stop(self):
        """Stop the sync client."""
        self.is_running = False
@@ -59,13 +227,14 @@ class ServerSyncClient:
        self.executor.shutdown(wait=False)  # Don't wait - let pending requests finish in background
        print("Server sync stopped")
-    def send_transcription(self, text: str, timestamp: Optional[datetime] = None):
+    def send_transcription(self, text: str, timestamp: Optional[datetime] = None, is_preview: bool = False):
        """
        Send a transcription to the server (non-blocking).
        Args:
            text: Transcription text
            timestamp: Timestamp (defaults to now)
            is_preview: Whether this is a preview transcription
        """
        if timestamp is None:
            timestamp = datetime.now()
@@ -78,9 +247,20 @@ class ServerSyncClient:
        self.send_queue.put({
            'text': text,
            'timestamp': timestamp.strftime("%H:%M:%S"),
            'is_preview': is_preview,
            'queue_time': queue_time  # For debugging
        })
    def send_preview(self, text: str, timestamp: Optional[datetime] = None):
        """
        Queue a preview (not yet final) transcription for the server (non-blocking).

        Delegates to send_transcription() with is_preview set to True.

        Args:
            text: Preview transcription text
            timestamp: Timestamp, passed through unchanged (None lets
                send_transcription() pick its default)
        """
        self.send_transcription(text=text, timestamp=timestamp, is_preview=True)
    def _send_loop(self):
        """Background thread for sending transcriptions."""
        while self.is_running:
@@ -122,28 +302,25 @@ class ServerSyncClient:
                'passphrase': self.passphrase,
                'user_name': self.user_name,
                'text': trans_data['text'],
-                'timestamp': trans_data['timestamp']
+                'timestamp': trans_data['timestamp'],
                'is_preview': trans_data.get('is_preview', False)
            }
-            # Detect server type and send appropriately
+            # Add font info if user has a custom font configured
-            # PHP servers have "server.php" in URL and need ?action=send
+            if self.font_family:
-            # Node.js servers have "/api/send" in URL and don't need it
+                payload['font_family'] = self.font_family
-            request_start = time.time()
+                payload['font_type'] = self.font_type  # "websafe", "google", or "custom"
-            if 'server.php' in self.url:
+                print(f"[Server Sync] Sending with font: {self.font_family} ({self.font_type})")
                # PHP server - add action parameter
                response = requests.post(
                    self.url,
                    params={'action': 'send'},
                    json=payload,
                    timeout=2.0  # Reduced timeout for faster failure detection
                )
            else:
-                # Node.js server - no action parameter
+                print(f"[Server Sync] No font configured (font_source={self.font_source})")
-                response = requests.post(
+
-                    self.url,
+            # Send to Node.js server
-                    json=payload,
+            request_start = time.time()
-                    timeout=2.0  # Reduced timeout for faster failure detection
+            response = requests.post(
-                )
+                self.url,
                json=payload,
                timeout=2.0  # Reduced timeout for faster failure detection
            )
            request_time = (time.time() - request_start) * 1000
            print(f"[Server Sync] HTTP request: {request_time:.0f}ms, Status: {response.status_code}")
--- a/client/transcription_engine_realtime.py
+++ b/client/transcription_engine_realtime.py
@@ -29,7 +29,7 @@ class TranscriptionResult:
    def __repr__(self) -> str:
        time_str = self.timestamp.strftime("%H:%M:%S")
        prefix = "[FINAL]" if self.is_final else "[PREVIEW]"
-        if self.user_name:
+        if self.user_name and self.user_name.strip():
            return f"{prefix} [{time_str}] {self.user_name}: {self.text}"
        return f"{prefix} [{time_str}] {self.text}"
@@ -63,6 +63,7 @@ class RealtimeTranscriptionEngine:
        # Realtime preview settings
        enable_realtime_transcription: bool = False,
        realtime_model: str = "tiny.en",
        realtime_processing_pause: float = 0.1,  # How often to update preview (lower = more frequent)
        # VAD settings
        silero_sensitivity: float = 0.4,
        silero_use_onnx: bool = True,
@@ -106,11 +107,21 @@ class RealtimeTranscriptionEngine:
            user_name: User name for transcriptions
        """
        self.model = model
        self.device = device
        self.language = language
        self.compute_type = compute_type
        # Resolve device - 'auto' means use CUDA if available, else CPU
        if device == 'auto':
            try:
                import torch
                self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
            except:
                self.device = 'cpu'
        else:
            self.device = device
        self.enable_realtime = enable_realtime_transcription
        self.realtime_model = realtime_model
        self.realtime_processing_pause = realtime_processing_pause
        self.user_name = user_name
        # Callbacks
@@ -131,6 +142,7 @@ class RealtimeTranscriptionEngine:
        # Store configuration for recorder initialization
        self.config = {
            'model': model,
            'device': self.device,  # Use resolved device (auto -> cuda/cpu)
            'language': language if language != 'auto' else None,
            'compute_type': compute_type if compute_type != 'default' else 'default',
            'input_device_index': input_device_index,
@@ -145,8 +157,18 @@ class RealtimeTranscriptionEngine:
            'initial_prompt': initial_prompt if initial_prompt else None,
            'enable_realtime_transcription': enable_realtime_transcription,
            'realtime_model_type': realtime_model if enable_realtime_transcription else None,
            'realtime_processing_pause': realtime_processing_pause if enable_realtime_transcription else 0.2,
            # The realtime callback is added during initialize() after set_callbacks is called
        }
    def _is_cuda_available(self) -> bool:
        """Check if CUDA is available through PyTorch.

        Returns:
            The result of ``torch.cuda.is_available()``, or ``False`` when
            torch cannot be imported or the probe itself raises.
        """
        try:
            # Imported lazily; a missing torch is treated as "no CUDA".
            import torch
            return torch.cuda.is_available()
        except Exception:
            # Best-effort probe: any import/driver error means "no CUDA".
            # ``except Exception`` (rather than a bare ``except:``) lets
            # KeyboardInterrupt and SystemExit propagate normally.
            return False
    def set_callbacks(
        self,
        realtime_callback: Optional[Callable[[TranscriptionResult], None]] = None,
@@ -198,8 +220,15 @@ class RealtimeTranscriptionEngine:
            try:
                print(f"Initializing RealtimeSTT with model: {self.model}")
                print(f"  Device: {self.device}, Compute type: {self.compute_type}")
                if self.enable_realtime:
                    print(f"  Realtime preview enabled with model: {self.realtime_model}")
                    print(f"  Realtime processing pause: {self.realtime_processing_pause}s")
                # Add realtime transcription callback if enabled
                # This provides word-by-word updates as speech is being processed
                if self.enable_realtime:
                    self.config['on_realtime_transcription_update'] = self._on_realtime_transcription
                # Create recorder with configuration
                self.recorder = AudioToTextRecorder(**self.config)
@@ -325,7 +354,7 @@ class RealtimeTranscriptionEngine:
        Returns:
            True if model changed successfully
        """
-        was_running = self.is_running
+        was_running = self.is_recording
        # Stop current recording
        self.stop()
@@ -355,7 +384,7 @@ class RealtimeTranscriptionEngine:
        Returns:
            True if device changed successfully
        """
-        was_running = self.is_running
+        was_running = self.is_recording
        # Stop current recording
        self.stop()
@@ -396,7 +425,7 @@ class RealtimeTranscriptionEngine:
        self.config['webrtc_sensitivity'] = webrtc_sensitivity
        # If running, need to restart to apply changes
-        if self.is_running:
+        if self.is_recording:
            print("VAD settings updated. Restart transcription to apply changes.")
    def set_user_name(self, user_name: str):
@@ -404,7 +433,7 @@ class RealtimeTranscriptionEngine:
        self.user_name = user_name
    def __repr__(self) -> str:
-        return f"RealtimeTranscriptionEngine(model={self.model}, device={self.device}, running={self.is_running})"
+        return f"RealtimeTranscriptionEngine(model={self.model}, device={self.device}, running={self.is_recording})"
    def __del__(self):
        """Cleanup when object is destroyed."""
--- a/config/default_config.yaml
+++ b/config/default_config.yaml
@@ -16,6 +16,7 @@ transcription:
  # Realtime preview settings (optional faster preview before final transcription)
  enable_realtime_transcription: false
  realtime_model: "tiny.en"  # Faster model for instant preview
  realtime_processing_pause: 0.1  # Seconds between preview updates (lower = more responsive, default 0.1)
  # VAD (Voice Activity Detection) settings
  silero_sensitivity: 0.4  # 0.0-1.0, higher = more sensitive (detects more speech)
@@ -35,16 +36,26 @@ transcription:
  # Performance settings
  no_log_file: true  # Disable RealtimeSTT logging
  # Fast speaker mode - for speakers who talk quickly without pauses
  # Reduces silence detection thresholds for more frequent transcription outputs
  continuous_mode: false
 server_sync:
  enabled: false
  url: "http://localhost:3000/api/send"
  room: "default"
  passphrase: ""
  # Font settings are now in the display section (shared for local and server sync)
 display:
  show_timestamps: true
  max_lines: 100
-  font_family: "Courier"
+  # Font settings (used for both local display and server sync)
  font_source: "System Font"  # Options: System Font, Web-Safe, Google Font, Custom File
  font_family: "Courier"  # System font name (local only, won't work with server sync)
  websafe_font: "Arial"  # Web-safe font name
  google_font: "Roboto"  # Google Font name
  custom_font_file: ""  # Path to custom font file (.ttf, .otf, .woff, .woff2)
  font_size: 12
  theme: "dark"
  fade_after_seconds: 10  # Time before transcriptions fade out (0 = never fade)
@@ -52,3 +63,9 @@ display:
 web_server:
  port: 8080
  host: "127.0.0.1"
 remote_processing:
  enabled: false  # Enable remote transcription offloading
  server_url: ""  # WebSocket URL of remote transcription service (e.g., ws://your-server:8765/ws/transcribe)
  api_key: ""  # API key for authentication
  fallback_to_local: true  # Fall back to local processing if remote fails
--- a/gui/main_window_qt.py
+++ b/gui/main_window_qt.py
@@ -9,16 +9,16 @@ from PySide6.QtGui import QFont
 from pathlib import Path
 import sys
-# Add parent directory to path for imports
+# Add parent directory to path for imports (resolve symlinks)
-sys.path.append(str(Path(__file__).parent.parent))
+sys.path.append(str(Path(__file__).resolve().parent.parent))
 from client.config import Config
 from client.device_utils import DeviceManager
 from client.transcription_engine_realtime import RealtimeTranscriptionEngine, TranscriptionResult
 from client.server_sync import ServerSyncClient
 from gui.transcription_display_qt import TranscriptionDisplay
 from gui.settings_dialog_qt import SettingsDialog
 from server.web_display import TranscriptionWebServer
 from version import __version__
 import asyncio
 from threading import Thread
@@ -96,9 +96,13 @@ class MainWindow(QMainWindow):
        # Server sync components
        self.server_sync_client: ServerSyncClient = None
        # Store all transcriptions for saving (separate from display)
        self.transcriptions: list = []
        # Configure window
        self.setWindowTitle("Local Transcription")
-        self.resize(900, 700)
+        self.resize(700, 300)
        self.setMinimumSize(600, 280)
        # Set application icon
        # In PyInstaller frozen executables, use _MEIPASS for bundled files
@@ -108,7 +112,7 @@ class MainWindow(QMainWindow):
            icon_path = Path(sys._MEIPASS) / "LocalTranscription.png"
        else:
            # Running in normal Python
-            icon_path = Path(__file__).parent.parent / "LocalTranscription.png"
+            icon_path = Path(__file__).resolve().parent.parent / "LocalTranscription.png"
        if icon_path.exists():
            from PySide6.QtGui import QIcon
@@ -174,13 +178,14 @@ class MainWindow(QMainWindow):
        # Status bar
        status_widget = QWidget()
-        status_widget.setFixedHeight(60)
+        status_widget.setFixedHeight(40)
        status_layout = QHBoxLayout()
        status_layout.setContentsMargins(0, 0, 0, 0)
        status_widget.setLayout(status_layout)
        self.status_label = QLabel("⚫ Initializing...")
        status_font = QFont()
-        status_font.setPointSize(14)
+        status_font.setPointSize(12)
        self.status_label.setFont(status_font)
        status_layout.addWidget(self.status_label)
@@ -193,28 +198,36 @@ class MainWindow(QMainWindow):
        self.user_label = QLabel(f"User: {user_name}")
        status_layout.addWidget(self.user_label)
        # Web display link
        web_host = self.config.get('web_server.host', '127.0.0.1')
        web_port = self.config.get('web_server.port', 8080)
        web_url = f"http://{web_host}:{web_port}"
        self.web_link = QLabel(f'<a href="{web_url}">🌐 Open Web Display</a>')
        self.web_link.setOpenExternalLinks(True)
        self.web_link.setToolTip(f"Click to open {web_url} in browser (for OBS)")
        self.web_link.setStyleSheet("QLabel { color: #4CAF50; }")
        status_layout.addWidget(self.web_link)
        status_layout.addStretch()
        main_layout.addWidget(status_widget)
-        # Transcription display
+        # Web display links section
-        self.transcription_display = TranscriptionDisplay(
+        links_widget = QWidget()
-            max_lines=self.config.get('display.max_lines', 100),
+        links_layout = QVBoxLayout()
-            show_timestamps=self.config.get('display.show_timestamps', True),
+        links_layout.setContentsMargins(0, 5, 0, 5)
-            font_family=self.config.get('display.font_family', 'Courier'),
+        links_layout.setSpacing(5)
-            font_size=self.config.get('display.font_size', 12)
+        links_widget.setLayout(links_layout)
-        )
+
-        main_layout.addWidget(self.transcription_display)
+        # Local web display link
        web_host = self.config.get('web_server.host', '127.0.0.1')
        web_port = self.config.get('web_server.port', 8080)
        web_url = f"http://{web_host}:{web_port}"
        self.web_link = QLabel(f'🌐 Local Web Display: <a href="{web_url}">{web_url}</a>')
        self.web_link.setOpenExternalLinks(True)
        self.web_link.setToolTip("Click to open in browser (for OBS)")
        self.web_link.setStyleSheet("QLabel a { color: #4CAF50; }")
        links_layout.addWidget(self.web_link)
        # Multi-user sync display link (shown when server sync is enabled)
        self.sync_link = QLabel("")
        self.sync_link.setOpenExternalLinks(True)
        self.sync_link.setStyleSheet("QLabel a { color: #2196F3; }")
        self.sync_link.setVisible(False)
        links_layout.addWidget(self.sync_link)
        self._update_sync_link()
        main_layout.addWidget(links_widget)
        # Control buttons
        control_widget = QWidget()
@@ -232,7 +245,7 @@ class MainWindow(QMainWindow):
        self.start_button.setStyleSheet("background-color: #2ecc71; color: white;")
        control_layout.addWidget(self.start_button)
-        self.clear_button = QPushButton("Clear")
+        self.clear_button = QPushButton("🗑 Clear")
        self.clear_button.setFixedSize(120, 50)
        self.clear_button.clicked.connect(self._clear_transcriptions)
        control_layout.addWidget(self.clear_button)
@@ -246,6 +259,12 @@ class MainWindow(QMainWindow):
        main_layout.addWidget(control_widget)
        # Version label (bottom right)
        version_label = QLabel(f"v{__version__}")
        version_label.setStyleSheet("QLabel { color: #666; font-size: 10px; }")
        version_label.setAlignment(Qt.AlignRight)
        main_layout.addWidget(version_label)
    def _initialize_components(self):
        """Initialize RealtimeSTT transcription engine."""
        # Update status
@@ -271,6 +290,20 @@ class MainWindow(QMainWindow):
        user_name = self.config.get('user.name', 'User')
        # Check for continuous/fast speaker mode
        continuous_mode = self.config.get('transcription.continuous_mode', False)
        # Get timing settings - use faster values if continuous mode is enabled
        if continuous_mode:
            # Faster settings for speakers who talk without pauses
            post_speech_silence = 0.15  # Reduced from default 0.3
            min_gap = 0.0  # No gap between recordings
            min_recording = 0.3  # Shorter minimum recording
        else:
            post_speech_silence = self.config.get('transcription.post_speech_silence_duration', 0.3)
            min_gap = self.config.get('transcription.min_gap_between_recordings', 0.0)
            min_recording = self.config.get('transcription.min_length_of_recording', 0.5)
        self.transcription_engine = RealtimeTranscriptionEngine(
            model=model,
            device=device,
@@ -278,12 +311,13 @@ class MainWindow(QMainWindow):
            compute_type=compute_type,
            enable_realtime_transcription=self.config.get('transcription.enable_realtime_transcription', False),
            realtime_model=self.config.get('transcription.realtime_model', 'tiny.en'),
            realtime_processing_pause=self.config.get('transcription.realtime_processing_pause', 0.1),
            silero_sensitivity=self.config.get('transcription.silero_sensitivity', 0.4),
            silero_use_onnx=self.config.get('transcription.silero_use_onnx', True),
            webrtc_sensitivity=self.config.get('transcription.webrtc_sensitivity', 3),
-            post_speech_silence_duration=self.config.get('transcription.post_speech_silence_duration', 0.3),
+            post_speech_silence_duration=post_speech_silence,
-            min_length_of_recording=self.config.get('transcription.min_length_of_recording', 0.5),
+            min_length_of_recording=min_recording,
-            min_gap_between_recordings=self.config.get('transcription.min_gap_between_recordings', 0.0),
+            min_gap_between_recordings=min_gap,
            pre_recording_buffer_duration=self.config.get('transcription.pre_recording_buffer_duration', 0.2),
            beam_size=self.config.get('transcription.beam_size', 5),
            initial_prompt=self.config.get('transcription.initial_prompt', ''),
@@ -332,6 +366,12 @@ class MainWindow(QMainWindow):
            max_lines = self.config.get('display.max_lines', 50)
            font_family = self.config.get('display.font_family', 'Arial')
            font_size = self.config.get('display.font_size', 16)
            fonts_dir = self.config.fonts_dir  # Custom fonts directory
            # Font source settings
            font_source = self.config.get('display.font_source', 'System Font')
            websafe_font = self.config.get('display.websafe_font', 'Arial')
            google_font = self.config.get('display.google_font', 'Roboto')
            # Try up to 5 ports if the default is in use
            ports_to_try = [port] + [port + i for i in range(1, 5)]
@@ -346,7 +386,11 @@ class MainWindow(QMainWindow):
                    fade_after_seconds=fade_after_seconds,
                    max_lines=max_lines,
                    font_family=font_family,
-                    font_size=font_size
+                    font_size=font_size,
                    fonts_dir=fonts_dir,
                    font_source=font_source,
                    websafe_font=websafe_font,
                    google_font=google_font
                )
                self.web_server_thread = WebServerThread(self.web_server)
                self.web_server_thread.start()
@@ -450,15 +494,21 @@ class MainWindow(QMainWindow):
            return
        try:
-            # Update display with preview (thread-safe Qt call)
+            # Broadcast preview to local web server
-            from PySide6.QtCore import QMetaObject, Q_ARG
+            if self.web_server and self.web_server_thread and self.web_server_thread.loop:
-            QMetaObject.invokeMethod(
+                asyncio.run_coroutine_threadsafe(
-                self.transcription_display,
+                    self.web_server.broadcast_preview(
-                "add_transcription",
+                        result.text,
-                Qt.QueuedConnection,
+                        result.user_name,
-                Q_ARG(str, f"[PREVIEW] {result.text}"),
+                        result.timestamp
-                Q_ARG(str, result.user_name)
+                    ),
-            )
+                    self.web_server_thread.loop
                )
            # Send preview to server sync if enabled
            if self.server_sync_client:
                self.server_sync_client.send_preview(result.text, result.timestamp)
        except Exception as e:
            print(f"Error handling realtime transcription: {e}")
@@ -468,15 +518,8 @@ class MainWindow(QMainWindow):
            return
        try:
-            # Update display (thread-safe Qt call)
+            # Store transcription for saving
-            from PySide6.QtCore import QMetaObject, Q_ARG
+            self.transcriptions.append(result)
            QMetaObject.invokeMethod(
                self.transcription_display,
                "add_transcription",
                Qt.QueuedConnection,
                Q_ARG(str, result.text),
                Q_ARG(str, result.user_name)
            )
            # Broadcast to web server if enabled
            if self.web_server and self.web_server_thread:
@@ -508,18 +551,27 @@ class MainWindow(QMainWindow):
    def _clear_transcriptions(self):
        """Clear all stored transcriptions after user confirmation.

        Shows an informational dialog and returns early when nothing is
        stored. Otherwise asks the user to confirm and, on Yes, empties
        ``self.transcriptions`` and reports that the list was cleared.
        """
        if not self.transcriptions:
            QMessageBox.information(self, "No Transcriptions", "There are no transcriptions to clear.")
            return
        reply = QMessageBox.question(
            self,
            "Clear Transcriptions",
-            "Are you sure you want to clear all transcriptions?",
+            f"Are you sure you want to clear {len(self.transcriptions)} transcription(s)?",
            QMessageBox.Yes | QMessageBox.No
        )
        if reply == QMessageBox.Yes:
-            self.transcription_display.clear_all()
+            self.transcriptions.clear()
            QMessageBox.information(self, "Cleared", "All transcriptions have been cleared.")
    def _save_transcriptions(self):
        """Save transcriptions to file."""
        if not self.transcriptions:
            QMessageBox.warning(self, "No Transcriptions", "There are no transcriptions to save.")
            return
        filepath, _ = QFileDialog.getSaveFileName(
            self,
            "Save Transcriptions",
@@ -528,10 +580,21 @@ class MainWindow(QMainWindow):
        )
        if filepath:
-            if self.transcription_display.save_to_file(filepath):
+            try:
                show_timestamps = self.config.get('display.show_timestamps', True)
                with open(filepath, 'w', encoding='utf-8') as f:
                    for result in self.transcriptions:
                        line_parts = []
                        if show_timestamps:
                            time_str = result.timestamp.strftime("%H:%M:%S")
                            line_parts.append(f"[{time_str}]")
                        if result.user_name and result.user_name.strip():
                            line_parts.append(f"{result.user_name}:")
                        line_parts.append(result.text)
                        f.write(" ".join(line_parts) + "\n")
                QMessageBox.information(self, "Saved", f"Transcriptions saved to:\n{filepath}")
-            else:
+            except Exception as e:
-                QMessageBox.critical(self, "Error", "Failed to save transcriptions")
+                QMessageBox.critical(self, "Error", f"Failed to save transcriptions:\n{e}")
    def _open_settings(self):
        """Open settings dialog."""
@@ -569,22 +632,20 @@ class MainWindow(QMainWindow):
        user_name = self.config.get('user.name', 'User')
        self.user_label.setText(f"User: {user_name}")
        # Update display settings
        show_timestamps = self.config.get('display.show_timestamps', True)
        self.transcription_display.set_max_lines(self.config.get('display.max_lines', 100))
        self.transcription_display.set_show_timestamps(show_timestamps)
        self.transcription_display.set_font(
            self.config.get('display.font_family', 'Courier'),
            self.config.get('display.font_size', 12)
        )
        # Update web server settings
        if self.web_server:
-            self.web_server.show_timestamps = show_timestamps
+            self.web_server.show_timestamps = self.config.get('display.show_timestamps', True)
            self.web_server.fade_after_seconds = self.config.get('display.fade_after_seconds', 10)
            self.web_server.max_lines = self.config.get('display.max_lines', 50)
            self.web_server.font_family = self.config.get('display.font_family', 'Arial')
            self.web_server.font_size = self.config.get('display.font_size', 16)
            # Update font source settings
            self.web_server.font_source = self.config.get('display.font_source', 'System Font')
            self.web_server.websafe_font = self.config.get('display.websafe_font', 'Arial')
            self.web_server.google_font = self.config.get('display.google_font', 'Roboto')
        # Update sync link visibility based on server sync settings
        self._update_sync_link()
        # Restart server sync if it was running and settings changed
        if self.is_transcribing and self.server_sync_client:
@@ -656,18 +717,33 @@ class MainWindow(QMainWindow):
            room = self.config.get('server_sync.room', 'default')
            passphrase = self.config.get('server_sync.passphrase', '')
            user_name = self.config.get('user.name', 'User')
            fonts_dir = self.config.fonts_dir  # Custom fonts directory
            # Font settings (shared with display settings)
            # Note: "System Font" only works locally, so we treat it as "None" for server sync
            font_source = self.config.get('display.font_source', 'System Font')
            if font_source == "System Font":
                font_source = "None"  # System fonts don't work on remote displays
            websafe_font = self.config.get('display.websafe_font', '')
            google_font = self.config.get('display.google_font', '')
            custom_font_file = self.config.get('display.custom_font_file', '')
            if not url:
                print("Server sync enabled but no URL configured")
                return
-            print(f"Starting server sync: {url}, room: {room}, user: {user_name}")
+            print(f"Starting server sync: {url}, room: {room}, user: {user_name}, font: {font_source}")
            self.server_sync_client = ServerSyncClient(
                url=url,
                room=room,
                passphrase=passphrase,
-                user_name=user_name
+                user_name=user_name,
                fonts_dir=fonts_dir,
                font_source=font_source,
                websafe_font=websafe_font if websafe_font else None,
                google_font=google_font if google_font else None,
                custom_font_file=custom_font_file if custom_font_file else None
            )
            self.server_sync_client.start()
@@ -679,6 +755,40 @@ class MainWindow(QMainWindow):
                f"Failed to start server sync:\n{e}\n\nTranscription will continue locally."
            )
    def _update_sync_link(self):
        """Update the multi-user sync link visibility and URL.

        The link is shown only when server sync is enabled and the configured
        API URL contains both a scheme and a host; in every other case it is
        hidden. The link target is ``<scheme>://<host>/display`` with the
        room and the current display settings passed as query parameters.
        """
        from html import escape
        from urllib.parse import urlparse, urlencode

        server_sync_enabled = self.config.get('server_sync.enabled', False)
        server_url = self.config.get('server_sync.url', '')
        room = self.config.get('server_sync.room', 'default')

        if not (server_sync_enabled and server_url):
            self.sync_link.setVisible(False)
            return

        # Extract base URL from the API endpoint
        # (e.g., http://server:3000/api/send -> http://server:3000)
        try:
            parsed = urlparse(server_url)
            # urlparse() does not raise for inputs such as "server:3000/api";
            # it just leaves scheme/netloc empty, which would yield a broken
            # "://" link. Treat that as a parse failure instead.
            if not parsed.scheme or not parsed.netloc:
                raise ValueError(f"missing scheme or host in {server_url!r}")
            base_url = f"{parsed.scheme}://{parsed.netloc}"

            # Get display settings to pass as URL parameters
            params = {
                'room': room,
                'fontfamily': self.config.get('display.font_family', 'Arial'),
                'fontsize': self.config.get('display.font_size', 16),
                'fade': self.config.get('display.fade_after_seconds', 10),
                'timestamps': 'true' if self.config.get('display.show_timestamps', True) else 'false',
                'maxlines': self.config.get('display.max_lines', 50)
            }
            display_url = f"{base_url}/display?{urlencode(params)}"

            # Show shorter text with just address and room. The room name is
            # user-supplied and the label renders rich text, so escape it to
            # keep characters like "<" or "&" from corrupting the markup.
            display_text = escape(f"{base_url} (room: {room})")
            self.sync_link.setText(f'🔗 Multi-User Display: <a href="{display_url}">{display_text}</a>')
            self.sync_link.setToolTip(f"Click to open: {display_url}")
            self.sync_link.setVisible(True)
        except Exception as e:
            # Best-effort UI update: never let a bad URL break the window.
            print(f"Error parsing server URL: {e}")
            self.sync_link.setVisible(False)
    def closeEvent(self, event):
        """Handle window closing."""
        # Stop transcription if running
--- a/gui/settings_dialog_qt.py
+++ b/gui/settings_dialog_qt.py
@@ -3,10 +3,11 @@
 from PySide6.QtWidgets import (
    QDialog, QVBoxLayout, QHBoxLayout, QFormLayout,
    QLabel, QLineEdit, QComboBox, QCheckBox, QSlider,
-    QPushButton, QMessageBox, QGroupBox, QScrollArea, QWidget
+    QPushButton, QMessageBox, QGroupBox, QScrollArea, QWidget,
    QFileDialog
 )
 from PySide6.QtCore import Qt
-from PySide6.QtGui import QScreen
+from PySide6.QtGui import QScreen, QFontDatabase
 from typing import Callable, List, Tuple
@@ -179,6 +180,16 @@ class SettingsDialog(QDialog):
        self.realtime_model_combo.addItems(["tiny", "tiny.en", "base", "base.en"])
        realtime_layout.addRow("Preview Model:", self.realtime_model_combo)
        self.realtime_pause_input = QLineEdit()
        self.realtime_pause_input.setToolTip(
            "Seconds between preview updates:\n"
            "• Lower values = More responsive, more frequent updates\n"
            "• Higher values = Less CPU usage, updates less often\n"
            "• 0.1 is recommended for real-time streaming\n"
            "• Try 0.05 for even faster updates"
        )
        realtime_layout.addRow("Preview Update Interval (s):", self.realtime_pause_input)
        realtime_group.setLayout(realtime_layout)
        content_layout.addWidget(realtime_group)
@@ -261,6 +272,16 @@ class SettingsDialog(QDialog):
        )
        timing_layout.addRow("Pre-Recording Buffer (s):", self.pre_buffer_input)
        self.continuous_mode_check = QCheckBox()
        self.continuous_mode_check.setToolTip(
            "Fast Speaker Mode:\n"
            "• For speakers who talk quickly without pauses\n"
            "• Reduces silence detection thresholds\n"
            "• Produces more frequent transcription outputs\n"
            "• May result in more fragmented sentences"
        )
        timing_layout.addRow("Fast Speaker Mode:", self.continuous_mode_check)
        timing_group.setLayout(timing_layout)
        content_layout.addWidget(timing_group)
@@ -281,10 +302,79 @@ class SettingsDialog(QDialog):
        )
        display_layout.addRow("Max Lines:", self.maxlines_input)
        # Font source selector (shared for local display and server sync)
        self.display_font_source_combo = QComboBox()
        self.display_font_source_combo.addItems(["System Font", "Web-Safe", "Google Font", "Custom File"])
        self.display_font_source_combo.setToolTip(
            "Choose font for local display and server sync:\n"
            "• System Font - Local only (won't work with server sync)\n"
            "• Web-Safe - Universal fonts (Arial, Comic Sans, etc.)\n"
            "• Google Font - Free fonts from fonts.google.com\n"
            "• Custom File - Upload your own font file"
        )
        self.display_font_source_combo.currentTextChanged.connect(self._on_display_font_source_changed)
        display_layout.addRow("Font Source:", self.display_font_source_combo)
        # System font selector
        self.font_family_combo = QComboBox()
-        self.font_family_combo.setToolTip("Font family for transcription display")
+        self.font_family_combo.setToolTip("Font family for transcription display (system fonts)")
-        self.font_family_combo.addItems(["Courier", "Arial", "Times New Roman", "Consolas", "Monaco", "Monospace"])
+        self.font_family_combo.setEditable(True)
-        display_layout.addRow("Font Family:", self.font_family_combo)
+        self.font_family_combo.setMaxVisibleItems(20)
        system_fonts = QFontDatabase.families()
        common_fonts = ["Courier", "Arial", "Times New Roman", "Consolas", "Monaco", "Monospace"]
        ordered_fonts = []
        for font in common_fonts:
            if font in system_fonts:
                ordered_fonts.append(font)
        for font in sorted(system_fonts):
            if font not in ordered_fonts:
                ordered_fonts.append(font)
        self.font_family_combo.addItems(ordered_fonts)
        display_layout.addRow("System Font:", self.font_family_combo)
        # Web-safe font selector for display
        self.display_websafe_combo = QComboBox()
        display_websafe_fonts = [
            "Arial", "Arial Black", "Comic Sans MS", "Courier New",
            "Georgia", "Impact", "Lucida Console", "Lucida Sans Unicode",
            "Palatino Linotype", "Tahoma", "Times New Roman", "Trebuchet MS", "Verdana"
        ]
        self.display_websafe_combo.addItems(display_websafe_fonts)
        self.display_websafe_combo.setToolTip("Web-safe fonts work on all systems")
        display_layout.addRow("Web-Safe Font:", self.display_websafe_combo)
        # Google Font selector for display
        self.display_google_font_combo = QComboBox()
        display_google_fonts = [
            "Roboto", "Open Sans", "Lato", "Montserrat", "Poppins",
            "Nunito", "Raleway", "Ubuntu", "Rubik", "Work Sans",
            "Inter", "Outfit", "Quicksand", "Comfortaa", "Varela Round",
            "Playfair Display", "Merriweather", "Lora", "PT Serif", "Crimson Text",
            "Roboto Mono", "Source Code Pro", "Fira Code", "JetBrains Mono", "IBM Plex Mono",
            "Bebas Neue", "Oswald", "Righteous", "Bangers", "Permanent Marker",
            "Pacifico", "Lobster", "Dancing Script", "Caveat", "Satisfy"
        ]
        self.display_google_font_combo.addItems(display_google_fonts)
        self.display_google_font_combo.setToolTip("Select a Google Font for display")
        display_layout.addRow("Google Font:", self.display_google_font_combo)
        # Custom font file picker (for server sync upload)
        custom_font_layout = QHBoxLayout()
        self.display_custom_font_input = QLineEdit()
        self.display_custom_font_input.setPlaceholderText("No file selected")
        self.display_custom_font_input.setReadOnly(True)
        self.display_custom_font_input.setToolTip(
            "Select a font file to use:\n"
            "• Supports .ttf, .otf, .woff, .woff2 files\n"
            "• Font is uploaded to server automatically when using Server Sync"
        )
        custom_font_layout.addWidget(self.display_custom_font_input)
        self.display_custom_font_browse = QPushButton("Browse...")
        self.display_custom_font_browse.clicked.connect(self._browse_display_custom_font)
        custom_font_layout.addWidget(self.display_custom_font_browse)
        display_layout.addRow("Custom Font File:", custom_font_layout)
        self.font_size_input = QLineEdit()
        self.font_size_input.setToolTip("Font size in pixels (12-20 recommended)")
@@ -301,6 +391,9 @@ class SettingsDialog(QDialog):
        display_group.setLayout(display_layout)
        content_layout.addWidget(display_group)
        # Initially show only System Font (default)
        self._on_display_font_source_changed("System Font")
        # Server Sync Group
        server_group = QGroupBox("Multi-User Server Sync (Optional)")
        server_layout = QFormLayout()
@@ -339,9 +432,55 @@ class SettingsDialog(QDialog):
        )
        server_layout.addRow("Passphrase:", self.server_passphrase_input)
        # Note about font settings
        font_note = QLabel("Font settings are in Display Settings above")
        font_note.setStyleSheet("color: #666; font-style: italic;")
        server_layout.addRow("", font_note)
        server_group.setLayout(server_layout)
        content_layout.addWidget(server_group)
        # Remote Processing Group
        remote_group = QGroupBox("Remote Processing (GPU Offload)")
        remote_layout = QFormLayout()
        remote_layout.setSpacing(10)
        self.remote_enabled_check = QCheckBox()
        self.remote_enabled_check.setToolTip(
            "Enable remote transcription processing:\n"
            "• Offload transcription to a GPU-equipped server\n"
            "• Reduces local CPU/GPU usage\n"
            "• Requires running the remote transcription service"
        )
        remote_layout.addRow("Enable Remote Processing:", self.remote_enabled_check)
        self.remote_url_input = QLineEdit()
        self.remote_url_input.setPlaceholderText("ws://your-server:8765/ws/transcribe")
        self.remote_url_input.setToolTip(
            "WebSocket URL of the remote transcription service:\n"
            "• Format: ws://host:port/ws/transcribe\n"
            "• Use wss:// for secure connections"
        )
        remote_layout.addRow("Server URL:", self.remote_url_input)
        self.remote_api_key_input = QLineEdit()
        self.remote_api_key_input.setEchoMode(QLineEdit.Password)
        self.remote_api_key_input.setPlaceholderText("your-api-key")
        self.remote_api_key_input.setToolTip(
            "API key for authentication with the remote service"
        )
        remote_layout.addRow("API Key:", self.remote_api_key_input)
        self.remote_fallback_check = QCheckBox("Enable")
        self.remote_fallback_check.setChecked(True)
        self.remote_fallback_check.setToolTip(
            "Fall back to local transcription if remote service is unavailable"
        )
        remote_layout.addRow("Fallback to Local:", self.remote_fallback_check)
        remote_group.setLayout(remote_layout)
        content_layout.addWidget(remote_group)
        # Add stretch to push everything to the top
        content_layout.addStretch()
@@ -367,6 +506,77 @@ class SettingsDialog(QDialog):
        """Update the Silero sensitivity label."""
        self.silero_label.setText(f"{value / 100:.2f}")
    def _open_fonts_folder(self):
        """
        Open the custom fonts folder in the system file manager.

        The folder (``self.config.fonts_dir``) is created first if it does not
        exist. The launcher is picked from ``sys.platform``: ``explorer`` on
        Windows, ``open`` on macOS and ``xdg-open`` on every other platform.
        If the launcher cannot be started, a message is printed instead of
        letting the error escape from the UI handler.
        """
        import subprocess
        import sys

        fonts_dir = self.config.fonts_dir

        # Ensure the folder exists so there is something to open
        fonts_dir.mkdir(parents=True, exist_ok=True)

        # Platform-specific launcher; anything unlisted is treated like Linux
        launchers = {'win32': 'explorer', 'darwin': 'open'}
        launcher = launchers.get(sys.platform, 'xdg-open')

        try:
            # Exit status is not checked; only a failure to start the
            # launcher (missing or non-executable binary) is handled.
            subprocess.run([launcher, str(fonts_dir)])
        except OSError as e:
            print(f"Could not open fonts folder {fonts_dir}: {e}")
    def _on_display_font_source_changed(self, source: str):
        """Show/hide display font inputs based on selected source."""
        # Hide all font-specific inputs first
        self.font_family_combo.setVisible(False)
        self.display_websafe_combo.setVisible(False)
        self.display_google_font_combo.setVisible(False)
        self.display_custom_font_input.setVisible(False)
        self.display_custom_font_browse.setVisible(False)
        # Find the form layout rows and hide/show labels too
        parent = self.display_font_source_combo.parent()
        display_layout = parent.layout() if parent else None
        if display_layout and hasattr(display_layout, 'rowCount'):
            for i in range(display_layout.rowCount()):
                label = display_layout.itemAt(i, QFormLayout.LabelRole)
                field = display_layout.itemAt(i, QFormLayout.FieldRole)
                if label and field:
                    label_widget = label.widget()
                    if label_widget:
                        label_text = label_widget.text()
                        if label_text == "System Font:":
                            label_widget.setVisible(source == "System Font")
                        elif label_text == "Web-Safe Font:":
                            label_widget.setVisible(source == "Web-Safe")
                        elif label_text == "Google Font:":
                            label_widget.setVisible(source == "Google Font")
                        elif label_text == "Custom Font File:":
                            label_widget.setVisible(source == "Custom File")
        # Show the relevant input
        if source == "System Font":
            self.font_family_combo.setVisible(True)
        elif source == "Web-Safe":
            self.display_websafe_combo.setVisible(True)
        elif source == "Google Font":
            self.display_google_font_combo.setVisible(True)
        elif source == "Custom File":
            self.display_custom_font_input.setVisible(True)
            self.display_custom_font_browse.setVisible(True)
    def _browse_display_custom_font(self):
        """Let the user pick a font file and put its path into the custom font field."""
        dialog_title = "Select Font File"
        file_filter = "Font Files (*.ttf *.otf *.woff *.woff2);;All Files (*)"
        chosen_path, _selected_filter = QFileDialog.getOpenFileName(
            self, dialog_title, "", file_filter
        )
        # Nothing chosen (empty path): leave the field untouched
        if not chosen_path:
            return
        self.display_custom_font_input.setText(chosen_path)
    def _load_current_settings(self):
        """Load current settings from config."""
        # User settings
@@ -402,6 +612,7 @@ class SettingsDialog(QDialog):
        self.realtime_enabled_check.setChecked(self.config.get('transcription.enable_realtime_transcription', False))
        realtime_model = self.config.get('transcription.realtime_model', 'tiny.en')
        self.realtime_model_combo.setCurrentText(realtime_model)
        self.realtime_pause_input.setText(str(self.config.get('transcription.realtime_processing_pause', 0.1)))
        # VAD settings
        silero_sens = self.config.get('transcription.silero_sensitivity', 0.4)
@@ -417,13 +628,23 @@ class SettingsDialog(QDialog):
        self.post_silence_input.setText(str(self.config.get('transcription.post_speech_silence_duration', 0.3)))
        self.min_recording_input.setText(str(self.config.get('transcription.min_length_of_recording', 0.5)))
        self.pre_buffer_input.setText(str(self.config.get('transcription.pre_recording_buffer_duration', 0.2)))
        self.continuous_mode_check.setChecked(self.config.get('transcription.continuous_mode', False))
        # Display settings
        self.timestamps_check.setChecked(self.config.get('display.show_timestamps', True))
        self.maxlines_input.setText(str(self.config.get('display.max_lines', 100)))
        # Display font settings
        display_font_source = self.config.get('display.font_source', 'System Font')
        self.display_font_source_combo.setCurrentText(display_font_source)
        font_family = self.config.get('display.font_family', 'Courier')
        self.font_family_combo.setCurrentText(font_family)
        self.display_websafe_combo.setCurrentText(self.config.get('display.websafe_font', 'Arial'))
        display_google_font = self.config.get('display.google_font', 'Roboto')
        if display_google_font:
            self.display_google_font_combo.setCurrentText(display_google_font)
        self.display_custom_font_input.setText(self.config.get('display.custom_font_file', ''))
        self._on_display_font_source_changed(display_font_source)
        self.font_size_input.setText(str(self.config.get('display.font_size', 12)))
        self.fade_seconds_input.setText(str(self.config.get('display.fade_after_seconds', 10)))
@@ -434,6 +655,12 @@ class SettingsDialog(QDialog):
        self.server_room_input.setText(self.config.get('server_sync.room', 'default'))
        self.server_passphrase_input.setText(self.config.get('server_sync.passphrase', ''))
        # Remote processing settings
        self.remote_enabled_check.setChecked(self.config.get('remote_processing.enabled', False))
        self.remote_url_input.setText(self.config.get('remote_processing.server_url', ''))
        self.remote_api_key_input.setText(self.config.get('remote_processing.api_key', ''))
        self.remote_fallback_check.setChecked(self.config.get('remote_processing.fallback_to_local', True))
    def _save_settings(self):
        """Save settings to config."""
        try:
@@ -459,6 +686,7 @@ class SettingsDialog(QDialog):
            # Realtime preview
            self.config.set('transcription.enable_realtime_transcription', self.realtime_enabled_check.isChecked())
            self.config.set('transcription.realtime_model', self.realtime_model_combo.currentText())
            self.config.set('transcription.realtime_processing_pause', float(self.realtime_pause_input.text()))
            # VAD settings
            self.config.set('transcription.silero_sensitivity', self.silero_slider.value() / 100.0)
@@ -469,12 +697,20 @@ class SettingsDialog(QDialog):
            self.config.set('transcription.post_speech_silence_duration', float(self.post_silence_input.text()))
            self.config.set('transcription.min_length_of_recording', float(self.min_recording_input.text()))
            self.config.set('transcription.pre_recording_buffer_duration', float(self.pre_buffer_input.text()))
            self.config.set('transcription.continuous_mode', self.continuous_mode_check.isChecked())
            # Display settings
            self.config.set('display.show_timestamps', self.timestamps_check.isChecked())
            max_lines = int(self.maxlines_input.text())
            self.config.set('display.max_lines', max_lines)
            # Display font settings (also used for server sync)
            self.config.set('display.font_source', self.display_font_source_combo.currentText())
            self.config.set('display.font_family', self.font_family_combo.currentText())
            self.config.set('display.websafe_font', self.display_websafe_combo.currentText())
            self.config.set('display.google_font', self.display_google_font_combo.currentText())
            self.config.set('display.custom_font_file', self.display_custom_font_input.text())
            font_size = int(self.font_size_input.text())
            self.config.set('display.font_size', font_size)
            fade_seconds = int(self.fade_seconds_input.text())
@@ -486,6 +722,12 @@ class SettingsDialog(QDialog):
            self.config.set('server_sync.room', self.server_room_input.text())
            self.config.set('server_sync.passphrase', self.server_passphrase_input.text())
            # Remote processing settings
            self.config.set('remote_processing.enabled', self.remote_enabled_check.isChecked())
            self.config.set('remote_processing.server_url', self.remote_url_input.text())
            self.config.set('remote_processing.api_key', self.remote_api_key_input.text())
            self.config.set('remote_processing.fallback_to_local', self.remote_fallback_check.isChecked())
            # Call save callback (which will show the success message)
            if self.on_save:
                self.on_save()
--- a/gui/transcription_display_qt.py
+++ b/gui/transcription_display_qt.py
@@ -1,7 +1,7 @@
 """PySide6 transcription display widget for showing real-time transcriptions."""
 from PySide6.QtWidgets import QTextEdit
-from PySide6.QtGui import QFont, QTextCursor
+from PySide6.QtGui import QFont, QTextCursor, QTextCharFormat, QColor
 from PySide6.QtCore import Qt, Slot
 from datetime import datetime
@@ -28,6 +28,10 @@ class TranscriptionDisplay(QTextEdit):
        self.font_family = font_family
        self.font_size = font_size
        # Track the current preview line for two-stage transcription
        self.preview_line_index = -1  # -1 means no active preview
        self.preview_block_number = -1  # Block number for the preview line
        # Configure text widget
        self.setReadOnly(True)
        self.setFont(QFont(font_family, font_size))
@@ -43,6 +47,36 @@ class TranscriptionDisplay(QTextEdit):
            }
        """)
    def _format_line(self, text: str, user_name: str, timestamp: datetime, is_preview: bool = False) -> str:
        """
        Format a transcription line.
        Args:
            text: Transcription text
            user_name: User/speaker name
            timestamp: Timestamp of transcription
            is_preview: Whether this is a preview line
        Returns:
            Formatted line string
        """
        line_parts = []
        if self.show_timestamps:
            time_str = timestamp.strftime("%H:%M:%S")
            line_parts.append(f"[{time_str}]")
        if user_name and user_name.strip():
            line_parts.append(f"{user_name}:")
        # Add preview indicator for visual distinction
        if is_preview:
            line_parts.append(f"[...] {text}")
        else:
            line_parts.append(text)
        return " ".join(line_parts)
    @Slot(str, str)
    def add_transcription(self, text: str, user_name: str = "", timestamp: datetime = None):
        """
@@ -56,35 +90,130 @@ class TranscriptionDisplay(QTextEdit):
        if timestamp is None:
            timestamp = datetime.now()
-        # Build the display line
+        line = self._format_line(text, user_name, timestamp, is_preview=False)
        line_parts = []
-        if self.show_timestamps:
+        # If there's an active preview, replace it instead of appending
-            time_str = timestamp.strftime("%H:%M:%S")
+        if self.preview_line_index >= 0:
-            line_parts.append(f"[{time_str}]")
+            self._replace_preview_with_final(line)
-
+        else:
-        if user_name:
+            # Add to display normally
-            line_parts.append(f"{user_name}:")
+            self.append(line)
-
+            self.line_count += 1
        line_parts.append(text)
        line = " ".join(line_parts)
        # Add to display
        self.append(line)
        # Auto-scroll to bottom
        cursor = self.textCursor()
        cursor.movePosition(QTextCursor.End)
        self.setTextCursor(cursor)
        # Track line count
        self.line_count += 1
        # Remove old lines if exceeding max
        if self.line_count > self.max_lines:
            self._remove_oldest_lines(self.line_count - self.max_lines)
    @Slot(str, str)
    def add_preview(self, text: str, user_name: str = "", timestamp: datetime = None):
        """
        Show or refresh the preview line that a final transcription will later replace.

        Args:
            text: Preview transcription text
            user_name: User/speaker name
            timestamp: Timestamp of transcription (current time when None)
        """
        when = datetime.now() if timestamp is None else timestamp
        preview_text = self._format_line(text, user_name, when, is_preview=True)

        if self.preview_line_index < 0:
            # No preview on screen yet: write a new italic line at the end
            italic = QTextCharFormat()
            italic.setFontItalic(True)

            writer = self.textCursor()
            writer.movePosition(QTextCursor.End)
            if self.line_count > 0:
                writer.insertText("\n")
            writer.insertText(preview_text, italic)

            # Remember where the preview lives so later calls can overwrite it
            self.preview_line_index = self.line_count
            self.preview_block_number = self.document().blockCount() - 1
            self.line_count += 1
        else:
            # A preview already exists: overwrite it in place
            self._replace_preview_line(preview_text)

        # Auto-scroll to bottom
        end_cursor = self.textCursor()
        end_cursor.movePosition(QTextCursor.End)
        self.setTextCursor(end_cursor)
    def _replace_preview_line(self, new_text: str):
        """
        Replace the current preview line with new preview text.

        Does nothing when no preview block is tracked, or when the tracked
        block number no longer resolves to a valid block.

        Args:
            new_text: Already formatted preview line; inserted in italics
        """
        if self.preview_block_number < 0:
            return

        block = self.document().findBlockByNumber(self.preview_block_number)
        if not block.isValid():
            return

        cursor = QTextCursor(block)
        # Select only the block's own text. QTextCursor.BlockUnderCursor also
        # selects the paragraph separator in front of a non-first block, so
        # removing that selection would glue the preview onto the previous
        # line and invalidate the tracked block number.
        cursor.movePosition(QTextCursor.StartOfBlock)
        cursor.movePosition(QTextCursor.EndOfBlock, QTextCursor.KeepAnchor)

        # Apply italic formatting for preview
        fmt = QTextCharFormat()
        fmt.setFontItalic(True)

        cursor.removeSelectedText()
        cursor.insertText(new_text, fmt)
    def _replace_preview_with_final(self, final_text: str):
        """
        Replace the preview line with the final transcription.

        The preview tracking fields are reset in every case. When no preview
        block is tracked the text is appended as a new line; when the tracked
        block can no longer be found the text is appended as well, so a final
        transcription is never silently lost.

        Args:
            final_text: Already formatted final transcription line
        """
        if self.preview_block_number < 0:
            # No preview to replace, just add normally
            self.append(final_text)
            self.line_count += 1
            self.preview_line_index = -1
            self.preview_block_number = -1
            return

        block = self.document().findBlockByNumber(self.preview_block_number)
        if block.isValid():
            cursor = QTextCursor(block)
            # Select only the block's own text. QTextCursor.BlockUnderCursor
            # also selects the paragraph separator in front of a non-first
            # block, which would merge the final text into the previous line.
            cursor.movePosition(QTextCursor.StartOfBlock)
            cursor.movePosition(QTextCursor.EndOfBlock, QTextCursor.KeepAnchor)

            # Apply normal formatting for final text
            fmt = QTextCharFormat()
            fmt.setFontItalic(False)
            fmt.setForeground(QColor(255, 255, 255))  # White for final

            cursor.removeSelectedText()
            cursor.insertText(final_text, fmt)
        else:
            # Tracked block is gone: append instead of dropping the text.
            # line_count is left alone because add_preview already counted
            # the preview's line.
            self.append(final_text)

        # Clear preview tracking
        self.preview_line_index = -1
        self.preview_block_number = -1
    def clear_preview(self):
        """
        Clear the current preview without adding a final transcription.

        Removes the tracked preview block together with exactly one paragraph
        separator, so the neighbouring lines stay separate wherever the
        preview sits in the document. ``line_count`` is decremented only when
        the tracked block was actually found. Preview tracking is reset in
        every case.
        """
        if self.preview_block_number >= 0:
            block = self.document().findBlockByNumber(self.preview_block_number)
            if block.isValid():
                cursor = QTextCursor(block)
                # Select the block's own text only (no paragraph separators)
                cursor.movePosition(QTextCursor.StartOfBlock)
                cursor.movePosition(QTextCursor.EndOfBlock, QTextCursor.KeepAnchor)
                cursor.removeSelectedText()

                # Drop the now-empty block by deleting one separator
                if cursor.atEnd():
                    # Last block: remove the separator in front of it
                    # (nothing to delete when it is the only block)
                    cursor.deletePreviousChar()
                else:
                    # Otherwise remove the separator that follows it
                    cursor.deleteChar()
                self.line_count -= 1

        self.preview_line_index = -1
        self.preview_block_number = -1
    def _remove_oldest_lines(self, num_lines: int):
        """
        Remove oldest lines from the display.
@@ -102,10 +231,20 @@ class TranscriptionDisplay(QTextEdit):
        self.line_count -= num_lines
        # Adjust preview tracking if lines were removed
        if self.preview_line_index >= 0:
            self.preview_line_index -= num_lines
            self.preview_block_number -= num_lines
            if self.preview_line_index < 0:
                self.preview_line_index = -1
                self.preview_block_number = -1
    def clear_all(self):
        """Remove every transcription and reset the line and preview tracking."""
        self.clear()
        # Nothing is displayed any more, so no line or preview is tracked
        self.line_count = 0
        self.preview_line_index = self.preview_block_number = -1
    def get_all_text(self) -> str:
        """
--- a/main.py
+++ b/main.py
@@ -41,43 +41,68 @@ if getattr(sys, 'frozen', False) and sys.platform == 'win32':
        sys.stderr = io.StringIO()
 # Add project root to Python path
-project_root = Path(__file__).parent
+# Use resolve() to follow symlinks and get the real path
 project_root = Path(__file__).resolve().parent
 sys.path.insert(0, str(project_root))
-from PySide6.QtWidgets import QApplication, QSplashScreen
+# Change working directory to project root so relative paths work
-from PySide6.QtGui import QPixmap, QPainter, QColor, QFont
+os.chdir(project_root)
-from PySide6.QtCore import Qt, QTimer
+
-from gui.main_window_qt import MainWindow
+# Import only minimal Qt components needed for splash and dialogs
 # Heavy imports (MainWindow) are deferred until after splash is shown
 from PySide6.QtWidgets import QApplication, QSplashScreen, QMessageBox
 from PySide6.QtGui import QPixmap, QPainter, QColor, QFont, QIcon
 from PySide6.QtCore import Qt
 # Import single instance lock (lightweight module)
 from client.instance_lock import InstanceLock
 def get_icon_path():
    """Return the path of the application icon (LocalTranscription.png)."""
    if getattr(sys, 'frozen', False):
        # Running in PyInstaller bundle: look under sys._MEIPASS
        icon_dir = Path(sys._MEIPASS)
    else:
        # Running in normal Python: look in the project root
        icon_dir = project_root
    return icon_dir / "LocalTranscription.png"
 def create_splash_pixmap(message="Loading..."):
-    """Create a pixmap for the splash screen with a custom message."""
+    """Create a pixmap for the splash screen with the app icon."""
-    pixmap = QPixmap(500, 300)
+    pixmap = QPixmap(400, 320)
    pixmap.fill(QColor("#2b2b2b"))
    # Draw on the pixmap
    painter = QPainter(pixmap)
    painter.setRenderHint(QPainter.Antialiasing)
    painter.setRenderHint(QPainter.SmoothPixmapTransform)
-    # Draw title
+    # Load and draw the icon
-    title_font = QFont("Arial", 28, QFont.Bold)
+    icon_path = get_icon_path()
-    painter.setFont(title_font)
+    if icon_path.exists():
-    painter.setPen(QColor("#ffffff"))
+        icon_pixmap = QPixmap(str(icon_path))
-    painter.drawText(pixmap.rect(), Qt.AlignCenter, "Local Transcription")
+        # Scale icon to fit nicely (200x200)
        scaled_icon = icon_pixmap.scaled(200, 200, Qt.KeepAspectRatio, Qt.SmoothTransformation)
        # Center the icon horizontally, position it in upper portion
        icon_x = (pixmap.width() - scaled_icon.width()) // 2
        icon_y = 30
        painter.drawPixmap(icon_x, icon_y, scaled_icon)
-    # Draw subtitle
+    # Draw loading message below icon
    subtitle_font = QFont("Arial", 12)
    painter.setFont(subtitle_font)
    painter.setPen(QColor("#888888"))
-    subtitle_rect = pixmap.rect().adjusted(0, 60, 0, 0)
+    subtitle_rect = pixmap.rect().adjusted(0, 0, 0, -40)
-    painter.drawText(subtitle_rect, Qt.AlignCenter, message)
+    painter.drawText(subtitle_rect, Qt.AlignHCenter | Qt.AlignBottom, message)
    # Draw version/status at bottom
    from version import __version__
    status_font = QFont("Arial", 10)
    painter.setFont(status_font)
    painter.setPen(QColor("#666666"))
-    status_rect = pixmap.rect().adjusted(0, 0, 0, -20)
+    status_rect = pixmap.rect().adjusted(0, 0, 0, -15)
-    painter.drawText(status_rect, Qt.AlignHCenter | Qt.AlignBottom, "Please wait...")
+    painter.drawText(status_rect, Qt.AlignHCenter | Qt.AlignBottom, f"v{__version__}")
    painter.end()
    return pixmap
@@ -93,11 +118,14 @@ def create_splash_screen():
 def main():
    """Main application entry point."""
    # Instance lock for cleanup on exit
    instance_lock = None
    try:
        print("Starting Local Transcription Application...")
        print("=" * 50)
-        # Create Qt application
+        # Create Qt application first (needed for dialogs)
        app = QApplication(sys.argv)
        # Set application info
@@ -105,19 +133,24 @@ def main():
        app.setOrganizationName("LocalTranscription")
        # Set application icon
-        # In PyInstaller frozen executables, use _MEIPASS for bundled files
+        icon_path = get_icon_path()
        if getattr(sys, 'frozen', False):
            # Running in PyInstaller bundle
            icon_path = Path(sys._MEIPASS) / "LocalTranscription.png"
        else:
            # Running in normal Python
            icon_path = project_root / "LocalTranscription.png"
        if icon_path.exists():
            from PySide6.QtGui import QIcon
            app.setWindowIcon(QIcon(str(icon_path)))
-        # Create and show splash screen
+        # Check for single instance BEFORE showing splash
        instance_lock = InstanceLock()
        if not instance_lock.acquire():
            # Another instance is already running
            QMessageBox.warning(
                None,
                "Application Already Running",
                "Local Transcription is already running.\n\n"
                "Please check your taskbar or system tray for the existing instance.",
                QMessageBox.Ok
            )
            sys.exit(0)
        # Create and show splash screen IMMEDIATELY
        splash = create_splash_screen()
        splash.show()
        app.processEvents()  # Make sure splash is visible
@@ -126,6 +159,13 @@ def main():
        splash.showMessage("Loading configuration...", Qt.AlignBottom | Qt.AlignCenter, QColor("#888888"))
        app.processEvents()
        # NOW import heavy modules (after splash is visible)
        # This is the slow part - importing MainWindow loads many dependencies
        splash.showMessage("Loading application modules...", Qt.AlignBottom | Qt.AlignCenter, QColor("#888888"))
        app.processEvents()
        from gui.main_window_qt import MainWindow
        # Create main window (this takes time due to model loading)
        # Pass splash to window so it can update the message
        window = MainWindow(splash_screen=splash)
@@ -135,15 +175,25 @@ def main():
        window.show()
        # Run application
-        sys.exit(app.exec())
+        exit_code = app.exec()
        # Release lock on normal exit
        if instance_lock:
            instance_lock.release()
        sys.exit(exit_code)
    except KeyboardInterrupt:
        print("\nApplication interrupted by user")
        if instance_lock:
            instance_lock.release()
        sys.exit(0)
    except Exception as e:
        print(f"Fatal error: {e}")
        import traceback
        traceback.print_exc()
        if instance_lock:
            instance_lock.release()
        sys.exit(1)
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "local-transcription"
-version = "0.1.0"
+version = "1.0.0"
 description = "A standalone desktop application for real-time speech-to-text transcription using Whisper models"
 readme = "README.md"
 requires-python = ">=3.9"
--- a/server/COMPARISON.md
+++ b/server/COMPARISON.md
@@ -1,308 +0,0 @@
 # Multi-User Server Comparison
 ## TL;DR: Which Should You Use?
 | Situation | Recommended Solution |
 |-----------|---------------------|
 | **Shared hosting (cPanel, etc.)** | **PHP Polling** (display-polling.php) |
 | **VPS or cloud server** | **Node.js** (best performance) |
 | **Quick test/demo** | **PHP Polling** (easiest) |
 | **Production with many users** | **Node.js** (most reliable) |
 | **No server access** | Use local-only mode |
 ## Detailed Comparison
 ### 1. PHP with SSE (Original - server.php + display.php)
 **Status:** ⚠️ **PROBLEMATIC** - Not recommended
 **Problems:**
 - PHP-FPM buffers output (SSE doesn't work)
 - Apache/Nginx proxy timeouts
 - Shared hosting often blocks long connections
 - High resource usage (one PHP process per viewer)
 **When it might work:**
 - Only with specific Apache configurations
 - Not on shared hosting with PHP-FPM
 - Requires `ProxyTimeout` settings
 **Verdict:** ❌ Avoid unless you have full server control and can configure Apache properly
 ---
 ### 2. PHP with Polling (NEW - display-polling.php)
 **Status:** ✅ **RECOMMENDED for PHP**
 **Pros:**
 - ✅ Works on ANY shared hosting
 - ✅ No buffering issues
 - ✅ No special configuration needed
 - ✅ Simple to deploy (just upload files)
 - ✅ Uses standard HTTP requests
 **Cons:**
 - ❌ Higher latency (1-2 seconds)
 - ❌ More server requests (polls every second)
 - ❌ Slightly higher bandwidth
 **Performance:**
 - Latency: 1-2 seconds
 - Max users: 20-30 concurrent viewers
 - Resource usage: Moderate
 **Best for:**
 - Shared hosting (cPanel, Bluehost, etc.)
 - Quick deployment
 - Small to medium groups
 **Setup:**
 ```bash
 # Just upload these files:
 server.php
 display-polling.php  # ← Use this instead of display.php
 config.php
 ```
 **OBS URL:**
 ```
 https://your-site.com/transcription/display-polling.php?room=ROOM&fade=10
 ```
 ---
 ### 3. Node.js Server (NEW - server/nodejs/)
 **Status:** ⭐ **BEST PERFORMANCE**
 **Pros:**
 - ✅ Native WebSocket support
 - ✅ Real-time updates (< 100ms latency)
 - ✅ Handles 100+ concurrent connections easily
 - ✅ Lower resource usage
 - ✅ No buffering issues
 - ✅ Event-driven architecture
 **Cons:**
 - ❌ Requires VPS or cloud server
 - ❌ Need to install Node.js
 - ❌ More setup than PHP
 **Performance:**
 - Latency: < 100ms
 - Max users: 500+ concurrent
 - Resource usage: Very low (~50MB RAM)
 **Best for:**
 - Production deployments
 - Large groups (10+ streamers)
 - Professional use
 - Anyone with a VPS
 **Setup:**
 ```bash
 cd server/nodejs
 npm install
 npm start
 ```
 **Free hosting options:**
 - Railway.app (free tier)
 - Heroku (free tier)
 - Fly.io (free tier)
 - Any $5/month VPS (DigitalOcean, Linode)
 **OBS URL:**
 ```
 http://your-server.com:3000/display?room=ROOM&fade=10
 ```
 ---
 ## Feature Comparison Matrix
 | Feature | PHP SSE | PHP Polling | Node.js |
 |---------|---------|-------------|---------|
 | **Real-time** | ⚠️ Should be, but breaks | ⚠️ 1-2s delay | ✅ < 100ms |
 | **Reliability** | ❌ Buffering issues | ✅ Very reliable | ✅ Very reliable |
 | **Shared Hosting** | ❌ Usually fails | ✅ Works everywhere | ❌ Needs VPS |
 | **Setup Difficulty** | 🟡 Medium | 🟢 Easy | 🟡 Medium |
 | **Max Users** | 10 | 30 | 500+ |
 | **Resource Usage** | High | Medium | Low |
 | **Latency** | Should be instant, but... | 1-2 seconds | < 100ms |
 | **Cost** | $5-10/month hosting | $5-10/month hosting | Free - $5/month |
 ---
 ## Migration Guide
 ### From PHP SSE to PHP Polling
 **Super easy - just change the URL:**
 Old:
 ```
 https://your-site.com/transcription/display.php?room=ROOM
 ```
 New:
 ```
 https://your-site.com/transcription/display-polling.php?room=ROOM
 ```
 Everything else stays the same! The desktop app doesn't need changes.
 ---
 ### From PHP to Node.js
 **1. Deploy Node.js server** (see server/nodejs/README.md)
 **2. Update desktop app settings:**
 Old (PHP):
 ```
 Server URL: https://your-site.com/transcription/server.php
 ```
 New (Node.js):
 ```
 Server URL: http://your-server.com:3000/api/send
 ```
 **3. Update OBS browser source:**
 Old (PHP):
 ```
 https://your-site.com/transcription/display.php?room=ROOM
 ```
 New (Node.js):
 ```
 http://your-server.com:3000/display?room=ROOM&fade=10
 ```
 ---
 ## Testing Your Setup
 ### Test PHP Polling
 1. Upload files to server
 2. Visit: `https://your-site.com/transcription/server.php`
   - Should see JSON response
 3. Visit: `https://your-site.com/transcription/display-polling.php?room=test`
   - Should see "🟡 Waiting for data..."
 4. Send a test message:
   ```bash
   curl -X POST "https://your-site.com/transcription/server.php?action=send" \
     -H "Content-Type: application/json" \
     -d '{
       "room": "test",
       "passphrase": "testpass",
       "user_name": "TestUser",
       "text": "Hello World",
       "timestamp": "12:34:56"
     }'
   ```
 5. Display should show "Hello World" within 1-2 seconds
 ### Test Node.js
 1. Start server: `npm start`
 2. Visit: `http://localhost:3000`
   - Should see JSON response
 3. Visit: `http://localhost:3000/display?room=test`
   - Should see "⚫ Connecting..." then "🟢 Connected"
 4. Send test message (same curl as above, but to `http://localhost:3000/api/send`)
 5. Display should show message instantly
 ---
 ## Troubleshooting
 ### PHP Polling Issues
 **"Status stays yellow"**
 - Room doesn't exist yet
 - Send a message from desktop app first
 **"Gets 500 error"**
 - Check PHP error logs
 - Verify `data/` directory is writable
 **"Slow updates (5+ seconds)"**
 - Poll more often by lowering the poll interval: `?poll=500` (500ms)
 - Check server load
 ### Node.js Issues
 **"Cannot connect"**
 - Check firewall allows port 3000
 - Verify server is running: `curl http://localhost:3000`
 **"WebSocket failed"**
 - Check browser console for errors
 - Try different port
 - Check reverse proxy settings if using Nginx
 ---
 ## Recommendations by Use Case
 ### Solo Streamer (Local Only)
 **Use:** Built-in web server (no multi-user server needed)
 - Just run the desktop app
 - OBS: `http://localhost:8080`
 ### 2-3 Friends on Shared Hosting
 **Use:** PHP Polling
 - Upload to your existing web hosting
 - Cost: $0 (use existing hosting)
 - Setup time: 5 minutes
 ### 5+ Streamers, Want Best Quality
 **Use:** Node.js on VPS
 - Deploy to Railway.app (free) or DigitalOcean ($5/month)
 - Real-time updates
 - Professional quality
 ### Large Event/Convention
 **Use:** Node.js on cloud
 - Deploy to AWS/Azure/GCP
 - Use load balancer for redundancy
 - Can handle hundreds of users
 ---
 ## Cost Breakdown
 ### PHP Polling
 - **Shared hosting:** $5-10/month (or free if you already have hosting)
 - **Total:** $5-10/month
 ### Node.js
 - **Free options:**
  - Railway.app (500 hours/month free)
  - Heroku (free dyno)
  - Fly.io (free tier)
 - **Paid options:**
  - DigitalOcean Droplet: $5/month
  - Linode: $5/month
  - AWS EC2 t2.micro: $8/month (or free tier)
 - **Total:** $0-8/month
 ### Just Use Local Mode
 - **Cost:** $0
 - **Limitation:** Only shows your own transcriptions (no multi-user sync)
 ---
 ## Final Recommendation
 **For most users:** Start with **PHP Polling** on shared hosting. It works reliably and is dead simple.
 **If you want the best:** Use **Node.js** - it's worth the extra setup for the performance.
 **For testing:** Use **local mode** (no server) - built into the desktop app.
--- a/server/QUICK_FIX.md
+++ b/server/QUICK_FIX.md
@@ -1,218 +0,0 @@
 # Quick Fix for Multi-User Display Issues
 ## The Problem
 Your PHP SSE (Server-Sent Events) setup isn't working because:
 1. **PHP-FPM buffers output** - Shared hosting uses PHP-FPM which buffers everything
 2. **Apache/Nginx timeouts** - Proxy kills long connections
 3. **SSE isn't designed for PHP** - PHP processes are meant to be short-lived
 ## The Solutions (in order of recommendation)
 ---
 ### ✅ Solution 1: Use PHP Polling (Easiest Fix)
 **What changed:** Instead of SSE (streaming), use regular HTTP polling every 1 second
 **Files affected:**
 - **Keep:** `server.php`, `config.php` (no changes needed)
 - **Replace:** Use `display-polling.php` instead of `display.php`
 **Setup:**
 1. Upload `display-polling.php` to your server
 2. Change your OBS Browser Source URL from:
   ```
   OLD: https://your-site.com/transcription/display.php?room=ROOM
   NEW: https://your-site.com/transcription/display-polling.php?room=ROOM
   ```
 3. Done! No other changes needed.
 **Pros:**
 - ✅ Works on ANY shared hosting
 - ✅ No server configuration needed
 - ✅ Uses your existing setup
 - ✅ 5-minute fix
 **Cons:**
 - ⚠️ 1-2 second latency (vs instant with WebSocket)
 - ⚠️ More server requests (but minimal impact)
 **Performance:** Good for 2-20 concurrent users
 ---
 ### ⭐ Solution 2: Use Node.js Server (Best Performance)
 **What changed:** Switch from PHP to Node.js - designed for real-time
 **Setup:**
 1. Get a VPS (or use free hosting like Railway.app)
 2. Install Node.js:
   ```bash
   cd server/nodejs
   npm install
   npm start
   ```
 3. Update desktop app Server URL to:
   ```
   http://your-server.com:3000/api/send
   ```
 4. Update OBS URL to:
   ```
   http://your-server.com:3000/display?room=ROOM
   ```
 **Pros:**
 - ✅ Real-time (< 100ms latency)
 - ✅ Handles 100+ users easily
 - ✅ Native WebSocket support
 - ✅ Lower resource usage
 - ✅ Can use free hosting (Railway, Heroku, Fly.io)
 **Cons:**
 - ❌ Requires VPS or cloud hosting (can't use shared hosting)
 - ❌ More setup than PHP
 **Performance:** Excellent for any number of users
 **Free Hosting Options:**
 - Railway.app (easiest - just connect GitHub)
 - Heroku (free tier)
 - Fly.io (free tier)
 ---
 ### 🔧 Solution 3: Fix PHP SSE (Advanced - Not Recommended)
 **Only if you have full server control and really want SSE**
 This requires:
 1. Apache configuration changes
 2. Disabling output buffering
 3. Increasing timeouts
 See `apache-sse-config.conf` for details.
 **Not recommended because:** It's complex, fragile, and PHP polling is easier and more reliable.
 ---
 ## Quick Comparison
 | Solution | Setup Time | Reliability | Latency | Works on Shared Hosting? |
 |----------|-----------|-------------|---------|-------------------------|
 | **PHP Polling** | 5 min | ⭐⭐⭐⭐⭐ | 1-2s | ✅ Yes |
 | **Node.js** | 30 min | ⭐⭐⭐⭐⭐ | < 100ms | ❌ No (needs VPS) |
 | **PHP SSE** | 2 hours | ⭐⭐ | Should be instant | ❌ Rarely |
 ---
 ## Testing Your Fix
 ### Test PHP Polling
 1. Run the test script:
   ```bash
   cd server
   ./test-server.sh
   ```
 2. Or manually:
   ```bash
   # Send a test message
   curl -X POST "https://your-site.com/transcription/server.php?action=send" \
     -H "Content-Type: application/json" \
     -d '{
       "room": "test",
       "passphrase": "testpass",
       "user_name": "TestUser",
       "text": "Hello World",
       "timestamp": "12:34:56"
     }'
   # Open in browser:
   https://your-site.com/transcription/display-polling.php?room=test
   # Should see "Hello World" appear within 1-2 seconds
   ```
 ### Test Node.js
 1. Start server:
   ```bash
   cd server/nodejs
   npm install
   npm start
   ```
 2. Open browser:
   ```
   http://localhost:3000/display?room=test
   ```
 3. Send test message:
   ```bash
   curl -X POST "http://localhost:3000/api/send" \
     -H "Content-Type: application/json" \
     -d '{
       "room": "test",
       "passphrase": "testpass",
       "user_name": "TestUser",
       "text": "Hello World",
       "timestamp": "12:34:56"
     }'
   ```
 4. Should see message appear **instantly**
 ---
 ## My Recommendation
 **Start with PHP Polling** (Solution 1):
 - Upload `display-polling.php`
 - Change OBS URL
 - Test it out
 **If you like it and want better performance**, migrate to Node.js (Solution 2):
 - Takes 30 minutes
 - Much better performance
 - Can use free hosting
 **Forget about PHP SSE** (Solution 3):
 - Too much work
 - Unreliable
 - Not worth it
 ---
 ## Files You Need
 ### For PHP Polling
 - ✅ `server.php` (already have)
 - ✅ `config.php` (already have)
 - ✅ `display-polling.php` (NEW - just created)
 - ❌ `display.php` (don't use anymore)
 ### For Node.js
 - ✅ `server/nodejs/server.js` (NEW)
 - ✅ `server/nodejs/package.json` (NEW)
 - ✅ `server/nodejs/README.md` (NEW)
 ---
 ## Need Help?
 1. Read [COMPARISON.md](COMPARISON.md) for detailed comparison
 2. Read [server/nodejs/README.md](nodejs/README.md) for Node.js setup
 3. Run `./test-server.sh` to diagnose issues
 4. Check browser console for errors
 ---
 ## Bottom Line
 **Your SSE display doesn't work because PHP + shared hosting + SSE = bad combo.**
 **Use PHP Polling (1-2s delay) or Node.js (instant).** Both work reliably.
--- a/server/SYNC_PERFORMANCE.md
+++ b/server/SYNC_PERFORMANCE.md
@@ -1,248 +0,0 @@
 # Server Sync Performance - Before vs After
 ## The Problem You Experienced
 **Symptom:** Shared sync display was several seconds behind local transcription
 **Why:** The test script worked fast because it sent ONE message. But the Python app sends messages continuously during speech, and they were getting queued up!
 ---
 ## Before Fix: Serial Processing ❌
 ```
 You speak:    "Hello"  "How"  "are"  "you"  "today"
               ↓        ↓      ↓      ↓      ↓
 Local GUI:    Hello    How    are    you    today  ← Instant!
               ↓        ↓      ↓      ↓      ↓
 Send Queue:   [Hello]→[How]→[are]→[you]→[today]
               |
               ↓ (Wait for HTTP response before sending next)
 HTTP:         ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
              Send     Send   Send   Send   Send
              Hello    How    are    you    today
              (200ms)  (200ms)(200ms)(200ms)(200ms)
              ↓        ↓      ↓      ↓      ↓
 Server:       Hello    How    are    you    today
               ↓        ↓      ↓      ↓      ↓
 Display:      Hello    How    are    you    today  ← 1 second behind!
              (0ms)    (200ms)(400ms)(600ms)(800ms)
 ```
 **Total delay:** 1 second for 5 messages!
 ---
 ## After Fix: Parallel Processing ✅
 ```
 You speak:    "Hello"  "How"  "are"  "you"  "today"
               ↓        ↓      ↓      ↓      ↓
 Local GUI:    Hello    How    are    you    today  ← Instant!
               ↓        ↓      ↓      ↓      ↓
 Send Queue:   [Hello]  [How]  [are]  [you]  [today]
               ↓        ↓      ↓
               ↓        ↓      ↓  ← Up to 3 parallel workers!
 HTTP:         ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
              Send Hello  ┐
              Send How    ├─ All sent simultaneously!
              Send are    ┘
              Wait for free worker...
              Send you    ┐
              Send today  ┘
              (200ms total!)
              ↓        ↓      ↓      ↓      ↓
 Server:       Hello    How    are    you    today
               ↓        ↓      ↓      ↓      ↓
 Display:      Hello    How    are    you    today  ← 200ms behind!
              (0ms)    (0ms)  (0ms)  (0ms)  (200ms)
 ```
 **Total delay:** 200ms for 5 messages!
 ---
 ## Real-World Example
 ### Scenario: You speak a paragraph
 **"Hello everyone. How are you doing today? I'm testing the transcription system."**
 ### Before Fix (Serial)
 ```
 Time    Local GUI                  Server Display
 0.0s    "Hello everyone."
 0.2s    "How are you doing today?"
 0.4s    "I'm testing..."           "Hello everyone." ← 0.4s behind!
 0.6s                               "How are you doing..." ← 0.4s behind!
 0.8s                               "I'm testing..." ← 0.4s behind!
 ```
 ### After Fix (Parallel)
 ```
 Time    Local GUI                  Server Display
 0.0s    "Hello everyone."
 0.2s    "How are you doing today?" "Hello everyone." ← 0.2s behind!
 0.4s    "I'm testing..."           "How are you doing..." ← 0.2s behind!
 0.6s                               "I'm testing..." ← 0.2s behind!
 ```
 **Improvement:** Consistent 200ms delay vs growing 400-800ms delay!
 ---
 ## Technical Details
 ### Problem 1: Wrong URL Format ❌
 ```python
 # What the client was sending to Node.js:
 POST http://localhost:3000/api/send?action=send
 # What Node.js was expecting:
 POST http://localhost:3000/api/send
 ```
 **Fix:** Auto-detect server type
 ```python
 if 'server.php' in url:
    # PHP server needs ?action=send
    POST http://server.com/server.php?action=send
 else:
    # Node.js doesn't need it
    POST http://server.com/api/send
 ```
 ### Problem 2: Blocking HTTP Requests ❌
 ```python
 # Old code (BLOCKING):
 while True:
    message = queue.get()
    send_http(message)  # ← Wait here! Can't send next until this returns
 ```
 **Fix:** Use thread pool
 ```python
 # New code (NON-BLOCKING):
 executor = ThreadPoolExecutor(max_workers=3)
 while True:
    message = queue.get()
    executor.submit(send_http, message)  # ← Returns immediately! Send next!
 ```
 ### Problem 3: Long Timeouts ❌
 ```python
 # Old:
 queue.get(timeout=1.0)  # Wait up to 1 second for new message
 send_http(..., timeout=5.0)  # Wait up to 5 seconds for response
 # New:
 queue.get(timeout=0.1)  # Check queue every 100ms (responsive!)
 send_http(..., timeout=2.0)  # Fail fast if server slow
 ```
 ---
 ## Performance Metrics
 | Metric | Before | After | Improvement |
 |--------|--------|-------|-------------|
 | Single message | 150ms | 150ms | Same |
 | 5 messages (serial) | 750ms | 200ms | **3.7x faster** |
 | 10 messages (serial) | 1500ms | 300ms | **5x faster** |
 | 20 messages (rapid) | 3000ms | 600ms | **5x faster** |
 | Queue polling | 1000ms | 100ms | **10x faster** |
 | Failure timeout | 5000ms | 2000ms | **2.5x faster** |
 ---
 ## Visual Comparison
 ### Before: Messages in Queue Building Up
 ```
 [Message 1] ━━━━━━━━━━━━━━━━━━━━━ Sending... (200ms)
 [Message 2] Waiting...
 [Message 3] Waiting...
 [Message 4] Waiting...
 [Message 5] Waiting...
            ↓
 [Message 1] Done ✓
 [Message 2] ━━━━━━━━━━━━━━━━━━━━━ Sending... (200ms)
 [Message 3] Waiting...
 [Message 4] Waiting...
 [Message 5] Waiting...
            ↓
 ... and so on (total: 1 second for 5 messages)
 ```
 ### After: Messages Sent in Parallel
 ```
 [Message 1] ━━━━━━━━━━━━━━━━━━━━━ Sending... ┐
 [Message 2] ━━━━━━━━━━━━━━━━━━━━━ Sending... ├─ Parallel! (200ms)
 [Message 3] ━━━━━━━━━━━━━━━━━━━━━ Sending... ┘
 [Message 4] Waiting for free worker...
 [Message 5] Waiting for free worker...
            ↓ (workers become available)
 [Message 1] Done ✓
 [Message 2] Done ✓
 [Message 3] Done ✓
 [Message 4] ━━━━━━━━━━━━━━━━━━━━━ Sending... ┐
 [Message 5] ━━━━━━━━━━━━━━━━━━━━━ Sending... ┘
 Total time: 400ms for 5 messages (2.5x faster!)
 ```
 ---
 ## How to Test the Improvement
 1. **Start Node.js server:**
   ```bash
   cd server/nodejs
   npm start
   ```
 2. **Configure desktop app:**
   - Settings → Server Sync → Enable
   - Server URL: `http://localhost:3000/api/send`
   - Room: `test`
   - Passphrase: `test`
 3. **Open display page:**
   ```
   http://localhost:3000/display?room=test&fade=20
   ```
 4. **Test rapid speech:**
   - Start transcription
   - Speak 5-10 sentences quickly in succession
   - Watch both local GUI and web display
 **Expected:** Web display should be only ~200ms behind local GUI (instead of 1-2 seconds)
 ---
 ## Why 3 Workers?
 **Why not 1?** → Serial processing, slow
 **Why not 10?** → Too many connections, overwhelms server
 **Why 3?** → Good balance:
 - Fast enough for rapid speech
 - Doesn't overwhelm server
 - Low resource usage
 You can change this in the code:
 ```python
 self.executor = ThreadPoolExecutor(max_workers=3)  # Change to 5 for faster
 ```
 ---
 ## Summary
 ✅ **Fixed URL format** for Node.js server
 ✅ **Added parallel HTTP requests** (up to 3 simultaneous)
 ✅ **Reduced timeouts** for faster polling and failure detection
 ✅ **Result:** 5-10x faster sync for rapid speech
 **Before:** Laggy, messages queue up, 1-2 second delay
 **After:** Near real-time, 100-300ms delay, smooth!
--- a/server/nodejs/README.md
+++ b/server/nodejs/README.md
@@ -1,15 +1,15 @@
 # Node.js Multi-User Transcription Server
-**Much better than PHP for real-time applications!**
+A real-time multi-user transcription sync server for streamers and teams.
-## Why Node.js is Better Than PHP for This
+## Features
-1. **Native WebSocket Support** - No SSE buffering issues
+- **Real-time WebSocket** - Instant message delivery (< 100ms latency)
-2. **Event-Driven** - Designed for real-time connections
+- **Per-speaker fonts** - Each user can have their own font style
-3. **No Buffering Problems** - PHP-FPM/FastCGI buffering is a nightmare
+- **Google Fonts support** - 1000+ free fonts loaded from CDN
-4. **Lower Latency** - Instant message delivery
+- **Web-safe fonts** - Universal fonts that work everywhere
-5. **Better Resource Usage** - One process handles all connections
+- **Custom font uploads** - Upload your own font files (.ttf, .otf, .woff, .woff2)
-6. **Easy to Deploy** - Works on any VPS, cloud platform, or even Heroku free tier
+- **Easy deployment** - Works on any VPS, cloud platform, or locally
 ## Quick Start
@@ -54,13 +54,35 @@ PORT=8080 npm start
 Add a Browser source with this URL:
 ```
-http://your-server.com:3000/display?room=YOUR_ROOM&fade=10&timestamps=true
+http://your-server.com:3000/display?room=YOUR_ROOM&fade=10&timestamps=true&fontsource=websafe&websafefont=Arial
 ```
 **Parameters:**
- `room` - Your room name (required)
+| Parameter | Default | Description |
- `fade` - Seconds before text fades (0 = never fade)
+|-----------|---------|-------------|
- `timestamps` - Show timestamps (true/false)
+| `room` | default | Your room name (falls back to `default` if omitted) |
 | `fade` | 10 | Seconds before text fades (0 = never fade) |
 | `timestamps` | true | Show timestamps (true/false) |
 | `maxlines` | 50 | Max lines visible (prevents scroll bars) |
 | `fontsize` | 16 | Font size in pixels |
 | `fontsource` | websafe | Font source: `websafe`, `google`, or `custom` |
 | `websafefont` | Arial | Web-safe font name |
 | `googlefont` | Roboto | Google Font name |
 **Font Examples:**
 ```
 # Web-safe font (works everywhere)
 ?room=myroom&fontsource=websafe&websafefont=Courier+New
 # Google Font (loaded from CDN)
 ?room=myroom&fontsource=google&googlefont=Open+Sans
 # Custom font (uploaded by users)
 ?room=myroom&fontsource=custom
 ```
 **Per-Speaker Fonts:**
 Each user can set their own font in the desktop app (Settings → Multi-User Server Sync → Font Source). Per-speaker fonts override the URL defaults, so different speakers can have different fonts on the same display.
 ## API Endpoints
@@ -74,7 +96,9 @@ Content-Type: application/json
  "passphrase": "my-secret",
  "user_name": "Alice",
  "text": "Hello everyone!",
-  "timestamp": "12:34:56"
+  "timestamp": "12:34:56",
  "font_family": "Open Sans",    // Optional: per-speaker font
  "font_type": "google"          // Optional: websafe, google, or custom
 }
 ```
@@ -282,17 +306,6 @@ Ports below 1024 require root. Either:
 - Average latency: < 100ms
 - Memory usage: ~50MB
 ## Comparison: Node.js vs PHP
 | Feature | Node.js | PHP (SSE) |
 |---------|---------|-----------|
 | Real-time | ✅ WebSocket | ⚠️ SSE (buffering issues) |
 | Latency | < 100ms | 1-5 seconds (buffering) |
 | Connections | 1000+ | Limited by PHP-FPM |
 | Setup | Easy | Complex (Apache/Nginx config) |
 | Hosting | VPS, Cloud | Shared hosting (problematic) |
 | Resource Usage | Low | High (one PHP process per connection) |
 ## License
 Part of the Local Transcription project.
--- a/server/nodejs/server.js
+++ b/server/nodejs/server.js
@@ -27,11 +27,15 @@ const wss = new WebSocket.Server({ server });
 // Configuration
 const PORT = process.env.PORT || 3000;
 const DATA_DIR = path.join(__dirname, 'data');
 const FONTS_DIR = path.join(__dirname, 'fonts');
 const MAX_TRANSCRIPTIONS = 100;
 const CLEANUP_INTERVAL = 2 * 60 * 60 * 1000; // 2 hours
 // In-memory font storage: room -> Map(font_name -> {data: Buffer, mime: string, uploaded_at: number})
 const roomFonts = new Map();
 // Middleware
-app.use(bodyParser.json());
+app.use(bodyParser.json({ limit: '10mb' })); // Increase limit for font uploads
 app.use((req, res, next) => {
    res.header('Access-Control-Allow-Origin', '*');
    res.header('Access-Control-Allow-Methods', 'GET, POST, OPTIONS');
@@ -146,7 +150,8 @@ function broadcastToRoom(room, data) {
    });
    const broadcastTime = Date.now() - broadcastStart;
-    console.log(`[Broadcast] Sent to ${sent} client(s) in room "${room}" (${broadcastTime}ms)`);
+    const fontInfo = data.font_family ? ` [font: ${data.font_family} (${data.font_type})]` : '';
    console.log(`[Broadcast] Sent to ${sent} client(s) in room "${room}" (${broadcastTime}ms)${fontInfo}`);
 }
 // Cleanup old rooms
@@ -418,10 +423,15 @@ app.get('/', (req, res) => {
                            <li><code>timestamps=true</code> - Show/hide timestamps (true/false)</li>
                            <li><code>maxlines=50</code> - Max lines visible at once (prevents scroll bars)</li>
                            <li><code>fontsize=16</code> - Font size in pixels</li>
-                            <li><code>fontfamily=Arial</code> - Font family (Arial, Courier, etc.)</li>
+                            <li><code>fontsource=websafe</code> - Font source: <code>websafe</code>, <code>google</code>, or <code>custom</code></li>
                            <li><code>websafefont=Arial</code> - Web-safe font (Arial, Times New Roman, Courier New, etc.)</li>
                            <li><code>googlefont=Roboto</code> - Google Font name (Roboto, Open Sans, Lato, etc.)</li>
                        </ul>
                        <p style="font-size: 0.85em; color: #888; margin-top: 10px;">
-                            Example: <code>?room=myroom&fade=15&timestamps=false&maxlines=30&fontsize=18</code>
+                            Example: <code>?room=myroom&fade=15&fontsource=google&googlefont=Open+Sans&fontsize=18</code>
                        </p>
                        <p style="font-size: 0.85em; color: #888;">
                            Note: Per-speaker fonts override the default. Each user can set their own font in the app settings.
                        </p>
                    </details>
                </div>
@@ -541,7 +551,7 @@ app.get('/', (req, res) => {
            // Build URLs
            const serverUrl = \`http://\${window.location.host}/api/send\`;
-            const displayUrl = \`http://\${window.location.host}/display?room=\${encodeURIComponent(room)}&fade=10&timestamps=true&maxlines=50&fontsize=16&fontfamily=Arial\`;
+            const displayUrl = \`http://\${window.location.host}/display?room=\${encodeURIComponent(room)}&fade=10&timestamps=true&maxlines=50&fontsize=16&fontsource=websafe&websafefont=Arial\`;
            // Update UI
            document.getElementById('serverUrl').textContent = serverUrl;
@@ -592,7 +602,7 @@ app.get('/', (req, res) => {
 app.post('/api/send', async (req, res) => {
    const requestStart = Date.now();
    try {
-        const { room, passphrase, user_name, text, timestamp } = req.body;
+        const { room, passphrase, user_name, text, timestamp, is_preview, font_family, font_type } = req.body;
        if (!room || !passphrase || !user_name || !text) {
            return res.status(400).json({ error: 'Missing required fields' });
@@ -611,17 +621,27 @@ app.post('/api/send', async (req, res) => {
            user_name: user_name.trim(),
            text: text.trim(),
            timestamp: timestamp || new Date().toLocaleTimeString('en-US', { hour12: false }),
-            created_at: Date.now()
+            created_at: Date.now(),
            is_preview: is_preview || false,
            font_family: font_family || null,  // Per-speaker font name
            font_type: font_type || null  // Font type: "websafe", "google", or "custom"
        };
        const addStart = Date.now();
-        await addTranscription(room, transcription);
+        if (is_preview) {
            // Previews are only broadcast, not stored
            broadcastToRoom(room, transcription);
        } else {
            // Final transcriptions are stored and broadcast
            await addTranscription(room, transcription);
        }
        const addTime = Date.now() - addStart;
        const totalTime = Date.now() - requestStart;
-        console.log(`[${new Date().toISOString()}] Transcription received: "${text.substring(0, 50)}..." (verify: ${verifyTime}ms, add: ${addTime}ms, total: ${totalTime}ms)`);
+        const previewLabel = is_preview ? ' [PREVIEW]' : '';
        console.log(`[${new Date().toISOString()}]${previewLabel} Transcription received: "${text.substring(0, 50)}..." (verify: ${verifyTime}ms, add: ${addTime}ms, total: ${totalTime}ms)`);
-        res.json({ status: 'ok', message: 'Transcription added' });
+        res.json({ status: 'ok', message: is_preview ? 'Preview broadcast' : 'Transcription added' });
    } catch (err) {
        console.error('Error in /api/send:', err);
        res.status(500).json({ error: err.message });
@@ -647,9 +667,115 @@ app.get('/api/list', async (req, res) => {
    }
 });
 // Upload fonts for a room.
 //
 // Expects a JSON body { room, passphrase, fonts }, where `fonts` is an array of
 // { name, data, mime } entries and `data` is the base64-encoded font file.
 // Responds:
 //   400 - room/passphrase missing, or `fonts` is not an array
 //   401 - verifyPassphrase rejected the passphrase
 //   200 - { status, message, fonts } with the names now stored for the room
 //   500 - unexpected error (message is echoed back to the caller)
 // Malformed entries are skipped rather than failing the whole batch.
 app.post('/api/fonts', async (req, res) => {
    try {
        const { room, passphrase, fonts } = req.body;
        if (!room || !passphrase) {
            return res.status(400).json({ error: 'Missing room or passphrase' });
        }
        // Verify passphrase
        const valid = await verifyPassphrase(room, passphrase);
        if (!valid) {
            return res.status(401).json({ error: 'Invalid passphrase' });
        }
        if (!fonts || !Array.isArray(fonts)) {
            return res.status(400).json({ error: 'No fonts provided' });
        }
        // Initialize room fonts storage if needed
        if (!roomFonts.has(room)) {
            roomFonts.set(room, new Map());
        }
        const fontsMap = roomFonts.get(room);
        // Process each font
        let addedCount = 0;
        for (const font of fonts) {
            // A null / non-object element would throw on property access and turn the
            // whole request into a 500 after earlier fonts were already stored.
            if (!font || typeof font !== 'object') continue;
            if (!font.name || !font.data || !font.mime) continue;
            // Only strings are meaningful here: `data` is decoded as base64 text,
            // `name` is looked up later from a URL path segment, and `mime` becomes
            // the Content-Type header when the font is served.
            if (typeof font.name !== 'string' ||
                typeof font.data !== 'string' ||
                typeof font.mime !== 'string') continue;
            // Decode base64 font data
            const fontData = Buffer.from(font.data, 'base64');
            fontsMap.set(font.name, {
                data: fontData,
                mime: font.mime,
                uploaded_at: Date.now()
            });
            addedCount++;
            console.log(`[Fonts] Uploaded font "${font.name}" for room "${room}" (${fontData.length} bytes)`);
        }
        res.json({ status: 'ok', message: `${addedCount} font(s) uploaded`, fonts: Array.from(fontsMap.keys()) });
    } catch (err) {
        console.error('Error in /api/fonts:', err);
        res.status(500).json({ error: err.message });
    }
 });
 // Serve a previously uploaded font file for a room.
 // Responds 404 when the room has no font store or the font name is unknown;
 // otherwise sends the stored bytes with the uploaded MIME type and a
 // one-hour public cache header.
 app.get('/fonts/:room/:fontname', (req, res) => {
    const roomStore = roomFonts.get(req.params.room);
    if (!roomStore) {
        res.status(404).json({ error: 'Room not found' });
        return;
    }
    const entry = roomStore.get(req.params.fontname);
    if (!entry) {
        res.status(404).json({ error: 'Font not found' });
        return;
    }
    res.set({
        'Content-Type': entry.mime,
        'Cache-Control': 'public, max-age=3600'
    });
    res.send(entry.data);
 });
 // List the names of the fonts uploaded for a room.
 // Responds 400 when the `room` query parameter is missing; a room with no
 // uploaded fonts yields an empty list.
 app.get('/api/fonts', (req, res) => {
    const roomName = req.query.room;
    if (!roomName) {
        res.status(400).json({ error: 'Missing room parameter' });
        return;
    }
    const roomStore = roomFonts.get(roomName);
    res.json({ fonts: roomStore ? [...roomStore.keys()] : [] });
 });
 // Serve display page
 app.get('/display', (req, res) => {
-    const { room = 'default', fade = '10', timestamps = 'true', maxlines = '50', fontsize = '16', fontfamily = 'Arial' } = req.query;
+    const {
        room = 'default',
        fade = '10',
        timestamps = 'true',
        maxlines = '50',
        fontsize = '16',
        fontfamily = 'Arial',
        // New font source parameters
        fontsource = 'websafe',  // websafe, google, or custom
        websafefont = 'Arial',
        googlefont = 'Roboto'
    } = req.query;
    // Determine the effective default font based on fontsource
    let effectiveFont = fontfamily;  // Legacy fallback
    if (fontsource === 'google' && googlefont) {
        effectiveFont = googlefont;
    } else if (fontsource === 'websafe' && websafefont) {
        effectiveFont = websafefont;
    }
    // Generate Google Font link if needed
    // Note: Google Fonts expects spaces as '+' in the URL, not %2B
    const googleFontLink = fontsource === 'google' && googlefont
        ? `<link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=${googlefont.replace(/ /g, '+')}&display=swap">`
        : '';
    res.send(`
 <!DOCTYPE html>
@@ -657,12 +783,16 @@ app.get('/display', (req, res) => {
 <head>
    <title>Multi-User Transcription Display</title>
    <meta charset="UTF-8">
    ${googleFontLink}
    <style id="custom-fonts">
        /* Custom fonts will be injected here */
    </style>
    <style>
        body {
            margin: 0;
            padding: 20px;
            background: transparent;
-            font-family: ${fontfamily}, sans-serif;
+            font-family: "${effectiveFont}", sans-serif;
            font-size: ${fontsize}px;
            color: white;
            overflow: hidden;
@@ -681,6 +811,14 @@ app.get('/display', (req, res) => {
        .transcription.fading {
            opacity: 0;
        }
        .transcription.preview {
            font-style: italic;
        }
        .preview-indicator {
            color: #888;
            font-size: 0.85em;
            margin-right: 5px;
        }
        .timestamp {
            color: #888;
            font-size: 0.9em;
@@ -721,11 +859,68 @@ app.get('/display', (req, res) => {
        const fadeAfter = ${fade};
        const showTimestamps = ${timestamps === 'true' || timestamps === '1'};
        const maxLines = ${maxlines};
        const requestedFont = "${fontfamily}";
        const container = document.getElementById('transcriptions');
        const statusEl = document.getElementById('status');
        const userColors = new Map();
        let colorIndex = 0;
        // Track preview elements by user for replacement
        const userPreviews = new Map();
        // Track loaded Google Fonts to avoid duplicate loading
        const loadedGoogleFonts = new Set();
        // Load a Google Font dynamically
        function loadGoogleFont(fontName) {
            if (loadedGoogleFonts.has(fontName)) return;
            loadedGoogleFonts.add(fontName);
            const link = document.createElement('link');
            link.rel = 'stylesheet';
            // Google Fonts expects spaces as '+' in the URL, not %2B
            link.href = \`https://fonts.googleapis.com/css2?family=\${fontName.replace(/ /g, '+')}&display=swap\`;
            document.head.appendChild(link);
            console.log('Loading Google Font:', fontName);
        }
        // Load custom fonts for this room
        async function loadCustomFonts() {
            try {
                const response = await fetch(\`/api/fonts?room=\${encodeURIComponent(room)}\`);
                const data = await response.json();
                if (data.fonts && data.fonts.length > 0) {
                    let fontFaceCSS = '';
                    for (const fontName of data.fonts) {
                        // Determine format based on extension
                        let format = 'truetype';
                        if (fontName.endsWith('.woff2')) format = 'woff2';
                        else if (fontName.endsWith('.woff')) format = 'woff';
                        else if (fontName.endsWith('.otf')) format = 'opentype';
                        // Font family name is filename without extension
                        const familyName = fontName.replace(/\\.(ttf|otf|woff2?)\$/i, '');
                        fontFaceCSS += \`
                            @font-face {
                                font-family: "\${familyName}";
                                src: url("/fonts/\${encodeURIComponent(room)}/\${encodeURIComponent(fontName)}") format("\${format}");
                                font-weight: normal;
                                font-style: normal;
                            }
                        \`;
                    }
                    // Inject the font-face rules
                    document.getElementById('custom-fonts').textContent = fontFaceCSS;
                    console.log('Loaded custom fonts:', data.fonts);
                }
            } catch (err) {
                console.error('Error loading custom fonts:', err);
            }
        }
        function getUserColor(userName) {
            if (!userColors.has(userName)) {
                const hue = (colorIndex * 137.5) % 360;
@@ -737,32 +932,96 @@ app.get('/display', (req, res) => {
        }
        function addTranscription(data) {
-            const div = document.createElement('div');
+            const isPreview = data.is_preview || false;
-            div.className = 'transcription';
+            const userName = data.user_name || '';
            const fontFamily = data.font_family || null;  // Per-speaker font name
            const fontType = data.font_type || null;  // "websafe", "google", or "custom"
-            const userColor = getUserColor(data.user_name);
+            // Debug: Log received font info
            if (fontFamily) {
                console.log('Received transcription with font:', fontFamily, '(' + fontType + ')');
            }
            // Load Google Font if needed
            if (fontType === 'google' && fontFamily) {
                loadGoogleFont(fontFamily);
            }
            // Build font style string if font is set
            // Use single quotes for font name to avoid conflict with style="" double quotes
            const fontStyle = fontFamily ? \`font-family: '\${fontFamily}', sans-serif;\` : '';
            // If this is a final transcription, remove any existing preview from this user
            if (!isPreview && userPreviews.has(userName)) {
                const previewEl = userPreviews.get(userName);
                if (previewEl && previewEl.parentNode) {
                    previewEl.remove();
                }
                userPreviews.delete(userName);
            }
            // If this is a preview, update existing preview or create new one
            if (isPreview && userPreviews.has(userName)) {
                const previewEl = userPreviews.get(userName);
                if (previewEl && previewEl.parentNode) {
                    // Update existing preview
                    const userColor = getUserColor(userName);
                    let html = '';
                    if (showTimestamps && data.timestamp) {
                        html += \`<span class="timestamp">[\${data.timestamp}]</span>\`;
                    }
                    if (userName) {
                        html += \`<span class="user" style="color: \${userColor}">\${userName}:</span>\`;
                    }
                    html += \`<span class="preview-indicator">[...]</span>\`;
                    html += \`<span class="text" style="\${fontStyle}">\${data.text}</span>\`;
                    previewEl.innerHTML = html;
                    return;
                }
            }
            const div = document.createElement('div');
            div.className = isPreview ? 'transcription preview' : 'transcription';
            const userColor = getUserColor(userName);
            let html = '';
            if (showTimestamps && data.timestamp) {
                html += \`<span class="timestamp">[\${data.timestamp}]</span>\`;
            }
-            if (data.user_name) {
+            if (userName) {
-                html += \`<span class="user" style="color: \${userColor}">\${data.user_name}:</span>\`;
+                html += \`<span class="user" style="color: \${userColor}">\${userName}:</span>\`;
            }
-            html += \`<span class="text">\${data.text}</span>\`;
+            if (isPreview) {
                html += \`<span class="preview-indicator">[...]</span>\`;
            }
            html += \`<span class="text" style="\${fontStyle}">\${data.text}</span>\`;
            div.innerHTML = html;
            container.appendChild(div);
-            if (fadeAfter > 0) {
+            // Track preview element for this user
-                setTimeout(() => {
+            if (isPreview) {
-                    div.classList.add('fading');
+                userPreviews.set(userName, div);
-                    setTimeout(() => div.remove(), 1000);
+            } else {
-                }, fadeAfter * 1000);
+                // Only set fade timer for final transcriptions
                if (fadeAfter > 0) {
                    setTimeout(() => {
                        div.classList.add('fading');
                        setTimeout(() => div.remove(), 1000);
                    }, fadeAfter * 1000);
                }
            }
-            // Enforce max lines limit
+            // Enforce max lines limit (don't remove current previews)
            while (container.children.length > maxLines) {
-                container.removeChild(container.firstChild);
+                const first = container.firstChild;
                // Don't remove if it's an active preview
                let isActivePreview = false;
                userPreviews.forEach((el) => {
                    if (el === first) isActivePreview = true;
                });
                if (isActivePreview) break;
                container.removeChild(first);
            }
        }
@@ -821,7 +1080,8 @@ app.get('/display', (req, res) => {
            };
        }
-        loadRecent().then(connect);
+        // Load custom fonts, then recent transcriptions, then connect WebSocket
        loadCustomFonts().then(() => loadRecent()).then(connect);
    </script>
 </body>
 </html>
--- a/server/test-server.sh
+++ b/server/test-server.sh
@@ -1,160 +0,0 @@
 #!/bin/bash
 # Test script for multi-user transcription servers
 #
 # Interactive smoke test: asks which server flavour to target, POSTs one
 # test transcription (stopping unless the server answers HTTP 200), POSTs
 # five more, optionally lists the room's transcriptions, and finally prints
 # the display URLs and desktop-app settings to use next.
 # Abort on the first failing command (including a failed curl inside a
 # command-substitution assignment).
 set -e
 echo "================================="
 echo "Multi-User Server Test Script"
 echo "================================="
 echo ""
 # Colors
 # ANSI escape sequences, interpreted by the `echo -e` calls below.
 RED='\033[0;31m'
 GREEN='\033[0;32m'
 YELLOW='\033[1;33m'
 NC='\033[0m' # No Color
 # Get server URL from user
 echo "What server are you testing?"
 echo "1) PHP Server"
 echo "2) Node.js Server"
 echo "3) Custom URL"
 read -p "Choice (1-3): " choice
 # Derive API_ENDPOINT from the choice. SERVER_URL is only set for choices
 # 1 and 2; choice 3 supplies the full endpoint directly.
 case $choice in
    1)
        read -p "Enter PHP server URL (e.g., https://example.com/transcription/server.php): " SERVER_URL
        API_ENDPOINT="${SERVER_URL}?action=send"
        ;;
    2)
        read -p "Enter Node.js server URL (e.g., http://localhost:3000): " SERVER_URL
        API_ENDPOINT="${SERVER_URL}/api/send"
        ;;
    3)
        read -p "Enter API endpoint URL: " API_ENDPOINT
        ;;
    *)
        echo "Invalid choice"
        exit 1
        ;;
 esac
 # Get room details
 # Each prompt falls back to the bracketed default when the answer is empty.
 read -p "Room name [test]: " ROOM
 ROOM=${ROOM:-test}
 read -p "Passphrase [testpass]: " PASSPHRASE
 PASSPHRASE=${PASSPHRASE:-testpass}
 read -p "User name [TestUser]: " USER_NAME
 USER_NAME=${USER_NAME:-TestUser}
 echo ""
 echo "================================="
 echo "Testing connection to server..."
 echo "================================="
 echo "API Endpoint: $API_ENDPOINT"
 echo "Room: $ROOM"
 echo "User: $USER_NAME"
 echo ""
 # Test 1: Send a transcription
 # `-w "\n%{http_code}"` appends the HTTP status on its own final line so the
 # status and the body can be separated below.
 # NOTE: the prompt answers are spliced into the JSON unescaped, so values
 # containing double quotes or backslashes produce an invalid request body.
 echo "Test 1: Sending test transcription..."
 RESPONSE=$(curl -s -w "\n%{http_code}" -X POST "$API_ENDPOINT" \
  -H "Content-Type: application/json" \
  -d "{
    \"room\": \"$ROOM\",
    \"passphrase\": \"$PASSPHRASE\",
    \"user_name\": \"$USER_NAME\",
    \"text\": \"Test message from test script\",
    \"timestamp\": \"$(date +%H:%M:%S)\"
  }")
 # The last line is the status code; everything before it is the body.
 HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
 BODY=$(echo "$RESPONSE" | sed '$d')
 if [ "$HTTP_CODE" = "200" ]; then
    echo -e "${GREEN}✓ Success!${NC} Server responded with 200 OK"
    echo "Response: $BODY"
 else
    echo -e "${RED}✗ Failed!${NC} Server responded with HTTP $HTTP_CODE"
    echo "Response: $BODY"
    exit 1
 fi
 echo ""
 # Test 2: Send multiple messages
 # Responses are discarded; the HTTP status of these requests is not checked.
 echo "Test 2: Sending 5 test messages..."
 for i in {1..5}; do
    curl -s -X POST "$API_ENDPOINT" \
      -H "Content-Type: application/json" \
      -d "{
        \"room\": \"$ROOM\",
        \"passphrase\": \"$PASSPHRASE\",
        \"user_name\": \"$USER_NAME\",
        \"text\": \"Test message #$i\",
        \"timestamp\": \"$(date +%H:%M:%S)\"
      }" > /dev/null
    echo -e "${GREEN}✓${NC} Sent message #$i"
    sleep 0.5
 done
 echo ""
 # Test 3: List transcriptions (if available)
 # Only the PHP and Node.js choices have a known list URL.
 echo "Test 3: Retrieving transcriptions..."
 if [ "$choice" = "1" ]; then
    LIST_URL="${SERVER_URL}?action=list&room=$ROOM"
 elif [ "$choice" = "2" ]; then
    LIST_URL="${SERVER_URL}/api/list?room=$ROOM"
 else
    echo "Skipping list test for custom URL"
    LIST_URL=""
 fi
 if [ -n "$LIST_URL" ]; then
    LIST_RESPONSE=$(curl -s "$LIST_URL")
    # Count occurrences of the "text" key as a rough number of entries.
    COUNT=$(echo "$LIST_RESPONSE" | grep -o "\"text\"" | wc -l)
    if [ "$COUNT" -gt 0 ]; then
        echo -e "${GREEN}✓ Success!${NC} Retrieved $COUNT transcriptions"
        # Pretty-print when python3 can parse the JSON, otherwise print raw.
        echo "$LIST_RESPONSE" | python3 -m json.tool 2>/dev/null || echo "$LIST_RESPONSE"
    else
        echo -e "${YELLOW}⚠ Warning:${NC} No transcriptions retrieved"
        echo "$LIST_RESPONSE"
    fi
 fi
 echo ""
 echo "================================="
 echo "Test Complete!"
 echo "================================="
 echo ""
 echo "Next steps:"
 echo ""
 # ${SERVER_URL%server.php} strips a trailing "server.php" so the display
 # page URL can be built next to it.
 if [ "$choice" = "1" ]; then
    echo "1. Open this URL in OBS Browser Source:"
    echo "   ${SERVER_URL%server.php}display-polling.php?room=$ROOM&fade=10"
    echo ""
    echo "2. Or test in your browser first:"
    echo "   ${SERVER_URL%server.php}display-polling.php?room=$ROOM"
 elif [ "$choice" = "2" ]; then
    echo "1. Open this URL in OBS Browser Source:"
    echo "   ${SERVER_URL}/display?room=$ROOM&fade=10"
    echo ""
    echo "2. Or test in your browser first:"
    echo "   ${SERVER_URL}/display?room=$ROOM"
 fi
 echo ""
 echo "3. Configure desktop app with these settings:"
 echo "   - Server URL: $API_ENDPOINT"
 echo "   - Room: $ROOM"
 echo "   - Passphrase: $PASSPHRASE"
 echo ""
 echo "4. Start transcribing!"
 echo ""
--- a/server/transcription-service/README.md
+++ b/server/transcription-service/README.md
@@ -0,0 +1,173 @@
 # Remote Transcription Service
 A standalone GPU-accelerated transcription service that accepts audio streams over WebSocket and returns transcriptions. Designed for offloading transcription processing from client machines to a GPU-equipped server.
 ## Features
 - WebSocket-based audio streaming
 - API key authentication
 - GPU acceleration (CUDA)
 - Multiple simultaneous clients
 - Health check endpoints
 ## Requirements
 - Python 3.10+
 - NVIDIA GPU with CUDA support (recommended)
 - GPU memory sized for the chosen model: roughly 1GB for `base.en`, up to about 10GB for `large-v3` (see the Models table below)
 ## Installation
 ```bash
 cd server/transcription-service
 # Create virtual environment
 python -m venv venv
 source venv/bin/activate  # Linux/Mac
 # or: venv\Scripts\activate  # Windows
 # Install dependencies
 pip install -r requirements.txt
 # For GPU support, install CUDA version of PyTorch
 pip install torch --index-url https://download.pytorch.org/whl/cu121
 ```
 ## Configuration
 Set environment variables before starting:
 ```bash
 # Strongly recommended: API key(s) for authentication (if none are set, the service accepts every connection)
 export TRANSCRIPTION_API_KEY="your-secret-key"
 # Or multiple keys (comma-separated)
 export TRANSCRIPTION_API_KEYS="key1,key2,key3"
 # Optional: Model selection (default: base.en)
 export TRANSCRIPTION_MODEL="base.en"
 ```
 ## Running
 ```bash
 # Start the service
 python server.py --host 0.0.0.0 --port 8765
 # Or with custom model
 python server.py --host 0.0.0.0 --port 8765 --model medium.en
 ```
 ## API Endpoints
 ### Health Check
 ```
 GET /
 GET /health
 ```
 ### WebSocket Transcription
 ```
 WS /ws/transcribe
 ```
 ## WebSocket Protocol
 1. **Authentication**
   ```json
   // Client sends
   {"type": "auth", "api_key": "your-key"}
   // Server responds
   {"type": "auth_result", "success": true, "message": "..."}
   ```
 2. **Send Audio**
   ```json
   // Client sends ("data" is the base64 encoding of the raw float32 sample bytes, e.g. samples.astype(np.float32).tobytes())
   {"type": "audio", "data": "base64...", "sample_rate": 16000}
   // Server responds
   {"type": "transcription", "text": "Hello world", "is_preview": false, "timestamp": "..."}
   ```
 3. **Keep-alive**
   ```json
   // Client sends
   {"type": "ping"}
   // Server responds
   {"type": "pong"}
   ```
 4. **Disconnect**
   ```json
   // Client sends
   {"type": "end"}
   ```
 ## Client Integration
 The Local Transcription app includes a remote transcription client. Configure in Settings:
 1. Enable "Remote Processing"
 2. Set Server URL: `ws://your-server:8765/ws/transcribe`
 3. Enter your API key
 ## Deployment
 ### Docker
 ```dockerfile
 FROM python:3.11-slim
 WORKDIR /app
 COPY requirements.txt .
 RUN pip install -r requirements.txt
 COPY server.py .
 ENV TRANSCRIPTION_MODEL=base.en
 EXPOSE 8765
 CMD ["python", "server.py", "--host", "0.0.0.0", "--port", "8765"]
 ```
 ### Systemd Service
 ```ini
 [Unit]
 Description=Remote Transcription Service
 After=network.target
 [Service]
 Type=simple
 User=transcription
 WorkingDirectory=/opt/transcription-service
 Environment=TRANSCRIPTION_API_KEY=your-key
 Environment=TRANSCRIPTION_MODEL=base.en
 ExecStart=/opt/transcription-service/venv/bin/python server.py
 Restart=always
 [Install]
 WantedBy=multi-user.target
 ```
 ## Models
 Available Whisper models (larger = better quality, slower):
 | Model | Parameters | VRAM | Speed |
 |-------|-----------|------|-------|
 | tiny.en | 39M | ~1GB | Fastest |
 | base.en | 74M | ~1GB | Fast |
 | small.en | 244M | ~2GB | Moderate |
 | medium.en | 769M | ~5GB | Slow |
 | large-v3 | 1550M | ~10GB | Slowest |
 ## Security Notes
 - Always use API key authentication in production
 - Use HTTPS/WSS in production (via reverse proxy)
 - Rate limit connections if needed
 - Monitor GPU usage to prevent overload
--- a/server/transcription-service/requirements.txt
+++ b/server/transcription-service/requirements.txt
@@ -0,0 +1,8 @@
 fastapi>=0.100.0
 uvicorn>=0.22.0
 websockets>=11.0
 numpy>=1.24.0
 pydantic>=2.0.0
 faster-whisper>=0.10.0
 RealtimeSTT>=0.1.0
 torch>=2.0.0
--- a/server/transcription-service/server.py
+++ b/server/transcription-service/server.py
@@ -0,0 +1,366 @@
 """
 Remote Transcription Service
 A standalone FastAPI WebSocket server that accepts audio streams and returns transcriptions.
 Designed to run on a GPU-equipped server for offloading transcription processing.
 Usage:
    python server.py [--host HOST] [--port PORT] [--model MODEL]
 Environment variables:
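    TRANSCRIPTION_API_KEY: API key clients must present (if no key is configured, all connections are accepted)
    TRANSCRIPTION_API_KEYS: Comma-separated list of additional accepted API keys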
    TRANSCRIPTION_MODEL: Whisper model to use (default: base.en)
 """
 import asyncio
 import argparse
 import os
 import sys
 import json
 import base64
 import logging
 from datetime import datetime
 from pathlib import Path
 from typing import Optional, Dict, Set
 from threading import Thread, Lock
 import numpy as np
 from fastapi import FastAPI, WebSocket, WebSocketDisconnect, HTTPException, Depends
 from fastapi.responses import JSONResponse
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
 import uvicorn
 # Configure logging
 # Root logger at INFO; every record carries timestamp, logger name and level.
 logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
 )
 # Module-level logger used throughout this service.
 logger = logging.getLogger(__name__)
 # API Key authentication
 # Accepted API keys, filled in by load_api_keys(). While this set is empty,
 # verify_api_key() accepts any key (authentication disabled).
 API_KEYS: Set[str] = set()
 def load_api_keys():
    """Rebuild the global ``API_KEYS`` set from the environment.

    Reads ``TRANSCRIPTION_API_KEYS`` (comma-separated) and
    ``TRANSCRIPTION_API_KEY`` (single key). Every key is stripped of
    surrounding whitespace and blank entries are dropped, so a stray space
    or trailing newline can neither become a valid key nor invalidate one.
    The set is rebuilt from scratch on each call, so keys removed from the
    environment do not linger after a reload.

    Logs a warning when no key is configured, because the service then
    accepts every connection.
    """
    global API_KEYS
    multi_keys = os.environ.get('TRANSCRIPTION_API_KEYS', '')
    single_key = os.environ.get('TRANSCRIPTION_API_KEY', '')
    # Treat the single key as one more candidate of the comma-separated list.
    candidates = multi_keys.split(',') + [single_key]
    API_KEYS = {key.strip() for key in candidates if key.strip()}
    if not API_KEYS:
        logger.warning("No API keys configured. Set TRANSCRIPTION_API_KEY or TRANSCRIPTION_API_KEYS environment variable.")
        logger.warning("Service will accept all connections (INSECURE for production).")
 def verify_api_key(api_key: str) -> bool:
    """Return True when *api_key* is accepted by the service.

    Args:
        api_key: Key supplied by the client. It comes straight from decoded
            JSON, so it may be any JSON type, not only a string.

    Returns:
        True if no keys are configured (authentication disabled) or the key
        matches one of ``API_KEYS``; False otherwise, including for
        non-string input.
    """
    if not API_KEYS:
        return True  # No authentication if no keys configured
    # Non-string values (lists/dicts are unhashable) can never match; reject
    # them here instead of letting a set lookup raise TypeError.
    if not isinstance(api_key, str):
        return False
    import hmac
    candidate = api_key.encode("utf-8", "surrogatepass")
    matched = False
    # Constant-time comparison against every configured key (no early exit),
    # so response timing does not reveal how much of a key was correct.
    for known_key in API_KEYS:
        if hmac.compare_digest(candidate, known_key.encode("utf-8", "surrogatepass")):
            matched = True
    return matched
 # FastAPI application object; the route decorators further down register
 # their handlers on it and main() hands it to uvicorn.
 app = FastAPI(
    title="Remote Transcription Service",
    description="GPU-accelerated speech-to-text transcription service",
    version="1.0.0"
 )
 # Enable CORS for all origins (configure appropriately for production)
 # NOTE(review): wildcard origins together with allow_credentials=True is the
 # most permissive combination — confirm it is intended before deploying.
 app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
 )
 class TranscriptionEngine:
    """Manages the transcription engine with thread-safe access.

    ``initialize()`` must succeed before ``transcribe()`` returns text.
    ``transcribe()`` serialises callers through ``self.lock`` so only one
    transcription runs at a time.
    """

    # Sample rate (Hz) that audio is converted to before it reaches the model.
    TARGET_SAMPLE_RATE = 16000

    def __init__(self, model: str = "base.en", device: str = "auto"):
        """
        Args:
            model: Whisper model name.
            device: "cuda", "cpu", or "auto" to pick CUDA when available.
        """
        self.model_name = model
        self.device = device
        self.recorder = None
        self.lock = Lock()
        self.is_initialized = False
        # faster-whisper model used by transcribe(); created on first use.
        self._whisper_model = None

    def initialize(self):
        """Initialize the transcription engine.

        Returns:
            True when the engine is ready (or already was), False when
            initialization failed; the error is logged, not raised.
        """
        if self.is_initialized:
            return True
        try:
            from RealtimeSTT import AudioToTextRecorder
            # Determine device
            if self.device == "auto":
                import torch
                if torch.cuda.is_available():
                    self.device = "cuda"
                else:
                    self.device = "cpu"
            logger.info(f"Initializing transcription engine with model={self.model_name}, device={self.device}")
            # Create recorder with minimal configuration
            # We'll feed audio directly, not capture from microphone
            # NOTE(review): transcribe() never uses this recorder, and
            # input_device_index=None presumably selects the default input
            # device rather than disabling capture — confirm against the
            # RealtimeSTT documentation.
            self.recorder = AudioToTextRecorder(
                model=self.model_name,
                language="en",
                device=self.device,
                compute_type="default",
                input_device_index=None,  # No mic capture
                silero_sensitivity=0.4,
                webrtc_sensitivity=3,
                post_speech_silence_duration=0.3,
                min_length_of_recording=0.5,
                enable_realtime_transcription=True,
                realtime_model_type="tiny.en",
            )
            self.is_initialized = True
            logger.info("Transcription engine initialized successfully")
            return True
        except Exception as e:
            logger.error(f"Failed to initialize transcription engine: {e}")
            return False

    @staticmethod
    def _to_target_rate(audio_data, sample_rate, target_rate=16000):
        """Return *audio_data* as flat float32 samples at *target_rate* Hz.

        Audio already at the target rate (or empty) is returned unchanged
        apart from dtype/shape normalisation; otherwise it is resampled by
        linear interpolation.

        Raises:
            ValueError: if *sample_rate* is not a positive number.
        """
        samples = np.asarray(audio_data, dtype=np.float32).reshape(-1)
        if sample_rate == target_rate or samples.size == 0:
            return samples
        if not isinstance(sample_rate, (int, float)) or sample_rate <= 0:
            raise ValueError(f"Invalid sample rate: {sample_rate!r}")
        new_length = max(1, int(round(samples.size * target_rate / sample_rate)))
        source_times = np.arange(samples.size) / sample_rate
        target_times = np.arange(new_length) / target_rate
        return np.interp(target_times, source_times, samples).astype(np.float32)

    def transcribe(self, audio_data: np.ndarray, sample_rate: int = 16000) -> Optional[str]:
        """
        Transcribe audio data.

        Args:
            audio_data: Audio data as numpy array
            sample_rate: Sample rate of the audio; audio at any other rate
                than 16 kHz is resampled before transcription

        Returns:
            Transcribed text ("" for empty audio), or None if the engine is
            not initialized or transcription failed
        """
        with self.lock:
            if not self.is_initialized:
                return None
            try:
                # Use faster-whisper directly for one-shot transcription
                from faster_whisper import WhisperModel
                if self._whisper_model is None:
                    self._whisper_model = WhisperModel(
                        self.model_name,
                        device=self.device,
                        compute_type="default"
                    )
                samples = self._to_target_rate(
                    audio_data, sample_rate, self.TARGET_SAMPLE_RATE
                )
                if samples.size == 0:
                    return ""  # Nothing to transcribe
                # Transcribe
                segments, info = self._whisper_model.transcribe(
                    samples,
                    beam_size=5,
                    language="en"
                )
                # Combine segments
                text = " ".join(segment.text for segment in segments)
                return text.strip()
            except Exception as e:
                logger.error(f"Transcription error: {e}")
                return None
 # Global transcription engine
 engine: Optional[TranscriptionEngine] = None
 class ClientConnection:
    """State kept for one authenticated WebSocket client."""
    def __init__(self, websocket: WebSocket, client_id: str):
        # Who the client is and how to reach it.
        self.client_id = client_id
        self.websocket = websocket
        # Audio defaults for the session.
        self.sample_rate = 16000
        self.audio_buffer = list()
        # Moment this record was created.
        self.connected_at = datetime.now()
 # Active connections
 active_connections: Dict[str, ClientConnection] = {}
@app.on_event("startup")
 async def startup_event():
    """Initialize service on startup."""
    load_api_keys()
    global engine
    model = os.environ.get('TRANSCRIPTION_MODEL', 'base.en')
    engine = TranscriptionEngine(model=model)
    # Initialize in background thread to not block startup
    def init_engine():
        engine.initialize()
    Thread(target=init_engine, daemon=True).start()
    logger.info("Remote Transcription Service started")
@app.get("/")
 async def root():
    """Health check endpoint."""
    return {
        "service": "Remote Transcription Service",
        "status": "running",
        "model": engine.model_name if engine else "not loaded",
        "device": engine.device if engine else "unknown",
        "active_connections": len(active_connections)
    }
@app.get("/health")
 async def health():
    """Detailed health check."""
    return {
        "status": "healthy" if engine and engine.is_initialized else "initializing",
        "model": engine.model_name if engine else None,
        "device": engine.device if engine else None,
        "initialized": engine.is_initialized if engine else False,
        "connections": len(active_connections)
    }
@app.websocket("/ws/transcribe")
 async def websocket_transcribe(websocket: WebSocket):
    """
    WebSocket endpoint for audio transcription.
    Protocol:
    1. Client sends: {"type": "auth", "api_key": "your-key"}
    2. Server responds: {"type": "auth_result", "success": true/false}
    3. Client sends audio chunks: {"type": "audio", "data": base64_audio, "sample_rate": 16000}
    4. Server responds with transcription: {"type": "transcription", "text": "...", "is_preview": false}
    5. Client can send: {"type": "end"} to close connection
    """
    await websocket.accept()
    client_id = f"client_{id(websocket)}_{datetime.now().timestamp()}"
    authenticated = False
    logger.info(f"New WebSocket connection: {client_id}")
    try:
        while True:
            data = await websocket.receive_text()
            message = json.loads(data)
            msg_type = message.get("type", "")
            if msg_type == "auth":
                # Authenticate client
                api_key = message.get("api_key", "")
                if verify_api_key(api_key):
                    authenticated = True
                    active_connections[client_id] = ClientConnection(websocket, client_id)
                    await websocket.send_json({
                        "type": "auth_result",
                        "success": True,
                        "message": "Authentication successful"
                    })
                    logger.info(f"Client {client_id} authenticated")
                else:
                    await websocket.send_json({
                        "type": "auth_result",
                        "success": False,
                        "message": "Invalid API key"
                    })
                    logger.warning(f"Client {client_id} failed authentication")
                    await websocket.close(code=4001, reason="Invalid API key")
                    return
            elif msg_type == "audio":
                if not authenticated:
                    await websocket.send_json({
                        "type": "error",
                        "message": "Not authenticated"
                    })
                    continue
                # Decode audio data
                audio_b64 = message.get("data", "")
                sample_rate = message.get("sample_rate", 16000)
                if audio_b64:
                    try:
                        audio_bytes = base64.b64decode(audio_b64)
                        audio_data = np.frombuffer(audio_bytes, dtype=np.float32)
                        # Transcribe
                        if engine and engine.is_initialized:
                            text = engine.transcribe(audio_data, sample_rate)
                            if text:
                                await websocket.send_json({
                                    "type": "transcription",
                                    "text": text,
                                    "is_preview": False,
                                    "timestamp": datetime.now().isoformat()
                                })
                        else:
                            await websocket.send_json({
                                "type": "error",
                                "message": "Transcription engine not ready"
                            })
                    except Exception as e:
                        logger.error(f"Audio processing error: {e}")
                        await websocket.send_json({
                            "type": "error",
                            "message": f"Audio processing error: {str(e)}"
                        })
            elif msg_type == "end":
                logger.info(f"Client {client_id} requested disconnect")
                break
            elif msg_type == "ping":
                await websocket.send_json({"type": "pong"})
    except WebSocketDisconnect:
        logger.info(f"Client {client_id} disconnected")
    except Exception as e:
        logger.error(f"WebSocket error for {client_id}: {e}")
    finally:
        if client_id in active_connections:
            del active_connections[client_id]
 def main():
    """Main entry point: parse CLI arguments and run the server.

    Model precedence: an explicit ``--model`` wins, then the
    ``TRANSCRIPTION_MODEL`` environment variable, then ``base.en``. The
    chosen value is exported through ``TRANSCRIPTION_MODEL`` because the
    startup handler reads the model name from the environment.
    """
    parser = argparse.ArgumentParser(description="Remote Transcription Service")
    parser.add_argument("--host", default="0.0.0.0", help="Host to bind to")
    parser.add_argument("--port", type=int, default=8765, help="Port to bind to")
    parser.add_argument(
        "--model",
        default=None,
        help="Whisper model to use (default: TRANSCRIPTION_MODEL env var, or base.en)",
    )
    args = parser.parse_args()
    # Set model from command line; only an explicit --model may override
    # a value that is already present in the environment.
    if args.model:
        os.environ['TRANSCRIPTION_MODEL'] = args.model
    model = os.environ.setdefault('TRANSCRIPTION_MODEL', 'base.en')
    logger.info(f"Starting Remote Transcription Service on {args.host}:{args.port}")
    # Log the model that will actually be loaded, not just the CLI value.
    logger.info(f"Model: {model}")
    uvicorn.run(
        app,
        host=args.host,
        port=args.port,
        log_level="info"
    )
 if __name__ == "__main__":
    main()
--- a/server/web_display.py
+++ b/server/web_display.py
@@ -1,8 +1,9 @@
 """Web server for displaying transcriptions in a browser (for OBS browser source)."""
 import asyncio
 from pathlib import Path
 from fastapi import FastAPI, WebSocket
-from fastapi.responses import HTMLResponse
+from fastapi.responses import HTMLResponse, FileResponse
 from typing import List, Optional
 import json
 from datetime import datetime
@@ -11,7 +12,11 @@ from datetime import datetime
 class TranscriptionWebServer:
    """Web server for displaying transcriptions."""
-    def __init__(self, host: str = "127.0.0.1", port: int = 8080, show_timestamps: bool = True, fade_after_seconds: int = 10, max_lines: int = 50, font_family: str = "Arial", font_size: int = 16):
+    def __init__(self, host: str = "127.0.0.1", port: int = 8080, show_timestamps: bool = True,
                 fade_after_seconds: int = 10, max_lines: int = 50, font_family: str = "Arial",
                 font_size: int = 16, fonts_dir: Optional[Path] = None,
                 font_source: str = "System Font", websafe_font: str = "Arial",
                 google_font: str = "Roboto"):
        """
        Initialize web server.
@@ -21,8 +26,12 @@ class TranscriptionWebServer:
            show_timestamps: Whether to show timestamps in transcriptions
            fade_after_seconds: Time in seconds before transcriptions fade out (0 = never fade)
            max_lines: Maximum number of lines to display at once
-            font_family: Font family for display
+            font_family: Font family for display (system font)
            font_size: Font size in pixels
            fonts_dir: Directory containing custom font files
            font_source: Font source type ("System Font", "Web-Safe", "Google Font")
            websafe_font: Web-safe font name
            google_font: Google Font name
        """
        self.host = host
        self.port = port
@@ -31,6 +40,10 @@ class TranscriptionWebServer:
        self.max_lines = max_lines
        self.font_family = font_family
        self.font_size = font_size
        self.fonts_dir = fonts_dir
        self.font_source = font_source
        self.websafe_font = websafe_font
        self.google_font = google_font
        self.app = FastAPI()
        self.active_connections: List[WebSocket] = []
        self.transcriptions = []  # Store recent transcriptions
@@ -46,6 +59,23 @@ class TranscriptionWebServer:
            """Serve the transcription display page."""
            return self._get_html()
        @self.app.get("/fonts/{font_file}")
        async def serve_font(font_file: str):
            """Serve a custom font file from ``self.fonts_dir``.

            Only plain file names with a known font extension are served.
            Anything else (no fonts directory, a name containing a path
            separator, a missing file, a directory, or another extension)
            yields a 404 response.
            """
            # Extension -> MIME type; also the whitelist of servable files.
            mime_types = {
                '.ttf': 'font/ttf',
                '.otf': 'font/otf',
                '.woff': 'font/woff',
                '.woff2': 'font/woff2'
            }
            not_found = HTMLResponse(status_code=404, content="Font not found")
            if not self.fonts_dir:
                return not_found
            # The name is client-supplied: refuse anything that could point
            # outside fonts_dir (backslashes act as separators on Windows).
            if font_file in {"", ".", ".."} or "/" in font_file or "\\" in font_file:
                return not_found
            font_path = self.fonts_dir / font_file
            media_type = mime_types.get(font_path.suffix.lower())
            if media_type is None or not font_path.is_file():
                return not_found
            return FileResponse(font_path, media_type=media_type)
        @self.app.websocket("/ws")
        async def websocket_endpoint(websocket: WebSocket):
            """WebSocket endpoint for real-time updates."""
@@ -64,19 +94,70 @@ class TranscriptionWebServer:
            except:
                self.active_connections.remove(websocket)
    def _get_font_face_css(self) -> str:
        """Generate @font-face CSS rules for custom fonts."""
        if not self.fonts_dir or not self.fonts_dir.exists():
            return ""
        css_rules = []
        font_extensions = {'.ttf', '.otf', '.woff', '.woff2'}
        format_map = {
            '.ttf': 'truetype',
            '.otf': 'opentype',
            '.woff': 'woff',
            '.woff2': 'woff2'
        }
        for font_file in self.fonts_dir.iterdir():
            if font_file.suffix.lower() in font_extensions:
                font_name = font_file.stem
                font_format = format_map.get(font_file.suffix.lower(), 'truetype')
                css_rules.append(f"""
        @font-face {{
            font-family: '{font_name}';
            src: url('/fonts/{font_file.name}') format('{font_format}');
            font-weight: normal;
            font-style: normal;
        }}""")
        return "\n".join(css_rules)
    def _get_effective_font(self) -> str:
        """Get the effective font family based on font_source setting."""
        if self.font_source == "Google Font" and self.google_font:
            return self.google_font
        elif self.font_source == "Web-Safe" and self.websafe_font:
            return self.websafe_font
        else:
            return self.font_family
    def _get_google_font_link(self) -> str:
        """Generate Google Fonts link tag if using Google Font."""
        if self.font_source == "Google Font" and self.google_font:
            font_name = self.google_font.replace(' ', '+')
            return f'<link rel="stylesheet" href="https://fonts.googleapis.com/css2?family={font_name}&display=swap">'
        return ""
    def _get_html(self) -> str:
        """Generate HTML for transcription display."""
        # Generate custom font CSS
        font_face_css = self._get_font_face_css()
        google_font_link = self._get_google_font_link()
        effective_font = self._get_effective_font()
        return f"""
 <!DOCTYPE html>
 <html>
 <head>
    <title>Transcription Display</title>
    {google_font_link}
    <style>
        {font_face_css}
        body {{
            margin: 0;
            padding: 20px;
            background: transparent;
-            font-family: {self.font_family}, sans-serif;
+            font-family: '{effective_font}', sans-serif;
            font-size: {self.font_size}px;
            color: white;
            overflow: hidden;
@@ -108,6 +189,14 @@ class TranscriptionWebServer:
        .text {{
            color: white;
        }}
        .transcription.preview {{
            font-style: italic;
        }}
        .preview-indicator {{
            color: #888;
            font-size: 0.85em;
            margin-right: 5px;
        }}
        @keyframes slideIn {{
            from {{
                opacity: 0;
@@ -129,9 +218,15 @@ class TranscriptionWebServer:
        const fadeAfterSeconds = {self.fade_after_seconds};
        const maxLines = {self.max_lines};
        let currentPreviewElement = null;
        ws.onmessage = (event) => {{
            const data = JSON.parse(event.data);
-            addTranscription(data);
+            if (data.is_preview) {{
                handlePreview(data);
            }} else {{
                addTranscription(data);
            }}
        }};
        ws.onclose = () => {{
@@ -146,35 +241,86 @@ class TranscriptionWebServer:
            }}
        }}, 30000);
-        function addTranscription(data) {{
+        function handlePreview(data) {{
            // If there's already a preview, update it
            if (currentPreviewElement) {{
                updatePreviewContent(currentPreviewElement, data);
            }} else {{
                // Create new preview element
                currentPreviewElement = createTranscriptionElement(data, true);
                container.appendChild(currentPreviewElement);
            }}
            // Enforce max lines limit
            while (container.children.length > maxLines) {{
                const first = container.firstChild;
                if (first === currentPreviewElement) break; // Don't remove current preview
                container.removeChild(first);
            }}
        }}
        function updatePreviewContent(element, data) {{
            let html = '';
            if (data.timestamp) {{
                html += `<span class="timestamp">[${{data.timestamp}}]</span>`;
            }}
            if (data.user_name && data.user_name.trim()) {{
                html += `<span class="user">${{data.user_name}}:</span>`;
            }}
            html += `<span class="preview-indicator">[...]</span>`;
            html += `<span class="text">${{data.text}}</span>`;
            element.innerHTML = html;
        }}
        function createTranscriptionElement(data, isPreview) {{
            const div = document.createElement('div');
-            div.className = 'transcription';
+            div.className = isPreview ? 'transcription preview' : 'transcription';
            let html = '';
            if (data.timestamp) {{
                html += `<span class="timestamp">[${{data.timestamp}}]</span>`;
            }}
-            if (data.user_name) {{
+            if (data.user_name && data.user_name.trim()) {{
                html += `<span class="user">${{data.user_name}}:</span>`;
            }}
            if (isPreview) {{
                html += `<span class="preview-indicator">[...]</span>`;
            }}
            html += `<span class="text">${{data.text}}</span>`;
            div.innerHTML = html;
-            container.appendChild(div);
+            return div;
        }}
-            // Set up fade-out if enabled
+        function addTranscription(data) {{
-            if (fadeAfterSeconds > 0) {{
+            // If there's a preview, replace it with final transcription
-                setTimeout(() => {{
+            if (currentPreviewElement) {{
-                    // Start fade animation
+                currentPreviewElement.className = 'transcription';
-                    div.classList.add('fading');
+                let html = '';
                if (data.timestamp) {{
                    html += `<span class="timestamp">[${{data.timestamp}}]</span>`;
                }}
                if (data.user_name && data.user_name.trim()) {{
                    html += `<span class="user">${{data.user_name}}:</span>`;
                }}
                html += `<span class="text">${{data.text}}</span>`;
                currentPreviewElement.innerHTML = html;
-                    // Remove element after fade completes
+                // Set up fade-out for the final transcription
-                    setTimeout(() => {{
+                if (fadeAfterSeconds > 0) {{
-                        if (div.parentNode === container) {{
+                    setupFadeOut(currentPreviewElement);
-                            container.removeChild(div);
+                }}
-                        }}
+
-                    }}, 1000); // Match the CSS transition duration
+                currentPreviewElement = null;
-                }}, fadeAfterSeconds * 1000);
+            }} else {{
                // No preview to replace, add new element
                const div = createTranscriptionElement(data, false);
                container.appendChild(div);
                // Set up fade-out if enabled
                if (fadeAfterSeconds > 0) {{
                    setupFadeOut(div);
                }}
            }}
            // Enforce max lines limit
@@ -182,6 +328,20 @@ class TranscriptionWebServer:
                container.removeChild(container.firstChild);
            }}
        }}
        // Schedule an element to fade out and then be removed from the display.
        // After fadeAfterSeconds seconds the 'fading' class is added; one second
        // later the element is detached, provided it is still a child of container.
        function setupFadeOut(element) {{
            const removeIfStillShown = () => {{
                // Skip removal when the element has already left the container
                if (element.parentNode !== container) {{
                    return;
                }}
                container.removeChild(element);
            }};

            const beginFade = () => {{
                // Start fade animation
                element.classList.add('fading');
                // Detach once the fade completes (1000 ms matches the CSS transition duration)
                setTimeout(removeIfStillShown, 1000);
            }};

            setTimeout(beginFade, fadeAfterSeconds * 1000);
        }}
    </script>
 </body>
 </html>
@@ -225,6 +385,43 @@ class TranscriptionWebServer:
        for conn in disconnected:
            self.active_connections.remove(conn)
    async def broadcast_preview(self, text: str, user_name: str = "", timestamp: Optional[datetime] = None):
        """
        Broadcast a preview transcription to all connected clients.

        The payload carries ``"is_preview": True`` so the display page can tell
        it apart from a final transcription and replace it once the final
        result arrives. Previews are temporary and are therefore only sent to
        clients, never stored.

        Connections whose send fails are dropped from ``self.active_connections``.

        Args:
            text: Preview transcription text
            user_name: User/speaker name
            timestamp: Timestamp of transcription; defaults to the current
                local time. Only sent when ``self.show_timestamps`` is true.
        """
        trans_data = {
            "text": text,
            "user_name": user_name,
            "is_preview": True,  # Flag to indicate this is a preview
        }

        # Only include (and only compute) the timestamp if enabled
        if self.show_timestamps:
            if timestamp is None:
                timestamp = datetime.now()
            trans_data["timestamp"] = timestamp.strftime("%H:%M:%S")

        # Iterate over a snapshot: every ``await`` yields to the event loop,
        # where other coroutines may add or remove connections.
        disconnected = []
        for connection in list(self.active_connections):
            try:
                await connection.send_json(trans_data)
            except Exception:
                # Not a bare ``except``: task cancellation and interpreter
                # shutdown must propagate rather than be mistaken for a
                # dead client.
                disconnected.append(connection)

        # Remove disconnected clients; a concurrent broadcast may already
        # have pruned the same connection, so check membership first.
        for conn in disconnected:
            if conn in self.active_connections:
                self.active_connections.remove(conn)
    async def start(self):
        """Start the web server."""
        import uvicorn
--- a/version.py
+++ b/version.py
@@ -0,0 +1,15 @@
 """Version information for Local Transcription."""
 __version__ = "1.2.4"
 # Numeric (major, minor, patch) form, derived from the string above so the
 # two can never drift apart. Expects a purely numeric dotted version.
 __version_info__ = tuple(int(part) for part in __version__.split("."))
 # Version history:
 # 1.0.0 - Initial release with:
 #   - Real-time speech-to-text transcription using Whisper models
 #   - Local web display for OBS browser source integration
 #   - Multi-user server sync with Node.js backend
 #   - Two-stage transcription (fast preview + refined final)
 #   - Custom font support (local and forwarded to sync server)
 #   - Single instance prevention
 #   - Fast speaker mode for continuous speech
 #   - Remote GPU processing offload support