diff --git a/CLAUDE.md b/CLAUDE.md index cedaf50..d1e6dd2 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -10,7 +10,7 @@ Local Transcription is a desktop application for real-time speech-to-text transc - Standalone desktop GUI (PySide6/Qt) - Local transcription with CPU/GPU support - Built-in web server for OBS browser source integration -- Optional PHP-based multi-user server for syncing transcriptions across users +- Optional Node.js-based multi-user server for syncing transcriptions across users - Noise suppression and Voice Activity Detection (VAD) - Cross-platform builds (Linux/Windows) with PyInstaller @@ -29,12 +29,12 @@ local-transcription/ │ ├── main_window_qt.py # Main application window (PySide6) │ ├── settings_dialog_qt.py # Settings dialog (PySide6) │ └── transcription_display_qt.py # Display widget -├── server/ # Web display server -│ ├── web_display.py # FastAPI server for OBS browser source -│ └── php/ # Optional multi-user PHP server -│ ├── server.php # Multi-user sync server -│ ├── display.php # Multi-user web display -│ └── README.md # PHP server documentation +├── server/ # Web display servers +│ ├── web_display.py # FastAPI server for OBS browser source (local) +│ └── nodejs/ # Optional multi-user Node.js server +│ ├── server.js # Multi-user sync server with WebSocket +│ ├── package.json # Node.js dependencies +│ └── README.md # Server deployment documentation ├── config/ # Example configuration files │ └── default_config.yaml # Default settings template ├── main.py # GUI application entry point @@ -128,28 +128,20 @@ uv run python -m uvicorn server.web_display:app --reload - Used for OBS browser source integration - Single-user (displays only local transcriptions) -**Multi-User Servers** (Optional - for syncing across multiple users) +**Multi-User Server** (Optional - for syncing across multiple users) -Three options available: +**Node.js WebSocket Server** ([server/nodejs/](server/nodejs/)) - **RECOMMENDED** +- Real-time WebSocket support (< 100ms 
latency) +- Handles 100+ concurrent users +- Easy deployment to VPS/cloud hosting (Railway, Heroku, DigitalOcean, or any VPS) +- Configurable display options via URL parameters: + - `timestamps=true/false` - Show/hide timestamps + - `maxlines=50` - Maximum visible lines (prevents scroll bars in OBS) + - `fontsize=16` - Font size in pixels + - `fontfamily=Arial` - Font family + - `fade=10` - Seconds before text fades (0 = never) -1. **PHP with Polling** ([server/php/display-polling.php](server/php/display-polling.php)) - **RECOMMENDED for PHP** - - Works on ANY shared hosting (no buffering issues) - - Uses HTTP polling instead of SSE - - 1-2 second latency, very reliable - - File-based storage, no database needed - -2. **Node.js WebSocket Server** ([server/nodejs/](server/nodejs/)) - **BEST PERFORMANCE** - - Real-time WebSocket support (< 100ms latency) - - Handles 100+ concurrent users - - Requires VPS/cloud hosting (Railway, Heroku, DigitalOcean) - - Much better than PHP for real-time applications - -3. **PHP with SSE** ([server/php/display.php](server/php/display.php)) - **NOT RECOMMENDED** - - Has buffering issues on most shared hosting - - PHP-FPM incompatibility - - Use polling or Node.js instead - -See [server/COMPARISON.md](server/COMPARISON.md) and [server/QUICK_FIX.md](server/QUICK_FIX.md) for details +See [server/nodejs/README.md](server/nodejs/README.md) for deployment instructions ### Configuration System @@ -281,19 +273,17 @@ See [server/COMPARISON.md](server/COMPARISON.md) and [server/QUICK_FIX.md](serve 3. URL: `http://localhost:8080` 4. Set dimensions (e.g., 1920x300) -### Multi-User Display (PHP Server - Polling) -1. Deploy PHP server to web hosting -2. Each user enables "Server Sync" in settings -3. Enter same room name and passphrase -4. In OBS: Add "Browser" source -5. URL: `https://your-domain.com/transcription/display-polling.php?room=ROOM&fade=10` - ### Multi-User Display (Node.js Server) 1. 
Deploy Node.js server (see [server/nodejs/README.md](server/nodejs/README.md)) 2. Each user configures Server URL: `http://your-server:3000/api/send` 3. Enter same room name and passphrase 4. In OBS: Add "Browser" source -5. URL: `http://your-server:3000/display?room=ROOM&fade=10` +5. URL: `http://your-server:3000/display?room=ROOM&fade=10&timestamps=true&maxlines=50&fontsize=16` +6. Customize URL parameters as needed: + - `timestamps=false` - Hide timestamps + - `maxlines=30` - Show max 30 lines (prevents scroll bars) + - `fontsize=18` - Larger font + - `fontfamily=Courier` - Different font ## Performance Optimization @@ -314,7 +304,7 @@ See [server/COMPARISON.md](server/COMPARISON.md) and [server/QUICK_FIX.md](serve - ✅ **Phase 1**: Standalone desktop application (complete) - ✅ **Web Server**: Local OBS integration (complete) - ✅ **Builds**: PyInstaller executables (complete) -- 🚧 **Phase 2**: Multi-user PHP server (functional, optional) +- ✅ **Phase 2**: Multi-user Node.js server (complete, optional) - ⏸️ **Phase 3+**: Advanced features (see [NEXT_STEPS.md](NEXT_STEPS.md)) ## Related Documentation @@ -323,4 +313,4 @@ See [server/COMPARISON.md](server/COMPARISON.md) and [server/QUICK_FIX.md](serve - [BUILD.md](BUILD.md) - Detailed build instructions - [INSTALL.md](INSTALL.md) - Installation guide - [NEXT_STEPS.md](NEXT_STEPS.md) - Future enhancements -- [server/php/README.md](server/php/README.md) - PHP server setup +- [server/nodejs/README.md](server/nodejs/README.md) - Node.js server setup and deployment diff --git a/config/default_config.yaml b/config/default_config.yaml index 99f4f6a..5809ee6 100644 --- a/config/default_config.yaml +++ b/config/default_config.yaml @@ -25,7 +25,7 @@ processing: server_sync: enabled: false - url: "http://localhost/transcription/server.php" + url: "http://localhost:3000/api/send" room: "default" passphrase: "" diff --git a/gui/main_window_qt.py b/gui/main_window_qt.py index 8946f99..f2f4f85 100644 --- a/gui/main_window_qt.py +++ 
b/gui/main_window_qt.py @@ -273,6 +273,9 @@ class MainWindow(QMainWindow): port = self.config.get('web_server.port', 8080) show_timestamps = self.config.get('display.show_timestamps', True) fade_after_seconds = self.config.get('display.fade_after_seconds', 10) + max_lines = self.config.get('display.max_lines', 50) + font_family = self.config.get('display.font_family', 'Arial') + font_size = self.config.get('display.font_size', 16) # Try up to 5 ports if the default is in use ports_to_try = [port] + [port + i for i in range(1, 5)] @@ -284,7 +287,10 @@ class MainWindow(QMainWindow): host=host, port=try_port, show_timestamps=show_timestamps, - fade_after_seconds=fade_after_seconds + fade_after_seconds=fade_after_seconds, + max_lines=max_lines, + font_family=font_family, + font_size=font_size ) self.web_server_thread = WebServerThread(self.web_server) self.web_server_thread.start() @@ -530,6 +536,9 @@ class MainWindow(QMainWindow): if self.web_server: self.web_server.show_timestamps = show_timestamps self.web_server.fade_after_seconds = self.config.get('display.fade_after_seconds', 10) + self.web_server.max_lines = self.config.get('display.max_lines', 50) + self.web_server.font_family = self.config.get('display.font_family', 'Arial') + self.web_server.font_size = self.config.get('display.font_size', 16) # Restart server sync if it was running and settings changed if self.is_transcribing and self.server_sync_client: diff --git a/gui/settings_dialog_qt.py b/gui/settings_dialog_qt.py index ebcdf5e..cb6af4a 100644 --- a/gui/settings_dialog_qt.py +++ b/gui/settings_dialog_qt.py @@ -155,7 +155,7 @@ class SettingsDialog(QDialog): server_layout.addRow("Enable Server Sync:", self.server_enabled_check) self.server_url_input = QLineEdit() - self.server_url_input.setPlaceholderText("http://example.com/transcription/server.php") + self.server_url_input.setPlaceholderText("http://your-server:3000/api/send") server_layout.addRow("Server URL:", self.server_url_input) 
self.server_room_input = QLineEdit() diff --git a/server/nodejs/server.js b/server/nodejs/server.js index fb4463d..49b4e65 100644 --- a/server/nodejs/server.js +++ b/server/nodejs/server.js @@ -411,6 +411,19 @@ app.get('/', (req, res) => {

Add a Browser source in OBS and paste this URL. Set width to 1920 and height to 200-400px.

+
+ ⚙️ URL Parameters (Optional) + +

+ Example: ?room=myroom&fade=15&timestamps=false&maxlines=30&fontsize=18 +

+
@@ -528,7 +541,7 @@ app.get('/', (req, res) => { // Build URLs const serverUrl = \`http://\${window.location.host}/api/send\`; - const displayUrl = \`http://\${window.location.host}/display?room=\${encodeURIComponent(room)}&fade=10×tamps=true\`; + const displayUrl = \`http://\${window.location.host}/display?room=\${encodeURIComponent(room)}&fade=10×tamps=true&maxlines=50&fontsize=16&fontfamily=Arial\`; // Update UI document.getElementById('serverUrl').textContent = serverUrl; @@ -636,7 +649,7 @@ app.get('/api/list', async (req, res) => { // Serve display page app.get('/display', (req, res) => { - const { room = 'default', fade = '10', timestamps = 'true' } = req.query; + const { room = 'default', fade = '10', timestamps = 'true', maxlines = '50', fontsize = '16', fontfamily = 'Arial' } = req.query; res.send(` @@ -649,12 +662,13 @@ app.get('/display', (req, res) => { margin: 0; padding: 20px; background: transparent; - font-family: Arial, sans-serif; + font-family: ${fontfamily}, sans-serif; + font-size: ${fontsize}px; color: white; + overflow: hidden; } #transcriptions { - max-height: 100vh; - overflow-y: auto; + overflow: hidden; } .transcription { margin: 10px 0; @@ -705,7 +719,8 @@ app.get('/display', (req, res) => { - - diff --git a/server/php/display-polling.php b/server/php/display-polling.php deleted file mode 100644 index 526f83c..0000000 --- a/server/php/display-polling.php +++ /dev/null @@ -1,230 +0,0 @@ - - - - Multi-User Transcription Display (Polling) - - - - - -
🟡 Polling...
-
- - - - diff --git a/server/php/display.php b/server/php/display.php deleted file mode 100644 index ce63225..0000000 --- a/server/php/display.php +++ /dev/null @@ -1,183 +0,0 @@ - - - - Multi-User Transcription Display - - - - - -
⚫ Connecting...
-
- - - - diff --git a/server/php/index.html b/server/php/index.html deleted file mode 100644 index 64fcb7c..0000000 --- a/server/php/index.html +++ /dev/null @@ -1,360 +0,0 @@ - - - - - - Multi-User Transcription Server - - - -
-
-

🎙️ Multi-User Transcription Server

-

Merge captions from multiple streamers into a single OBS display

-
- -
- -
-

What is this?

-

This server allows multiple streamers using the Local Transcription app to merge their real-time captions into a single stream. Perfect for collaborative streams, podcasts, or gaming sessions with multiple commentators.

- -
-
-

🔒 Secure

-

Room-based isolation with passphrase authentication

-
-
-

🎨 Colorful

-

Each user gets a unique color (supports 20+ users)

-
-
-

⚡ Real-time

-

Low-latency streaming via Server-Sent Events

-
-
-

🌐 Universal

-

Works on any standard PHP hosting

-
-
-
- - -
-

Get Started

-

Click the button below to generate a unique room with random credentials:

- -
- - -
-
-

📱 For Desktop App Users

-
Room Name:
-
-
- - -
Passphrase:
-
-
- - -
Server URL:
-
-
- -
- -
-

📺 For OBS Browser Source

-
Display URL:
-
-
- - -
- Note: The display URL does not contain the passphrase for security. Only users with the passphrase can send transcriptions. -
-
-
-
-
- - -
-

How to Use

-
-
-

Generate Room Credentials

-

Click "Generate New Room" above to create a unique room with a random name and passphrase. Share these with your streaming team.

-
-
-

Configure Desktop App

-

In the Local Transcription app, go to Settings → Server Sync and enter:

-
    -
  • Enable Server Sync: ✓
  • -
  • Server URL: (from above)
  • -
  • Room Name: (from above)
  • -
  • Passphrase: (from above)
  • -
-
-
-

Add to OBS

-

In OBS, add a Browser source and paste the Display URL. Set width to 1920 and height to your preference (e.g., 200-400px).

-
-
-

Start Streaming!

-

All team members start transcription in their apps. Captions from everyone appear merged in OBS with different colors per person.

-
-
-
- - -
-

Frequently Asked Questions

- -

How many users can join one room?

-

Technically unlimited, but we've tested up to 20 users successfully. Each user gets a unique color.

- -

Is my passphrase secure?

-

Yes! Passphrases are hashed using PHP's password_hash() function. They're never stored in plain text.

- -

How long does a room stay active?

-

Rooms are automatically cleaned up after 2 hours of inactivity to save server resources.

- -

Can I use custom room names?

-

Yes! You can use any room name you want instead of the randomly generated one. Just make sure all team members use the exact same name.

-
-
-
- - - - diff --git a/server/php/server.php b/server/php/server.php deleted file mode 100644 index 1d3f356..0000000 --- a/server/php/server.php +++ /dev/null @@ -1,282 +0,0 @@ - sanitize($data['user_name']), - 'text' => sanitize($data['text']), - 'timestamp' => $data['timestamp'] ?? date('H:i:s'), - 'created_at' => time() - ]; - - // Add to room - addTranscription($room, $transcription); - - // Cleanup old sessions - cleanupOldSessions(); - - // Success response - sendJson(['status' => 'ok', 'message' => 'Transcription added']); -} - -/** - * Handle streaming transcriptions via Server-Sent Events - * Note: Passphrase is optional for streaming (read-only access) - */ -function handleStream() { - // Get parameters - $room = sanitize($_GET['room'] ?? ''); - - if (empty($room)) { - sendError('Missing room name', 400); - } - - // Set SSE headers - header('Content-Type: text/event-stream'); - header('Cache-Control: no-cache'); - header('X-Accel-Buffering: no'); // Disable nginx buffering - - // Passphrase is optional for streaming (read-only) - // If room doesn't exist yet, we'll keep the connection open and wait for it - - // Track last known count - $lastCount = 0; - - // Stream loop - while (true) { - $transcriptions = getTranscriptions($room); - $currentCount = count($transcriptions); - - // If new transcriptions, send them - if ($currentCount > $lastCount) { - $newTranscriptions = array_slice($transcriptions, $lastCount); - foreach ($newTranscriptions as $trans) { - echo "data: " . json_encode($trans) . 
"\n\n"; - flush(); - } - $lastCount = $currentCount; - } - - // Send keepalive comment every 1 second (keeps SSE connection alive) - echo ": keepalive\n\n"; - flush(); - - // Check if client disconnected - if (connection_aborted()) { - break; - } - - // Wait before next check - sleep(1); - } -} - -/** - * Handle listing recent transcriptions - * Note: Passphrase is optional for listing (read-only access) - */ -function handleList() { - $room = sanitize($_GET['room'] ?? ''); - - if (empty($room)) { - sendError('Missing room name', 400); - } - - // Passphrase is optional for read-only access - // If room doesn't exist, return empty array - $transcriptions = getTranscriptions($room); - sendJson(['transcriptions' => $transcriptions]); -} - -/** - * Handle info request - */ -function handleInfo() { - sendJson([ - 'service' => 'Local Transcription Multi-User Server', - 'version' => '1.0.0', - 'endpoints' => [ - 'POST ?action=send' => 'Send a transcription', - 'GET ?action=stream' => 'Stream transcriptions (SSE)', - 'GET ?action=list' => 'List recent transcriptions' - ] - ]); -} - -/** - * Verify passphrase for a room - */ -function verifyPassphrase($room, $passphrase) { - $file = getRoomFile($room); - - // If room doesn't exist, create it with this passphrase - if (!file_exists($file)) { - $roomData = [ - 'passphrase_hash' => password_hash($passphrase, PASSWORD_DEFAULT), - 'created_at' => time(), - 'transcriptions' => [] - ]; - file_put_contents($file, json_encode($roomData)); - return true; - } - - // Verify passphrase - $roomData = json_decode(file_get_contents($file), true); - return password_verify($passphrase, $roomData['passphrase_hash']); -} - -/** - * Add transcription to room - */ -function addTranscription($room, $transcription) { - $file = getRoomFile($room); - $roomData = json_decode(file_get_contents($file), true); - - // Add transcription - $roomData['transcriptions'][] = $transcription; - - // Limit to max transcriptions - if 
(count($roomData['transcriptions']) > MAX_TRANSCRIPTIONS_PER_ROOM) { - $roomData['transcriptions'] = array_slice( - $roomData['transcriptions'], - -MAX_TRANSCRIPTIONS_PER_ROOM - ); - } - - // Update last activity - $roomData['last_activity'] = time(); - - // Save - file_put_contents($file, json_encode($roomData)); -} - -/** - * Get transcriptions for a room - */ -function getTranscriptions($room) { - $file = getRoomFile($room); - if (!file_exists($file)) { - return []; - } - - $roomData = json_decode(file_get_contents($file), true); - return $roomData['transcriptions'] ?? []; -} - -/** - * Get room data file path - */ -function getRoomFile($room) { - return STORAGE_DIR . '/room_' . md5($room) . '.json'; -} - -/** - * Check if room exists - */ -function roomExists($room) { - return file_exists(getRoomFile($room)); -} - -/** - * Cleanup old sessions - */ -function cleanupOldSessions() { - $files = glob(STORAGE_DIR . '/room_*.json'); - $now = time(); - - foreach ($files as $file) { - $data = json_decode(file_get_contents($file), true); - $lastActivity = $data['last_activity'] ?? 
$data['created_at']; - - if ($now - $lastActivity > CLEANUP_THRESHOLD) { - unlink($file); - } - } -} - -/** - * Sanitize input - */ -function sanitize($input) { - return htmlspecialchars(strip_tags(trim($input)), ENT_QUOTES, 'UTF-8'); -} - -/** - * Send JSON response - */ -function sendJson($data, $code = 200) { - http_response_code($code); - header('Content-Type: application/json'); - echo json_encode($data); - exit(); -} - -/** - * Send error response - */ -function sendError($message, $code = 400) { - sendJson(['error' => $message], $code); -} -?> diff --git a/server/web_display.py b/server/web_display.py index 26a369f..72a804e 100644 --- a/server/web_display.py +++ b/server/web_display.py @@ -11,7 +11,7 @@ from datetime import datetime class TranscriptionWebServer: """Web server for displaying transcriptions.""" - def __init__(self, host: str = "127.0.0.1", port: int = 8080, show_timestamps: bool = True, fade_after_seconds: int = 10): + def __init__(self, host: str = "127.0.0.1", port: int = 8080, show_timestamps: bool = True, fade_after_seconds: int = 10, max_lines: int = 50, font_family: str = "Arial", font_size: int = 16): """ Initialize web server. 
@@ -20,11 +20,17 @@ class TranscriptionWebServer: port: Server port show_timestamps: Whether to show timestamps in transcriptions fade_after_seconds: Time in seconds before transcriptions fade out (0 = never fade) + max_lines: Maximum number of lines to display at once + font_family: Font family for display + font_size: Font size in pixels """ self.host = host self.port = port self.show_timestamps = show_timestamps self.fade_after_seconds = fade_after_seconds + self.max_lines = max_lines + self.font_family = font_family + self.font_size = font_size self.app = FastAPI() self.active_connections: List[WebSocket] = [] self.transcriptions = [] # Store recent transcriptions @@ -70,12 +76,13 @@ class TranscriptionWebServer: margin: 0; padding: 20px; background: transparent; - font-family: Arial, sans-serif; + font-family: {self.font_family}, sans-serif; + font-size: {self.font_size}px; color: white; + overflow: hidden; }} #transcriptions {{ - max-height: 100vh; - overflow-y: auto; + overflow: hidden; }} .transcription {{ margin: 10px 0; @@ -120,6 +127,7 @@ class TranscriptionWebServer: const container = document.getElementById('transcriptions'); const ws = new WebSocket(`ws://${{window.location.host}}/ws`); const fadeAfterSeconds = {self.fade_after_seconds}; + const maxLines = {self.max_lines}; ws.onmessage = (event) => {{ const data = JSON.parse(event.data); @@ -154,9 +162,6 @@ class TranscriptionWebServer: div.innerHTML = html; container.appendChild(div); - // Auto-scroll to bottom - container.scrollTop = container.scrollHeight; - // Set up fade-out if enabled if (fadeAfterSeconds > 0) {{ setTimeout(() => {{ @@ -172,8 +177,8 @@ class TranscriptionWebServer: }}, fadeAfterSeconds * 1000); }} - // Limit to 50 transcriptions (fallback) - while (container.children.length > 50) {{ + // Enforce max lines limit + while (container.children.length > maxLines) {{ container.removeChild(container.firstChild); }} }}