Initial commit: Local Transcription App v1.0
Phase 1 Complete - Standalone Desktop Application

Features:
- Real-time speech-to-text with Whisper (faster-whisper)
- PySide6 desktop GUI with settings dialog
- Web server for OBS browser source integration
- Audio capture with automatic sample rate detection and resampling
- Noise suppression with Voice Activity Detection (VAD)
- Configurable display settings (font, timestamps, fade duration)
- Settings apply without restart (with automatic model reloading)
- Auto-fade for web display transcriptions
- CPU/GPU support with automatic device detection
- Standalone executable builds (PyInstaller)
- CUDA build support (works on systems without CUDA hardware)

Components:
- Audio capture with sounddevice
- Noise reduction with noisereduce + webrtcvad
- Transcription with faster-whisper
- GUI with PySide6
- Web server with FastAPI + WebSocket
- Configuration system with YAML

Build System:
- Standard builds (CPU-only): build.sh / build.bat
- CUDA builds (universal): build-cuda.sh / build-cuda.bat
- Comprehensive BUILD.md documentation
- Cross-platform support (Linux, Windows)

Documentation:
- README.md with project overview and quick start
- BUILD.md with detailed build instructions
- NEXT_STEPS.md with future enhancement roadmap
- INSTALL.md with setup instructions

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
0
server/__init__.py
Normal file
0
server/__init__.py
Normal file
233
server/web_display.py
Normal file
233
server/web_display.py
Normal file
@@ -0,0 +1,233 @@
|
||||
"""Web server for displaying transcriptions in a browser (for OBS browser source)."""
|
||||
|
||||
import asyncio
|
||||
from fastapi import FastAPI, WebSocket
|
||||
from fastapi.responses import HTMLResponse
|
||||
from typing import List, Optional
|
||||
import json
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
class TranscriptionWebServer:
    """Web server that pushes live transcriptions to browser clients.

    The server exposes two routes on a FastAPI app:

    * ``GET /`` returns a self-contained HTML page (suitable as an OBS
      browser source) that opens a WebSocket back to the server.
    * ``/ws`` is the WebSocket over which each transcription is sent as a
      JSON object with the keys ``text``, ``user_name`` and, when timestamps
      are enabled, ``timestamp``.
    """

    # Maximum number of transcriptions kept in memory.
    _HISTORY_LIMIT = 100
    # Number of most recent transcriptions replayed to a newly connected client.
    _REPLAY_LIMIT = 20

    def __init__(self, host: str = "127.0.0.1", port: int = 8080, show_timestamps: bool = True, fade_after_seconds: int = 10):
        """
        Initialize web server.

        Args:
            host: Server host address
            port: Server port
            show_timestamps: Whether to show timestamps in transcriptions
            fade_after_seconds: Time in seconds before transcriptions fade out (0 = never fade)
        """
        self.host = host
        self.port = port
        self.show_timestamps = show_timestamps
        self.fade_after_seconds = fade_after_seconds
        self.app = FastAPI()
        self.active_connections: List[WebSocket] = []
        self.transcriptions = []  # Recent transcriptions, oldest first

        # Setup routes
        self._setup_routes()

    def _setup_routes(self):
        """Register the HTML page route and the WebSocket route on ``self.app``."""

        @self.app.get("/", response_class=HTMLResponse)
        async def get_display():
            """Serve the transcription display page."""
            return self._get_html()

        @self.app.websocket("/ws")
        async def websocket_endpoint(websocket: WebSocket):
            """WebSocket endpoint for real-time updates."""
            await self._serve_client(websocket)

    def _drop_connection(self, websocket) -> None:
        """Forget *websocket* if it is still registered.

        Both the per-client session and the broadcaster may notice the same
        dead socket, so removal has to tolerate the socket already being gone.
        """
        if websocket in self.active_connections:
            self.active_connections.remove(websocket)

    async def _serve_client(self, websocket: "WebSocket") -> None:
        """Run one client session: accept, replay history, then wait for pings.

        Args:
            websocket: The incoming WebSocket connection.
        """
        await websocket.accept()
        self.active_connections.append(websocket)

        try:
            # Replay the most recent transcriptions. The slice is a copy, so
            # it is safe to await while new transcriptions are appended.
            for trans in self.transcriptions[-self._REPLAY_LIMIT:]:
                await websocket.send_json(trans)

            # Block on client keepalive messages until the client goes away.
            while True:
                await websocket.receive_text()
        except Exception:
            # A disconnect (or a failed send) is the normal way this loop
            # ends. Only Exception is caught so task cancellation and
            # interpreter shutdown still propagate.
            pass
        finally:
            self._drop_connection(websocket)

    def _get_html(self) -> str:
        """Generate HTML for transcription display.

        The page's script inserts received values with ``textContent`` so
        that transcribed text and user names are always rendered literally
        and can never be interpreted as markup.

        Returns:
            The complete HTML document, with ``self.fade_after_seconds``
            substituted into the client-side script.
        """
        return f"""
        <!DOCTYPE html>
        <html>
        <head>
            <title>Transcription Display</title>
            <style>
                body {{
                    margin: 0;
                    padding: 20px;
                    background: transparent;
                    font-family: Arial, sans-serif;
                    color: white;
                }}
                #transcriptions {{
                    max-height: 100vh;
                    overflow-y: auto;
                }}
                .transcription {{
                    margin: 10px 0;
                    padding: 10px;
                    background: rgba(0, 0, 0, 0.7);
                    border-radius: 5px;
                    animation: slideIn 0.3s ease-out;
                    transition: opacity 1s ease-out;
                }}
                .transcription.fading {{
                    opacity: 0;
                }}
                .timestamp {{
                    color: #888;
                    font-size: 0.9em;
                    margin-right: 10px;
                }}
                .user {{
                    color: #4CAF50;
                    font-weight: bold;
                    margin-right: 10px;
                }}
                .text {{
                    color: white;
                }}
                @keyframes slideIn {{
                    from {{
                        opacity: 0;
                        transform: translateY(-10px);
                    }}
                    to {{
                        opacity: 1;
                        transform: translateY(0);
                    }}
                }}
            </style>
        </head>
        <body>
            <div id="transcriptions"></div>

            <script>
                const container = document.getElementById('transcriptions');
                const ws = new WebSocket(`ws://${{window.location.host}}/ws`);
                const fadeAfterSeconds = {self.fade_after_seconds};

                ws.onmessage = (event) => {{
                    const data = JSON.parse(event.data);
                    addTranscription(data);
                }};

                ws.onclose = () => {{
                    console.log('WebSocket closed. Attempting to reconnect...');
                    setTimeout(() => location.reload(), 3000);
                }};

                // Send keepalive pings
                setInterval(() => {{
                    if (ws.readyState === WebSocket.OPEN) {{
                        ws.send('ping');
                    }}
                }}, 30000);

                function addTranscription(data) {{
                    const div = document.createElement('div');
                    div.className = 'transcription';

                    // Insert values as plain text so they are never parsed as markup
                    const addSpan = (className, content) => {{
                        const span = document.createElement('span');
                        span.className = className;
                        span.textContent = content;
                        div.appendChild(span);
                    }};

                    if (data.timestamp) {{
                        addSpan('timestamp', `[${{data.timestamp}}]`);
                    }}
                    if (data.user_name) {{
                        addSpan('user', `${{data.user_name}}:`);
                    }}
                    addSpan('text', data.text);

                    container.appendChild(div);

                    // Auto-scroll to bottom
                    container.scrollTop = container.scrollHeight;

                    // Set up fade-out if enabled
                    if (fadeAfterSeconds > 0) {{
                        setTimeout(() => {{
                            // Start fade animation
                            div.classList.add('fading');

                            // Remove element after fade completes
                            setTimeout(() => {{
                                if (div.parentNode === container) {{
                                    container.removeChild(div);
                                }}
                            }}, 1000); // Match the CSS transition duration
                        }}, fadeAfterSeconds * 1000);
                    }}

                    // Limit to 50 transcriptions (fallback)
                    while (container.children.length > 50) {{
                        container.removeChild(container.firstChild);
                    }}
                }}
            </script>
        </body>
        </html>
        """

    async def broadcast_transcription(self, text: str, user_name: str = "", timestamp: Optional[datetime] = None):
        """
        Broadcast a transcription to all connected clients.

        The transcription is also appended to the in-memory history, which
        is trimmed to the most recent ``_HISTORY_LIMIT`` entries. Clients
        whose send fails are dropped from the connection list.

        Args:
            text: Transcription text
            user_name: User/speaker name
            timestamp: Timestamp of transcription (defaults to the current local time)
        """
        if timestamp is None:
            timestamp = datetime.now()

        trans_data = {
            "text": text,
            "user_name": user_name,
        }

        # Only include timestamp if enabled
        if self.show_timestamps:
            trans_data["timestamp"] = timestamp.strftime("%H:%M:%S")

        # Store transcription and discard everything older than the limit
        self.transcriptions.append(trans_data)
        del self.transcriptions[:-self._HISTORY_LIMIT]

        # Iterate over a snapshot: clients may connect or disconnect while
        # this coroutine is suspended in send_json().
        for connection in list(self.active_connections):
            try:
                await connection.send_json(trans_data)
            except Exception:
                # A failed send means the client is gone; forget it. Task
                # cancellation is deliberately not caught here.
                self._drop_connection(connection)

    async def start(self):
        """Start the web server and run until the uvicorn server exits."""
        # Imported lazily so the module can be imported without uvicorn.
        import uvicorn

        config = uvicorn.Config(
            self.app,
            host=self.host,
            port=self.port,
            log_level="warning",
        )
        server = uvicorn.Server(config)
        await server.serve()
|
||||
Reference in New Issue
Block a user