Add unified per-speaker font support and remote transcription service
Font changes:
- Consolidate font settings into a single Display Settings section
- Support Web-Safe, Google Fonts, and Custom File uploads for both displays
- Fix Google Fonts URL encoding (use + instead of %2B for spaces)
- Fix per-speaker font inline style quote escaping in Node.js display
- Add font debug logging to help diagnose font issues
- Update web server to sync all font settings on settings change
- Remove deprecated PHP server documentation files

New features:
- Add remote transcription service for GPU offloading
- Add instance lock to prevent multiple app instances
- Add version tracking

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -29,7 +29,7 @@ class TranscriptionResult:
|
||||
def __repr__(self) -> str:
|
||||
time_str = self.timestamp.strftime("%H:%M:%S")
|
||||
prefix = "[FINAL]" if self.is_final else "[PREVIEW]"
|
||||
if self.user_name:
|
||||
if self.user_name and self.user_name.strip():
|
||||
return f"{prefix} [{time_str}] {self.user_name}: {self.text}"
|
||||
return f"{prefix} [{time_str}] {self.text}"
|
||||
|
||||
@@ -63,6 +63,7 @@ class RealtimeTranscriptionEngine:
|
||||
# Realtime preview settings
|
||||
enable_realtime_transcription: bool = False,
|
||||
realtime_model: str = "tiny.en",
|
||||
realtime_processing_pause: float = 0.1, # How often to update preview (lower = more frequent)
|
||||
# VAD settings
|
||||
silero_sensitivity: float = 0.4,
|
||||
silero_use_onnx: bool = True,
|
||||
@@ -106,11 +107,21 @@ class RealtimeTranscriptionEngine:
|
||||
user_name: User name for transcriptions
|
||||
"""
|
||||
self.model = model
|
||||
self.device = device
|
||||
self.language = language
|
||||
self.compute_type = compute_type
|
||||
|
||||
# Resolve device - 'auto' means use CUDA if available, else CPU
|
||||
if device == 'auto':
|
||||
try:
|
||||
import torch
|
||||
self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
||||
except:
|
||||
self.device = 'cpu'
|
||||
else:
|
||||
self.device = device
|
||||
self.enable_realtime = enable_realtime_transcription
|
||||
self.realtime_model = realtime_model
|
||||
self.realtime_processing_pause = realtime_processing_pause
|
||||
self.user_name = user_name
|
||||
|
||||
# Callbacks
|
||||
@@ -131,6 +142,7 @@ class RealtimeTranscriptionEngine:
|
||||
# Store configuration for recorder initialization
|
||||
self.config = {
|
||||
'model': model,
|
||||
'device': self.device, # Use resolved device (auto -> cuda/cpu)
|
||||
'language': language if language != 'auto' else None,
|
||||
'compute_type': compute_type if compute_type != 'default' else 'default',
|
||||
'input_device_index': input_device_index,
|
||||
@@ -145,8 +157,18 @@ class RealtimeTranscriptionEngine:
|
||||
'initial_prompt': initial_prompt if initial_prompt else None,
|
||||
'enable_realtime_transcription': enable_realtime_transcription,
|
||||
'realtime_model_type': realtime_model if enable_realtime_transcription else None,
|
||||
'realtime_processing_pause': realtime_processing_pause if enable_realtime_transcription else 0.2,
|
||||
# The realtime callback is added during initialize() after set_callbacks is called
|
||||
}
|
||||
|
||||
def _is_cuda_available(self) -> bool:
|
||||
"""Check if CUDA is available."""
|
||||
try:
|
||||
import torch
|
||||
return torch.cuda.is_available()
|
||||
except:
|
||||
return False
|
||||
|
||||
def set_callbacks(
|
||||
self,
|
||||
realtime_callback: Optional[Callable[[TranscriptionResult], None]] = None,
|
||||
@@ -198,8 +220,15 @@ class RealtimeTranscriptionEngine:
|
||||
|
||||
try:
|
||||
print(f"Initializing RealtimeSTT with model: {self.model}")
|
||||
print(f" Device: {self.device}, Compute type: {self.compute_type}")
|
||||
if self.enable_realtime:
|
||||
print(f" Realtime preview enabled with model: {self.realtime_model}")
|
||||
print(f" Realtime processing pause: {self.realtime_processing_pause}s")
|
||||
|
||||
# Add realtime transcription callback if enabled
|
||||
# This provides word-by-word updates as speech is being processed
|
||||
if self.enable_realtime:
|
||||
self.config['on_realtime_transcription_update'] = self._on_realtime_transcription
|
||||
|
||||
# Create recorder with configuration
|
||||
self.recorder = AudioToTextRecorder(**self.config)
|
||||
@@ -325,7 +354,7 @@ class RealtimeTranscriptionEngine:
|
||||
Returns:
|
||||
True if model changed successfully
|
||||
"""
|
||||
was_running = self.is_running
|
||||
was_running = self.is_recording
|
||||
|
||||
# Stop current recording
|
||||
self.stop()
|
||||
@@ -355,7 +384,7 @@ class RealtimeTranscriptionEngine:
|
||||
Returns:
|
||||
True if device changed successfully
|
||||
"""
|
||||
was_running = self.is_running
|
||||
was_running = self.is_recording
|
||||
|
||||
# Stop current recording
|
||||
self.stop()
|
||||
@@ -396,7 +425,7 @@ class RealtimeTranscriptionEngine:
|
||||
self.config['webrtc_sensitivity'] = webrtc_sensitivity
|
||||
|
||||
# If running, need to restart to apply changes
|
||||
if self.is_running:
|
||||
if self.is_recording:
|
||||
print("VAD settings updated. Restart transcription to apply changes.")
|
||||
|
||||
def set_user_name(self, user_name: str):
|
||||
@@ -404,7 +433,7 @@ class RealtimeTranscriptionEngine:
|
||||
self.user_name = user_name
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"RealtimeTranscriptionEngine(model={self.model}, device={self.device}, running={self.is_running})"
|
||||
return f"RealtimeTranscriptionEngine(model={self.model}, device={self.device}, running={self.is_recording})"
|
||||
|
||||
def __del__(self):
|
||||
"""Cleanup when object is destroyed."""
|
||||
|
||||
Reference in New Issue
Block a user