2025-12-25 18:48:23 -08:00
|
|
|
# Identity of the local user.
user:
  # Name for this user (value shipped in this file: "User").
  name: "User"
  # Identifier for this user; empty in this file.
  # NOTE(review): how an empty id is treated is not visible here - confirm.
  id: ""
|
|
|
|
|
|
|
|
|
|
# Audio capture settings.
audio:
  # Input device selector. "default" presumably selects the system default
  # input device - TODO confirm against the audio backend.
  input_device: "default"
  # Capture sample rate (presumably in Hz - confirm).
  sample_rate: 16000
|
|
|
|
|
|
|
|
|
|
# Speech-to-text settings (RealtimeSTT).
transcription:
  # --- RealtimeSTT model settings ---
  # Options: tiny, tiny.en, base, base.en, small, small.en, medium,
  # medium.en, large-v1, large-v2, large-v3
  model: "base.en"
  # Options: auto, cuda, cpu
  device: "auto"
  language: "en"
  # Options: default, int8, float16, float32
  compute_type: "default"

  # --- Realtime preview settings ---
  # Optional faster preview before the final transcription.
  enable_realtime_transcription: false
  # Faster model for instant preview
  realtime_model: "tiny.en"
  # Seconds between preview updates (lower = more responsive, default 0.1)
  realtime_processing_pause: 0.1

  # --- VAD (Voice Activity Detection) settings ---
  # 0.0-1.0, higher = more sensitive (detects more speech).
  # RealtimeSTT documents 0 as least sensitive and 1 as most sensitive.
  silero_sensitivity: 0.4
  # Use ONNX for 2-3x faster VAD with lower CPU usage
  silero_use_onnx: true
  # 0-3, lower = more sensitive.
  # Note: this scale runs in the opposite direction to silero_sensitivity.
  webrtc_sensitivity: 3

  # --- Post-processing settings ---
  # Seconds of silence before finalizing transcription
  post_speech_silence_duration: 0.3
  # Minimum recording length in seconds
  min_length_of_recording: 0.5
  # Minimum gap between recordings in seconds
  min_gap_between_recordings: 0
  # Buffer before speech starts (prevents cut-off words)
  pre_recording_buffer_duration: 0.2

  # --- Transcription quality settings ---
  # Higher = better quality but slower (1-10)
  beam_size: 5
  # Optional prompt to guide transcription style
  initial_prompt: ""

  # --- Performance settings ---
  # Disable RealtimeSTT logging
  no_log_file: true

  # --- Fast speaker mode ---
  # For speakers who talk quickly without pauses.
  # Reduces silence detection thresholds for more frequent transcription
  # outputs.
  continuous_mode: false
|
|
|
|
|
|
2025-12-25 18:48:23 -08:00
|
|
|
# Server sync settings (turned off in this file).
server_sync:
  enabled: false
  # Endpoint used for sync requests.
  url: "http://localhost:3000/api/send"
  # Room name; presumably selects which room on the server receives the
  # data - confirm against the server.
  room: "default"
  # Passphrase for the room; empty in this file.
  passphrase: ""
  # Font settings are now in the display section
  # (shared for local and server sync).
|
2025-12-25 18:48:23 -08:00
|
|
|
|
|
|
|
|
# Display settings for the transcription view.
display:
  show_timestamps: true
  max_lines: 100

  # --- Font settings (used for both local display and server sync) ---
  # Options: System Font, Web-Safe, Google Font, Custom File
  font_source: "System Font"
  # System font name (local only, won't work with server sync)
  font_family: "Courier"
  # Web-safe font name
  websafe_font: "Arial"
  # Google Font name
  google_font: "Roboto"
  # Path to custom font file (.ttf, .otf, .woff, .woff2)
  custom_font_file: ""
  font_size: 12

  theme: "dark"
  # Time before transcriptions fade out (0 = never fade)
  fade_after_seconds: 10
|
|
|
|
|
|
|
|
|
|
# Built-in web server settings.
web_server:
  port: 8080
  # 127.0.0.1 is the IPv4 loopback address.
  # NOTE(review): presumably the bind address, which would limit access to
  # this machine - confirm against the server code.
  host: "127.0.0.1"
|
2026-01-11 18:56:12 -08:00
|
|
|
|
|
|
|
|
# Remote transcription offloading.
remote_processing:
  # Enable remote transcription offloading
  enabled: false
  # WebSocket URL of remote transcription service
  # (e.g., ws://your-server:8765/ws/transcribe)
  server_url: ""
  # API key for authentication
  api_key: ""
  # Fall back to local processing if remote fails
  fallback_to_local: true
|