# Settings describing the local user.
user:
  name: "User"
  # Empty by default. NOTE(review): how an empty id is treated is decided by
  # the consuming application — confirm before relying on it.
  id: ""
# Audio capture settings.
audio:
  input_device: "default"
  sample_rate: 16000  # Samples per second (Hz)
# Speech-to-text settings (RealtimeSTT).
transcription:
  # RealtimeSTT model settings
  # Model options: tiny, tiny.en, base, base.en, small, small.en, medium,
  # medium.en, large-v1, large-v2, large-v3
  model: "base.en"
  device: "auto"  # Options: auto, cuda, cpu
  language: "en"
  compute_type: "default"  # Options: default, int8, float16, float32

  # Realtime preview settings (optional faster preview before final transcription)
  enable_realtime_transcription: false
  realtime_model: "tiny.en"  # Faster model for instant preview

  # VAD (Voice Activity Detection) settings
  # Silero: 0.0-1.0, higher = more sensitive (detects more speech)
  silero_sensitivity: 0.4
  silero_use_onnx: true  # Use ONNX for 2-3x faster VAD with lower CPU usage
  # WebRTC: 0-3, lower = more sensitive (note: opposite direction to Silero)
  webrtc_sensitivity: 3

  # Post-processing settings
  post_speech_silence_duration: 0.3  # Seconds of silence before finalizing transcription
  min_length_of_recording: 0.5  # Minimum recording length in seconds
  min_gap_between_recordings: 0  # Minimum gap between recordings in seconds
  pre_recording_buffer_duration: 0.2  # Buffer before speech starts (prevents cut-off words)

  # Transcription quality settings
  beam_size: 5  # Higher = better quality but slower (1-10)
  initial_prompt: ""  # Optional prompt to guide transcription style

  # Performance settings
  no_log_file: true  # Skip writing the RealtimeSTT log file
# Server sync settings; off by default (enabled: false).
# NOTE(review): presumably transcriptions are sent to `url` when enabled —
# confirm against the consuming code.
server_sync:
  enabled: false
  url: "http://localhost:3000/api/send"
  room: "default"
  # Keep real passphrases out of version control; prefer a local, untracked
  # override if the application supports one.
  passphrase: ""
# Appearance of the transcription display.
display:
  show_timestamps: true
  max_lines: 100
  font_family: "Courier"
  font_size: 12
  theme: "dark"
  # Time before transcriptions fade out (0 = never fade)
  fade_after_seconds: 10
# Web server settings.
web_server:
  port: 8080
  host: "127.0.0.1"  # Loopback address (reachable from the local machine only)