# local-transcription/config/default_config.yaml

# Identity of the person using this client.
user:
  name: "User"  # Default name; presumably the label coloured by display.user_color - confirm
  id: ""  # Blank in the shipped defaults

# Audio capture settings.
audio:
  input_device: "default"  # NOTE(review): "default" presumably selects the system default input - confirm
  sample_rate: 16000  # Presumably in Hz (16 kHz) - confirm against the capture code

# Transcription engine settings (RealtimeSTT).
transcription:
  # RealtimeSTT model settings
  # Model options: tiny, tiny.en, base, base.en, small, small.en, medium, medium.en,
  # large-v1, large-v2, large-v3
  model: "base.en"
  device: "auto"  # auto, cuda, cpu
  language: "en"
  compute_type: "default"  # default, int8, float16, float32

  # Realtime preview settings (optional faster preview before final transcription)
  enable_realtime_transcription: false
  realtime_model: "tiny.en"  # Faster model for instant preview
  realtime_processing_pause: 0.1  # Seconds between preview updates (lower = more responsive, default 0.1)

  # VAD (Voice Activity Detection) settings
  # NOTE(review): RealtimeSTT documents silero_sensitivity as 0 = least sensitive and
  # 1 = most sensitive; this assumes the value is passed to RealtimeSTT unchanged - confirm.
  silero_sensitivity: 0.4  # 0.0-1.0, higher = more sensitive (detects more speech)
  silero_use_onnx: true  # Use ONNX for 2-3x faster VAD with lower CPU usage
  webrtc_sensitivity: 3  # 0-3, lower = more sensitive

  # Post-processing settings
  post_speech_silence_duration: 0.3  # Seconds of silence before finalizing transcription
  min_length_of_recording: 0.5  # Minimum recording length in seconds
  min_gap_between_recordings: 0  # Minimum gap between recordings in seconds
  pre_recording_buffer_duration: 0.2  # Buffer before speech starts (prevents cut-off words)

  # Transcription quality settings
  beam_size: 5  # Higher = better quality but slower (1-10)
  initial_prompt: ""  # Optional prompt to guide transcription style

  # Performance settings
  no_log_file: true  # Disable RealtimeSTT logging

  # Fast speaker mode - for speakers who talk quickly without pauses
  # Reduces silence detection thresholds for more frequent transcription outputs
  continuous_mode: false

# Optional syncing of transcriptions to a server; off by default.
server_sync:
  enabled: false
  url: "http://localhost:3000/api/send"  # Default points at a server on this machine
  room: "default"
  passphrase: ""  # Secret - keep real values out of version control
  # Font settings live in the display section (shared by local display and server sync)

# Appearance of the transcription display. Font and color settings here are
# shared by the local display and server sync.
display:
  show_timestamps: true
  max_lines: 100
  # Font settings (used for both local display and server sync)
  # font_source selects which of the four font settings below applies.
  font_source: "System Font"  # Options: System Font, Web-Safe, Google Font, Custom File
  font_family: "Courier"  # System font name (local only, won't work with server sync)
  websafe_font: "Arial"  # Web-safe font name
  google_font: "Roboto"  # Google Font name
  custom_font_file: ""  # Path to custom font file (.ttf, .otf, .woff, .woff2)
  font_size: 12
  theme: "dark"
  fade_after_seconds: 10  # Time before transcriptions fade out (0 = never fade)
  # Color settings (used for both local display and server sync)
  user_color: "#4CAF50"  # User's name color (default green)
  text_color: "#FFFFFF"  # Text/font color (default white)
  background_color: "#000000B3"  # Background color with alpha (default semi-transparent black)

# Built-in web server address.
web_server:
  port: 8080
  # 127.0.0.1 is the loopback address; presumably this keeps the server reachable
  # only from this machine - confirm against the server start-up code.
  host: "127.0.0.1"

# Remote (cloud) transcription settings. String values are double-quoted to
# match the rest of this file; the parsed values are unchanged.
remote:
  mode: "local"  # local | managed | byok
  server_url: ""  # Proxy server URL for managed mode (e.g., wss://your-proxy.com)
  auth_token: ""  # JWT stored after login (managed mode); secret - never commit a real value
  byok_api_key: ""  # Deepgram API key for BYOK mode; secret - never commit a real value
  deepgram_model: "nova-2"  # Deepgram model to use
  language: "en-US"  # Language code
  fallback_to_local: true  # Fall back to local Whisper if remote fails

# Application update checks against a Gitea-hosted repository.
updates:
  auto_check: true  # Check for updates on startup
  gitea_url: "https://repo.anhonesthost.net"  # Base URL of Gitea server
  owner: "streamer-tools"  # Repository owner/organization name
  repo: "local-transcription"  # Repository name
  skipped_versions: []  # List of versions the user chose to skip
  last_check: ""  # ISO timestamp of last update check (empty in the shipped defaults)
  check_interval_hours: 24  # Hours between automatic update checks