chore: bump version to 2.0.1 [skip ci]

Bump to v2.0.0 — cross-platform Tauri rewrite
Major version bump reflecting the architecture change from PySide6/Qt to Tauri v2 + Svelte 5 with cross-platform support for Windows, macOS, and Linux. Key changes since v1.4.0: - Tauri v2 native desktop shell replacing PySide6/Qt - Svelte 5 reactive frontend - Headless Python backend as a downloadable sidecar - Deepgram cloud transcription (managed + BYOK) - Gitea CI/CD with per-OS builds and automated releases - Sidecar auto-update checking on startup - 63-test suite (Python + Svelte + Rust) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-07 15:58:01 +00:00 · 2026-04-07 08:55:25 -07:00 · 2026-04-07 08:54:18 -07:00 · 2026-04-07 15:51:32 +00:00 · 2026-04-07 08:46:31 -07:00 · 2026-04-07 15:44:54 +00:00
74 changed files with 23269 additions and 722 deletions
--- a/.claude/settings.local.json
+++ b/.claude/settings.local.json
@@ -0,0 +1,9 @@
 {
  "permissions": {
    "allow": [
      "Bash(python3:*)",
      "Bash(node --check:*)",
      "Bash(ls:*)"
    ]
  }
 }
--- a/.gitea/workflows/build-app-linux.yml
+++ b/.gitea/workflows/build-app-linux.yml
@@ -0,0 +1,103 @@
 name: Build App (Linux)
 on:
  workflow_dispatch:
    inputs:
      tag:
        description: 'Release tag to build (e.g. v1.4.5)'
        required: true
 jobs:
  build-linux:
    name: Build App (Linux)
    runs-on: ubuntu-latest
    env:
      NODE_VERSION: "20"
    steps:
      # Resolves the release tag to build and publishes it as the step output `tag`.
      # Resolution order: `inputs.tag`, then `github.event.inputs.tag`, then the
      # highest version-sorted `v*` tag on the server. Fails if none is found.
      - name: Determine tag
        id: tag
        env:
          # Inputs are passed through the environment instead of being interpolated
          # into the script text, so a crafted tag value cannot inject shell code.
          TAG_INPUT: ${{ inputs.tag }}
          EVENT_TAG_INPUT: ${{ github.event.inputs.tag }}
        run: |
          TAG="${TAG_INPUT:-${EVENT_TAG_INPUT}}"
          if [ -z "$TAG" ]; then
            # This step runs before checkout, so there is no local repository and
            # no `origin` remote yet; query the server URL directly. `--refs` drops
            # the peeled `^{}` entries that annotated tags would otherwise add.
            # NOTE(review): this lookup is unauthenticated - confirm it is acceptable
            # for this repository's visibility.
            TAG=$(git ls-remote --tags --refs --sort=-v:refname "${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}.git" 'refs/tags/v*' | head -1 | sed 's|.*refs/tags/||')
          fi
          if [ -z "$TAG" ]; then
            echo "ERROR: Could not determine a release tag to build."
            exit 1
          fi
          echo "Building for tag: ${TAG}"
          echo "tag=${TAG}" >> "$GITHUB_OUTPUT"
      # Check out the repository at the tag resolved by the `tag` step.
      - uses: actions/checkout@v4
        with:
          ref: ${{ steps.tag.outputs.tag }}
      # Install Node.js at the version given by the job-level NODE_VERSION variable.
      - name: Set up Node.js
        uses: actions/setup-node@v4
        with:
          node-version: ${{ env.NODE_VERSION }}
      # Install the stable Rust toolchain with rustup and add cargo's bin directory
      # to PATH for the following steps.
      - name: Install Rust stable
        run: |
          curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable
          echo "$HOME/.cargo/bin" >> $GITHUB_PATH
      # Install the apt packages needed for the build (GTK, WebKitGTK, appindicator,
      # librsvg, patchelf, xdg-utils, rpm).
      - name: Install system dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y libgtk-3-dev libwebkit2gtk-4.1-dev libappindicator3-dev librsvg2-dev patchelf xdg-utils rpm
      # Install the JavaScript dependencies exactly as pinned in the lockfile.
      - name: Install npm dependencies
        run: npm ci
      # Run the project's `tauri build` npm script.
      - name: Build Tauri app
        run: npm run tauri build
      # Uploads the Linux bundles (.deb, .rpm, .AppImage) to the Gitea release for
      # the resolved tag. Waits up to 30 x 10s for the release to exist, replaces any
      # asset that already has the same name, and fails the job when no bundle was
      # produced or any upload is rejected.
      - name: Upload to release
        env:
          BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
        run: |
          sudo apt-get install -y jq
          REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
          TAG="${{ steps.tag.outputs.tag }}"
          echo "Release tag: ${TAG}"
          echo "Waiting for release ${TAG} to be available..."
          RELEASE_ID=""
          for i in $(seq 1 30); do
            RELEASE_JSON=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
              "${REPO_API}/releases/tags/${TAG}")
            RELEASE_ID=$(echo "$RELEASE_JSON" | jq -r '.id // empty')
            if [ -n "${RELEASE_ID}" ] && [ "${RELEASE_ID}" != "null" ]; then
              echo "Found release: ${TAG} (ID: ${RELEASE_ID})"
              break
            fi
            echo "Attempt ${i}/30: Release not ready yet, retrying in 10s..."
            sleep 10
          done
          if [ -z "${RELEASE_ID}" ] || [ "${RELEASE_ID}" = "null" ]; then
            echo "ERROR: Failed to find release for tag ${TAG} after 30 attempts."
            exit 1
          fi
          # Collect the bundle paths in a file instead of piping `find` into the
          # loop: a piped loop runs in a subshell, so a failure flag set inside it
          # would be lost and the job could never be failed from there.
          BUNDLE_LIST=$(mktemp)
          find src-tauri/target/release/bundle -type f \( -name "*.deb" -o -name "*.rpm" -o -name "*.AppImage" \) > "${BUNDLE_LIST}"
          if [ ! -s "${BUNDLE_LIST}" ]; then
            echo "ERROR: No bundles found under src-tauri/target/release/bundle."
            exit 1
          fi
          UPLOAD_FAILED=0
          while IFS= read -r file; do
            filename=$(basename "$file")
            encoded_name=$(echo "$filename" | sed 's/ /%20/g')
            echo "Uploading ${filename} ($(du -h "$file" | cut -f1))..."
            # Delete an existing asset with the same name so a re-run replaces it.
            ASSET_ID=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
              "${REPO_API}/releases/${RELEASE_ID}/assets" | jq -r --arg name "${filename}" '.[] | select(.name == $name) | .id // empty')
            if [ -n "${ASSET_ID}" ]; then
              curl -s -X DELETE -H "Authorization: token ${BUILD_TOKEN}" \
                "${REPO_API}/releases/${RELEASE_ID}/assets/${ASSET_ID}"
            fi
            HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X POST \
              -H "Authorization: token ${BUILD_TOKEN}" \
              -H "Content-Type: application/octet-stream" \
              -T "$file" \
              "${REPO_API}/releases/${RELEASE_ID}/assets?name=${encoded_name}")
            echo "Upload response: HTTP ${HTTP_CODE}"
            # Anything outside 2xx (including curl's "000" on connection errors)
            # counts as a failed upload; keep going so every file is attempted.
            case "${HTTP_CODE}" in
              2??) ;;
              *)
                echo "ERROR: Upload of ${filename} failed (HTTP ${HTTP_CODE})."
                UPLOAD_FAILED=1
                ;;
            esac
          done < "${BUNDLE_LIST}"
          rm -f "${BUNDLE_LIST}"
          if [ "${UPLOAD_FAILED}" -ne 0 ]; then
            exit 1
          fi
--- a/.gitea/workflows/build-app-macos.yml
+++ b/.gitea/workflows/build-app-macos.yml
@@ -0,0 +1,101 @@
 name: Build App (macOS)
 on:
  workflow_dispatch:
    inputs:
      tag:
        description: 'Release tag to build (e.g. v1.4.5)'
        required: true
 jobs:
  build-macos:
    name: Build App (macOS)
    runs-on: macos-latest
    env:
      NODE_VERSION: "20"
    steps:
      # Resolves the release tag to build and publishes it as the step output `tag`.
      # Resolution order: `inputs.tag`, then `github.event.inputs.tag`, then the
      # highest version-sorted `v*` tag on the server. Fails if none is found.
      - name: Determine tag
        id: tag
        env:
          # Inputs are passed through the environment instead of being interpolated
          # into the script text, so a crafted tag value cannot inject shell code.
          TAG_INPUT: ${{ inputs.tag }}
          EVENT_TAG_INPUT: ${{ github.event.inputs.tag }}
        run: |
          TAG="${TAG_INPUT:-${EVENT_TAG_INPUT}}"
          if [ -z "$TAG" ]; then
            # This step runs before checkout, so there is no local repository and
            # no `origin` remote yet; query the server URL directly. `--refs` drops
            # the peeled `^{}` entries that annotated tags would otherwise add.
            # NOTE(review): this lookup is unauthenticated - confirm it is acceptable
            # for this repository's visibility.
            TAG=$(git ls-remote --tags --refs --sort=-v:refname "${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}.git" 'refs/tags/v*' | head -1 | sed 's|.*refs/tags/||')
          fi
          if [ -z "$TAG" ]; then
            echo "ERROR: Could not determine a release tag to build."
            exit 1
          fi
          echo "Building for tag: ${TAG}"
          echo "tag=${TAG}" >> "$GITHUB_OUTPUT"
      # Check out the repository at the tag resolved by the `tag` step.
      - uses: actions/checkout@v4
        with:
          ref: ${{ steps.tag.outputs.tag }}
      # Install Node.js at the version given by the job-level NODE_VERSION variable.
      - name: Set up Node.js
        uses: actions/setup-node@v4
        with:
          node-version: ${{ env.NODE_VERSION }}
      # Install the stable Rust toolchain with rustup and add cargo's bin directory
      # to PATH for the following steps.
      - name: Install Rust stable
        run: |
          curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable
          echo "$HOME/.cargo/bin" >> $GITHUB_PATH
      # Best-effort install of create-dmg; `|| true` keeps the step green if brew fails.
      - name: Install system dependencies
        run: brew install --quiet create-dmg || true
      # Install the JavaScript dependencies exactly as pinned in the lockfile.
      - name: Install npm dependencies
        run: npm ci
      # Run the project's `tauri build` npm script.
      - name: Build Tauri app
        run: npm run tauri build
      # Uploads the macOS .dmg bundles to the Gitea release for the resolved tag.
      # Waits up to 30 x 10s for the release to exist, replaces any asset that
      # already has the same name, and fails the job when no bundle was produced
      # or any upload is rejected.
      - name: Upload to release
        env:
          BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
        run: |
          which jq || brew install jq
          REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
          TAG="${{ steps.tag.outputs.tag }}"
          echo "Release tag: ${TAG}"
          echo "Waiting for release ${TAG} to be available..."
          RELEASE_ID=""
          for i in $(seq 1 30); do
            RELEASE_JSON=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
              "${REPO_API}/releases/tags/${TAG}")
            RELEASE_ID=$(echo "$RELEASE_JSON" | jq -r '.id // empty')
            if [ -n "${RELEASE_ID}" ] && [ "${RELEASE_ID}" != "null" ]; then
              echo "Found release: ${TAG} (ID: ${RELEASE_ID})"
              break
            fi
            echo "Attempt ${i}/30: Release not ready yet, retrying in 10s..."
            sleep 10
          done
          if [ -z "${RELEASE_ID}" ] || [ "${RELEASE_ID}" = "null" ]; then
            echo "ERROR: Failed to find release for tag ${TAG} after 30 attempts."
            exit 1
          fi
          # Collect the bundle paths in a file instead of piping `find` into the
          # loop: a piped loop runs in a subshell, so a failure flag set inside it
          # would be lost and the job could never be failed from there.
          BUNDLE_LIST=$(mktemp)
          find src-tauri/target/release/bundle -type f -name "*.dmg" > "${BUNDLE_LIST}"
          if [ ! -s "${BUNDLE_LIST}" ]; then
            echo "ERROR: No bundles found under src-tauri/target/release/bundle."
            exit 1
          fi
          UPLOAD_FAILED=0
          while IFS= read -r file; do
            filename=$(basename "$file")
            encoded_name=$(echo "$filename" | sed 's/ /%20/g')
            echo "Uploading ${filename} ($(du -h "$file" | cut -f1))..."
            # Delete an existing asset with the same name so a re-run replaces it.
            ASSET_ID=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
              "${REPO_API}/releases/${RELEASE_ID}/assets" | jq -r --arg name "${filename}" '.[] | select(.name == $name) | .id // empty')
            if [ -n "${ASSET_ID}" ]; then
              curl -s -X DELETE -H "Authorization: token ${BUILD_TOKEN}" \
                "${REPO_API}/releases/${RELEASE_ID}/assets/${ASSET_ID}"
            fi
            HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X POST \
              -H "Authorization: token ${BUILD_TOKEN}" \
              -H "Content-Type: application/octet-stream" \
              -T "$file" \
              "${REPO_API}/releases/${RELEASE_ID}/assets?name=${encoded_name}")
            echo "Upload response: HTTP ${HTTP_CODE}"
            # Anything outside 2xx (including curl's "000" on connection errors)
            # counts as a failed upload; keep going so every file is attempted.
            case "${HTTP_CODE}" in
              2??) ;;
              *)
                echo "ERROR: Upload of ${filename} failed (HTTP ${HTTP_CODE})."
                UPLOAD_FAILED=1
                ;;
            esac
          done < "${BUNDLE_LIST}"
          rm -f "${BUNDLE_LIST}"
          if [ "${UPLOAD_FAILED}" -ne 0 ]; then
            exit 1
          fi
--- a/.gitea/workflows/build-app-windows.yml
+++ b/.gitea/workflows/build-app-windows.yml
@@ -0,0 +1,117 @@
 name: Build App (Windows)
 on:
  workflow_dispatch:
    inputs:
      tag:
        description: 'Release tag to build (e.g. v1.4.5)'
        required: true
 env:
  NODE_VERSION: "20"
 jobs:
  build-windows:
    name: Build App (Windows)
    runs-on: windows-latest
    env:
      RELEASE_TAG: ${{ inputs.tag }}
    steps:
      # Log the tag taken from the job-level RELEASE_TAG variable.
      - name: Show tag
        shell: powershell
        run: |
          Write-Host "Building for tag: $env:RELEASE_TAG"
      # Check out the repository at the tag supplied as the workflow input.
      - uses: actions/checkout@v4
        with:
          ref: ${{ inputs.tag }}
      # Install Node.js at the version given by the workflow-level NODE_VERSION variable.
      - name: Set up Node.js
        uses: actions/setup-node@v4
        with:
          node-version: ${{ env.NODE_VERSION }}
      # Select the stable toolchain if rustup is already present; otherwise download
      # rustup-init, install stable, and add cargo's bin directory to PATH.
      - name: Install Rust stable
        shell: powershell
        run: |
          if (Get-Command rustup -ErrorAction SilentlyContinue) {
            rustup default stable
          } else {
            Invoke-WebRequest -Uri https://win.rustup.rs/x86_64 -OutFile rustup-init.exe
            .\rustup-init.exe -y --default-toolchain stable
            echo "$env:USERPROFILE\.cargo\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
          }
      # Install the JavaScript dependencies exactly as pinned in the lockfile.
      - name: Install npm dependencies
        shell: powershell
        run: npm ci
      # Run the project's `tauri build` npm script.
      - name: Build Tauri app
        shell: powershell
        run: npm run tauri build
      # Uploads the Windows installers (*.msi, *-setup.exe) to the Gitea release for
      # RELEASE_TAG. Waits up to 30 x 10s for the release to exist, replaces any
      # asset that already has the same name, and fails the job when no installer
      # was produced or any upload fails.
      - name: Upload to release
        shell: powershell
        env:
          BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
        run: |
          $REPO_API = "${{ github.server_url }}/api/v1/repos/${{ github.repository }}"
          $Headers = @{ "Authorization" = "token $env:BUILD_TOKEN" }
          $TAG = $env:RELEASE_TAG
          Write-Host "Release tag: $TAG"
          if (-not $TAG) {
            Write-Host "ERROR: RELEASE_TAG is empty"
            exit 1
          }
          Write-Host "Waiting for release $TAG to be available..."
          $RELEASE_ID = $null
          for ($i = 1; $i -le 30; $i++) {
            try {
              $release = Invoke-RestMethod -Uri "$REPO_API/releases/tags/$TAG" -Headers $Headers -ErrorAction Stop
              $RELEASE_ID = $release.id
              if ($RELEASE_ID) {
                Write-Host "Found release: $TAG (ID: $RELEASE_ID)"
                break
              }
            } catch {
              # Expected while the release does not exist yet; keep polling.
            }
            Write-Host "Attempt ${i}/30: Release not ready yet, retrying in 10s..."
            Start-Sleep -Seconds 10
          }
          if (-not $RELEASE_ID) {
            Write-Host "ERROR: Failed to find release for tag $TAG after 30 attempts."
            exit 1
          }
          $bundles = @(Get-ChildItem -Path src-tauri\target\release\bundle -Recurse -Include *.msi,*-setup.exe)
          if ($bundles.Count -eq 0) {
            Write-Host "ERROR: No installers found under src-tauri\target\release\bundle."
            exit 1
          }
          $failedUploads = 0
          foreach ($bundle in $bundles) {
            $filename = $bundle.Name
            $encodedName = [System.Uri]::EscapeDataString($filename)
            $size = [math]::Round($bundle.Length / 1MB, 1)
            Write-Host "Uploading $filename ($size MB)..."
            # Best effort: delete an existing asset with the same name so a re-run
            # replaces it. A lookup failure is reported but does not block the upload.
            try {
              $assets = Invoke-RestMethod -Uri "$REPO_API/releases/$RELEASE_ID/assets" -Headers $Headers
              $existing = $assets | Where-Object { $_.name -eq $filename }
              if ($existing) {
                Invoke-RestMethod -Uri "$REPO_API/releases/$RELEASE_ID/assets/$($existing.id)" -Method Delete -Headers $Headers
              }
            } catch {
              Write-Host "WARNING: Could not check/remove existing asset ${filename}: $($_.Exception.Message)"
            }
            $uploadUrl = "$REPO_API/releases/$RELEASE_ID/assets?name=$encodedName"
            $result = curl.exe --fail --silent --show-error `
              -X POST `
              -H "Authorization: token $env:BUILD_TOKEN" `
              -H "Content-Type: application/octet-stream" `
              -T "$($bundle.FullName)" `
              "$uploadUrl" 2>&1
            if ($LASTEXITCODE -eq 0) {
              Write-Host "Upload successful: $filename"
            } else {
              Write-Host "ERROR: Upload failed for ${filename}: $result"
              $failedUploads++
            }
          }
          # Every file is attempted first; the job then fails if any upload failed,
          # so a release is never silently left without its installers.
          if ($failedUploads -gt 0) {
            Write-Host "ERROR: $failedUploads upload(s) failed."
            exit 1
          }
--- a/.gitea/workflows/build-sidecar-linux.yml
+++ b/.gitea/workflows/build-sidecar-linux.yml
@@ -0,0 +1,118 @@
 name: Build Sidecar (Linux)
 on:
  workflow_dispatch:
    inputs:
      tag:
        description: 'Sidecar release tag to build (e.g. sidecar-v1.0.3)'
        required: true
 jobs:
  build-sidecar-linux:
    name: Build Sidecar (Linux)
    runs-on: ubuntu-latest
    env:
      PYTHON_VERSION: "3.11"
    steps:
      # Resolves the sidecar tag to build and publishes it as the step output `tag`.
      # Resolution order: `inputs.tag`, then `github.event.inputs.tag`, then the
      # highest version-sorted `sidecar-v*` tag on the server. Fails if none is found.
      - name: Determine tag
        id: tag
        env:
          # Inputs are passed through the environment instead of being interpolated
          # into the script text, so a crafted tag value cannot inject shell code.
          TAG_INPUT: ${{ inputs.tag }}
          EVENT_TAG_INPUT: ${{ github.event.inputs.tag }}
        run: |
          TAG="${TAG_INPUT:-${EVENT_TAG_INPUT}}"
          if [ -z "$TAG" ]; then
            # This step runs before checkout, so there is no local repository and
            # no `origin` remote yet; query the server URL directly. `--refs` drops
            # the peeled `^{}` entries that annotated tags would otherwise add.
            # NOTE(review): this lookup is unauthenticated - confirm it is acceptable
            # for this repository's visibility.
            TAG=$(git ls-remote --tags --refs --sort=-v:refname "${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}.git" 'refs/tags/sidecar-v*' | head -1 | sed 's|.*refs/tags/||')
          fi
          if [ -z "$TAG" ]; then
            echo "ERROR: Could not determine a sidecar tag to build."
            exit 1
          fi
          echo "Building for tag: ${TAG}"
          echo "tag=${TAG}" >> "$GITHUB_OUTPUT"
      # Check out the repository at the tag resolved by the `tag` step.
      - uses: actions/checkout@v4
        with:
          ref: ${{ steps.tag.outputs.tag }}
      # Reuse uv if it is already on PATH; otherwise install it and add
      # ~/.local/bin to PATH for the following steps.
      - name: Install uv
        run: |
          if command -v uv &> /dev/null; then
            echo "uv already installed: $(uv --version)"
          else
            curl -LsSf https://astral.sh/uv/install.sh | sh
            echo "$HOME/.local/bin" >> $GITHUB_PATH
          fi
      # Install the Python version given by the job-level PYTHON_VERSION variable.
      - name: Set up Python
        run: uv python install ${{ env.PYTHON_VERSION }}
      # Install the PortAudio development package via apt.
      - name: Install system dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y portaudio19-dev
      # Sync dependencies (falling back to a non-frozen sync if the frozen one
      # fails) and run PyInstaller on the headless spec.
      - name: Build sidecar (CUDA)
        run: |
          uv sync --frozen || uv sync
          uv run pyinstaller local-transcription-headless.spec
      # Zip the contents of the PyInstaller output directory as the CUDA archive.
      - name: Package sidecar (CUDA)
        run: |
          cd dist/local-transcription-backend && zip -r ../../sidecar-linux-x86_64-cuda.zip .
      # Remove the previous build output, force-reinstall torch/torchaudio from the
      # CPU wheel index, and rebuild with the venv's own PyInstaller.
      - name: Build sidecar (CPU)
        run: |
          rm -rf dist/local-transcription-backend build/
          uv pip install torch torchaudio --index-url https://download.pytorch.org/whl/cpu --force-reinstall
          # Run pyinstaller directly from venv to prevent uv run from
          # re-resolving torch back to the CUDA version via pyproject.toml sources
          .venv/bin/pyinstaller local-transcription-headless.spec
      # Zip the contents of the PyInstaller output directory as the CPU archive.
      - name: Package sidecar (CPU)
        run: |
          cd dist/local-transcription-backend && zip -r ../../sidecar-linux-x86_64-cpu.zip .
      # Uploads every sidecar-*.zip in the workspace to the Gitea release for the
      # resolved sidecar tag. Waits up to 30 x 10s for the release to exist,
      # replaces any asset that already has the same name, and fails the job when
      # no archive exists or any upload is rejected.
      - name: Upload to sidecar release
        env:
          BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
        run: |
          sudo apt-get install -y jq
          REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
          TAG="${{ steps.tag.outputs.tag }}"
          echo "Waiting for sidecar release ${TAG} to be available..."
          RELEASE_ID=""
          for i in $(seq 1 30); do
            RELEASE_JSON=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
              "${REPO_API}/releases/tags/${TAG}")
            RELEASE_ID=$(echo "$RELEASE_JSON" | jq -r '.id // empty')
            if [ -n "${RELEASE_ID}" ] && [ "${RELEASE_ID}" != "null" ]; then
              echo "Found sidecar release: ${TAG} (ID: ${RELEASE_ID})"
              break
            fi
            echo "Attempt ${i}/30: Release not ready yet, retrying in 10s..."
            sleep 10
          done
          if [ -z "${RELEASE_ID}" ] || [ "${RELEASE_ID}" = "null" ]; then
            echo "ERROR: Failed to find sidecar release for tag ${TAG} after 30 attempts."
            exit 1
          fi
          UPLOAD_FAILED=0
          for file in sidecar-*.zip; do
            # When nothing matches, the shell leaves the pattern as a literal word;
            # treat that as a missing build output rather than uploading a bogus name.
            if [ ! -f "$file" ]; then
              echo "ERROR: No sidecar-*.zip archives found to upload."
              exit 1
            fi
            filename=$(basename "$file")
            encoded_name=$(echo "$filename" | sed 's/ /%20/g')
            echo "Uploading ${filename} ($(du -h "$file" | cut -f1))..."
            # Delete an existing asset with the same name so a re-run replaces it.
            ASSET_ID=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
              "${REPO_API}/releases/${RELEASE_ID}/assets" | jq -r --arg name "${filename}" '.[] | select(.name == $name) | .id // empty')
            if [ -n "${ASSET_ID}" ]; then
              curl -s -X DELETE -H "Authorization: token ${BUILD_TOKEN}" \
                "${REPO_API}/releases/${RELEASE_ID}/assets/${ASSET_ID}"
            fi
            HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X POST \
              -H "Authorization: token ${BUILD_TOKEN}" \
              -H "Content-Type: application/octet-stream" \
              -T "$file" \
              "${REPO_API}/releases/${RELEASE_ID}/assets?name=${encoded_name}")
            echo "Upload response: HTTP ${HTTP_CODE}"
            # Anything outside 2xx (including curl's "000" on connection errors)
            # counts as a failed upload; keep going so every file is attempted.
            case "${HTTP_CODE}" in
              2??) ;;
              *)
                echo "ERROR: Upload of ${filename} failed (HTTP ${HTTP_CODE})."
                UPLOAD_FAILED=1
                ;;
            esac
          done
          if [ "${UPLOAD_FAILED}" -ne 0 ]; then
            exit 1
          fi
--- a/.gitea/workflows/build-sidecar-macos.yml
+++ b/.gitea/workflows/build-sidecar-macos.yml
@@ -0,0 +1,109 @@
 name: Build Sidecar (macOS)
 on:
  workflow_dispatch:
    inputs:
      tag:
        description: 'Sidecar release tag to build (e.g. sidecar-v1.0.3)'
        required: true
 jobs:
  build-sidecar-macos:
    name: Build Sidecar (macOS)
    runs-on: macos-latest
    env:
      PYTHON_VERSION: "3.11"
    steps:
      # Resolves the sidecar tag to build and publishes it as the step output `tag`.
      # Resolution order: `inputs.tag`, then `github.event.inputs.tag`, then the
      # highest version-sorted `sidecar-v*` tag on the server. Fails if none is found.
      - name: Determine tag
        id: tag
        env:
          # Inputs are passed through the environment instead of being interpolated
          # into the script text, so a crafted tag value cannot inject shell code.
          TAG_INPUT: ${{ inputs.tag }}
          EVENT_TAG_INPUT: ${{ github.event.inputs.tag }}
        run: |
          TAG="${TAG_INPUT:-${EVENT_TAG_INPUT}}"
          if [ -z "$TAG" ]; then
            # This step runs before checkout, so there is no local repository and
            # no `origin` remote yet; query the server URL directly. `--refs` drops
            # the peeled `^{}` entries that annotated tags would otherwise add.
            # NOTE(review): this lookup is unauthenticated - confirm it is acceptable
            # for this repository's visibility.
            TAG=$(git ls-remote --tags --refs --sort=-v:refname "${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}.git" 'refs/tags/sidecar-v*' | head -1 | sed 's|.*refs/tags/||')
          fi
          if [ -z "$TAG" ]; then
            echo "ERROR: Could not determine a sidecar tag to build."
            exit 1
          fi
          echo "Building for tag: ${TAG}"
          echo "tag=${TAG}" >> "$GITHUB_OUTPUT"
      # Check out the repository at the tag resolved by the `tag` step.
      - uses: actions/checkout@v4
        with:
          ref: ${{ steps.tag.outputs.tag }}
      # Reuse uv if it is already on PATH; otherwise install it and add
      # ~/.local/bin to PATH for the following steps.
      - name: Install uv
        run: |
          if command -v uv &> /dev/null; then
            echo "uv already installed: $(uv --version)"
          else
            curl -LsSf https://astral.sh/uv/install.sh | sh
            echo "$HOME/.local/bin" >> $GITHUB_PATH
          fi
      # Install the Python version given by the job-level PYTHON_VERSION variable.
      - name: Set up Python
        run: uv python install ${{ env.PYTHON_VERSION }}
      # Install PortAudio with Homebrew.
      - name: Install system dependencies
        run: brew install portaudio
      # Sync dependencies with UV_NO_SOURCES set, then run the venv's own
      # PyInstaller on the headless spec.
      - name: Build sidecar (CPU)
        env:
          UV_NO_SOURCES: "1"
        run: |
          # UV_NO_SOURCES bypasses pyproject.toml's [tool.uv.sources] which forces
          # torch from the CUDA index (no macOS ARM wheels there).
          # Default PyPI torch includes MPS (Apple Silicon GPU) support.
          uv sync
          .venv/bin/pyinstaller local-transcription-headless.spec
      # Zip the contents of the PyInstaller output directory as the macOS archive.
      - name: Package sidecar (CPU)
        run: |
          cd dist/local-transcription-backend && zip -r ../../sidecar-macos-aarch64-cpu.zip .
      # Uploads every sidecar-*.zip in the workspace to the Gitea release for the
      # resolved sidecar tag. Waits up to 30 x 10s for the release to exist,
      # replaces any asset that already has the same name, and fails the job when
      # no archive exists or any upload is rejected.
      - name: Upload to sidecar release
        env:
          BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
        run: |
          which jq || brew install jq
          REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
          TAG="${{ steps.tag.outputs.tag }}"
          echo "Waiting for sidecar release ${TAG} to be available..."
          RELEASE_ID=""
          for i in $(seq 1 30); do
            RELEASE_JSON=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
              "${REPO_API}/releases/tags/${TAG}")
            RELEASE_ID=$(echo "$RELEASE_JSON" | jq -r '.id // empty')
            if [ -n "${RELEASE_ID}" ] && [ "${RELEASE_ID}" != "null" ]; then
              echo "Found sidecar release: ${TAG} (ID: ${RELEASE_ID})"
              break
            fi
            echo "Attempt ${i}/30: Release not ready yet, retrying in 10s..."
            sleep 10
          done
          if [ -z "${RELEASE_ID}" ] || [ "${RELEASE_ID}" = "null" ]; then
            echo "ERROR: Failed to find sidecar release for tag ${TAG} after 30 attempts."
            exit 1
          fi
          UPLOAD_FAILED=0
          for file in sidecar-*.zip; do
            # When nothing matches, the shell leaves the pattern as a literal word;
            # treat that as a missing build output rather than uploading a bogus name.
            if [ ! -f "$file" ]; then
              echo "ERROR: No sidecar-*.zip archives found to upload."
              exit 1
            fi
            filename=$(basename "$file")
            encoded_name=$(echo "$filename" | sed 's/ /%20/g')
            echo "Uploading ${filename} ($(du -h "$file" | cut -f1))..."
            # Delete an existing asset with the same name so a re-run replaces it.
            ASSET_ID=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
              "${REPO_API}/releases/${RELEASE_ID}/assets" | jq -r --arg name "${filename}" '.[] | select(.name == $name) | .id // empty')
            if [ -n "${ASSET_ID}" ]; then
              curl -s -X DELETE -H "Authorization: token ${BUILD_TOKEN}" \
                "${REPO_API}/releases/${RELEASE_ID}/assets/${ASSET_ID}"
            fi
            HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X POST \
              -H "Authorization: token ${BUILD_TOKEN}" \
              -H "Content-Type: application/octet-stream" \
              -T "$file" \
              "${REPO_API}/releases/${RELEASE_ID}/assets?name=${encoded_name}")
            echo "Upload response: HTTP ${HTTP_CODE}"
            # Anything outside 2xx (including curl's "000" on connection errors)
            # counts as a failed upload; keep going so every file is attempted.
            case "${HTTP_CODE}" in
              2??) ;;
              *)
                echo "ERROR: Upload of ${filename} failed (HTTP ${HTTP_CODE})."
                UPLOAD_FAILED=1
                ;;
            esac
          done
          if [ "${UPLOAD_FAILED}" -ne 0 ]; then
            exit 1
          fi
--- a/.gitea/workflows/build-sidecar-windows.yml
+++ b/.gitea/workflows/build-sidecar-windows.yml
@@ -0,0 +1,145 @@
 name: Build Sidecar (Windows)
 on:
  workflow_dispatch:
    inputs:
      tag:
        description: 'Sidecar release tag to build (e.g. sidecar-v1.0.3)'
        required: true
 jobs:
  build-sidecar-windows:
    name: Build Sidecar (Windows)
    runs-on: windows-latest
    env:
      PYTHON_VERSION: "3.11"
      RELEASE_TAG: ${{ inputs.tag }}
    steps:
      # Log the tag taken from the job-level RELEASE_TAG variable.
      - name: Show tag
        shell: powershell
        run: |
          Write-Host "Building for tag: $env:RELEASE_TAG"
      # Check out the repository at the tag supplied as the workflow input.
      - uses: actions/checkout@v4
        with:
          ref: ${{ inputs.tag }}
      # Reuse uv if it is already on PATH; otherwise run the installer and add each
      # candidate install directory that exists to PATH for the following steps.
      - name: Install uv
        shell: powershell
        run: |
          if (Get-Command uv -ErrorAction SilentlyContinue) {
            Write-Host "uv already installed: $(uv --version)"
          } else {
            irm https://astral.sh/uv/install.ps1 | iex
            $uvPaths = @(
              "$env:USERPROFILE\.local\bin",
              "$env:USERPROFILE\.cargo\bin",
              "$env:LOCALAPPDATA\uv\bin"
            )
            foreach ($p in $uvPaths) {
              if (Test-Path $p) {
                echo $p | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
              }
            }
          }
      # Install the Python version given by the job-level PYTHON_VERSION variable.
      - name: Set up Python
        shell: powershell
        run: uv python install ${{ env.PYTHON_VERSION }}
      # Install 7-Zip with Chocolatey only when `7z` is not already available.
      - name: Install 7-Zip
        shell: powershell
        run: |
          if (-not (Get-Command 7z -ErrorAction SilentlyContinue)) {
            choco install 7zip -y
          }
      # Sync dependencies (falling back to a non-frozen sync if the frozen one
      # fails) and run PyInstaller on the headless spec.
      - name: Build sidecar (CUDA)
        shell: powershell
        run: |
          uv sync --frozen
          if ($LASTEXITCODE -ne 0) { uv sync }
          uv run pyinstaller local-transcription-headless.spec
      # Zip the contents of the PyInstaller output directory as the CUDA archive.
      - name: Package sidecar (CUDA)
        shell: powershell
        run: |
          7z a -tzip -mx=5 sidecar-windows-x86_64-cuda.zip .\dist\local-transcription-backend\*
      # Remove the previous build output, force-reinstall torch/torchaudio from the
      # CPU wheel index, and rebuild with the venv's own PyInstaller executable.
      - name: Build sidecar (CPU)
        shell: powershell
        run: |
          Remove-Item -Recurse -Force dist\local-transcription-backend, build -ErrorAction SilentlyContinue
          uv pip install torch torchaudio --index-url https://download.pytorch.org/whl/cpu --force-reinstall
          .venv\Scripts\pyinstaller.exe local-transcription-headless.spec
      # Zip the contents of the PyInstaller output directory as the CPU archive.
      - name: Package sidecar (CPU)
        shell: powershell
        run: |
          7z a -tzip -mx=5 sidecar-windows-x86_64-cpu.zip .\dist\local-transcription-backend\*
      # Uploads every sidecar-*.zip in the workspace to the Gitea release for
      # RELEASE_TAG. Waits up to 30 x 10s for the release to exist, replaces any
      # asset that already has the same name, and fails the job when no archive
      # exists or any upload fails.
      - name: Upload to sidecar release
        shell: powershell
        env:
          BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
        run: |
          $REPO_API = "${{ github.server_url }}/api/v1/repos/${{ github.repository }}"
          $Headers = @{ "Authorization" = "token $env:BUILD_TOKEN" }
          $TAG = $env:RELEASE_TAG
          Write-Host "Release tag: $TAG"
          if (-not $TAG) {
            Write-Host "ERROR: RELEASE_TAG is empty"
            exit 1
          }
          Write-Host "Waiting for sidecar release $TAG to be available..."
          $RELEASE_ID = $null
          for ($i = 1; $i -le 30; $i++) {
            try {
              $release = Invoke-RestMethod -Uri "$REPO_API/releases/tags/$TAG" -Headers $Headers -ErrorAction Stop
              $RELEASE_ID = $release.id
              if ($RELEASE_ID) {
                Write-Host "Found sidecar release: $TAG (ID: $RELEASE_ID)"
                break
              }
            } catch {
              # Expected while the release does not exist yet; keep polling.
            }
            Write-Host "Attempt ${i}/30: Release not ready yet, retrying in 10s..."
            Start-Sleep -Seconds 10
          }
          if (-not $RELEASE_ID) {
            Write-Host "ERROR: Failed to find sidecar release for tag $TAG after 30 attempts."
            exit 1
          }
          $archives = @(Get-ChildItem -Path . -Filter "sidecar-*.zip")
          if ($archives.Count -eq 0) {
            Write-Host "ERROR: No sidecar-*.zip archives found to upload."
            exit 1
          }
          $failedUploads = 0
          foreach ($archive in $archives) {
            $filename = $archive.Name
            $encodedName = [System.Uri]::EscapeDataString($filename)
            $size = [math]::Round($archive.Length / 1MB, 1)
            Write-Host "Uploading $filename ($size MB)..."
            # Best effort: delete an existing asset with the same name so a re-run
            # replaces it. A lookup failure is reported but does not block the upload.
            try {
              $assets = Invoke-RestMethod -Uri "$REPO_API/releases/$RELEASE_ID/assets" -Headers $Headers
              $existing = $assets | Where-Object { $_.name -eq $filename }
              if ($existing) {
                Invoke-RestMethod -Uri "$REPO_API/releases/$RELEASE_ID/assets/$($existing.id)" -Method Delete -Headers $Headers
              }
            } catch {
              Write-Host "WARNING: Could not check/remove existing asset ${filename}: $($_.Exception.Message)"
            }
            $uploadUrl = "$REPO_API/releases/$RELEASE_ID/assets?name=$encodedName"
            $result = curl.exe --fail --silent --show-error `
              -X POST `
              -H "Authorization: token $env:BUILD_TOKEN" `
              -H "Content-Type: application/octet-stream" `
              -T "$($archive.FullName)" `
              "$uploadUrl" 2>&1
            if ($LASTEXITCODE -eq 0) {
              Write-Host "Upload successful: $filename"
            } else {
              Write-Host "ERROR: Upload failed for ${filename}: $result"
              $failedUploads++
            }
          }
          # Every file is attempted first; the job then fails if any upload failed,
          # so a release is never silently left without its archives.
          if ($failedUploads -gt 0) {
            Write-Host "ERROR: $failedUploads upload(s) failed."
            exit 1
          }
--- a/.gitea/workflows/release.yml
+++ b/.gitea/workflows/release.yml
@@ -0,0 +1,167 @@
 name: Release
 on:
  push:
    branches: [main]
    paths:
      - 'src/**'
      - 'src-tauri/**'
      - 'package.json'
      - 'vite.config.ts'
      - 'index.html'
 jobs:
  # Runs the frontend (vitest) and Python (pytest) suites. Skipped when the head
  # commit message contains "[skip ci]", which the automated version-bump commit
  # in this workflow includes; the dependent bump-version job declares
  # `needs: test`.
  test:
    name: Run Tests
    if: "!contains(github.event.head_commit.message, '[skip ci]')"
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-node@v4
        with:
          node-version: 20
      - name: Install npm deps
        run: npm ci
      - name: Frontend tests
        run: npx vitest run
      # Install uv and add ~/.local/bin to PATH for the following step.
      - name: Install uv
        run: |
          curl -LsSf https://astral.sh/uv/install.sh | sh
          echo "$HOME/.local/bin" >> $GITHUB_PATH
      # Create a throwaway venv with an explicit package list, then run pytest
      # over backend/tests/ and client/tests/.
      - name: Python tests
        run: |
          uv venv .testvenv
          VIRTUAL_ENV=.testvenv uv pip install pytest httpx pytest-asyncio anyio fastapi pydantic pyyaml uvicorn requests
          .testvenv/bin/python -m pytest backend/tests/ client/tests/ -v --tb=short
  bump-version:
    name: Bump version and tag
    needs: test
    runs-on: ubuntu-latest
    outputs:
      new_version: ${{ steps.bump.outputs.new_version }}
      tag: ${{ steps.bump.outputs.tag }}
    steps:
      # Check out with full history (fetch-depth: 0).
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      # Set the author identity used for the automated version-bump commit.
      - name: Configure git
        run: |
          git config user.name "Gitea Actions"
          git config user.email "actions@gitea.local"
      # Reads the current version from package.json, increments the patch number,
      # and rewrites it in package.json, src-tauri/tauri.conf.json,
      # src-tauri/Cargo.toml and version.py. Outputs `new_version` and `tag`.
      - name: Bump patch version
        id: bump
        run: |
          CURRENT=$(grep '"version"' package.json | head -1 | sed 's/.*"version": *"\([^"]*\)".*/\1/')
          echo "Current version: ${CURRENT}"
          # Refuse anything that is not plain MAJOR.MINOR.PATCH: a suffix such as
          # "-beta" would break the arithmetic below and yield a corrupt version.
          if ! echo "${CURRENT}" | grep -Eq '^[0-9]+\.[0-9]+\.[0-9]+$'; then
            echo "ERROR: package.json version '${CURRENT}' is not MAJOR.MINOR.PATCH"
            exit 1
          fi
          MAJOR=$(echo "${CURRENT}" | cut -d. -f1)
          MINOR=$(echo "${CURRENT}" | cut -d. -f2)
          PATCH=$(echo "${CURRENT}" | cut -d. -f3)
          NEW_PATCH=$((PATCH + 1))
          NEW_VERSION="${MAJOR}.${MINOR}.${NEW_PATCH}"
          echo "New version: ${NEW_VERSION}"
          # Escape the dots so sed matches them literally; unescaped they are regex
          # wildcards and "2.0.1" would also match e.g. "2x0y1".
          CURRENT_RE=$(echo "${CURRENT}" | sed 's/\./\\./g')
          sed -i "s/\"version\": \"${CURRENT_RE}\"/\"version\": \"${NEW_VERSION}\"/" package.json
          sed -i "s/\"version\": \"${CURRENT_RE}\"/\"version\": \"${NEW_VERSION}\"/" src-tauri/tauri.conf.json
          sed -i "s/^version = \"${CURRENT_RE}\"/version = \"${NEW_VERSION}\"/" src-tauri/Cargo.toml
          sed -i "s/__version__ = \"${CURRENT_RE}\"/__version__ = \"${NEW_VERSION}\"/" version.py
          sed -i "s/__version_info__ = .*/__version_info__ = (${MAJOR}, ${MINOR}, ${NEW_PATCH})/" version.py
          echo "new_version=${NEW_VERSION}" >> $GITHUB_OUTPUT
          echo "tag=v${NEW_VERSION}" >> $GITHUB_OUTPUT
      # Commits the bumped version files, rebases onto the latest main, tags the
      # resulting commit as v<new_version>, and pushes both the branch and the tag
      # using BUILD_TOKEN for authentication.
      - name: Commit and tag
        env:
          BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
        run: |
          NEW_VERSION="${{ steps.bump.outputs.new_version }}"
          git add package.json src-tauri/tauri.conf.json src-tauri/Cargo.toml version.py
          git commit -m "chore: bump version to ${NEW_VERSION} [skip ci]"
          REMOTE_URL=$(git remote get-url origin | sed "s|://|://gitea-actions:${BUILD_TOKEN}@|")
          # Rebase BEFORE tagging: a rebase rewrites the bump commit, so a tag
          # created earlier would keep pointing at the discarded pre-rebase commit
          # and the release would be built from a commit that is not on main.
          # The pull stays best-effort, but a failed rebase is aborted so HEAD is
          # the bump commit again rather than a half-applied rebase.
          if ! git pull --rebase "${REMOTE_URL}" main; then
            echo "WARNING: git pull --rebase failed; pushing the bump commit as-is."
            git rebase --abort 2>/dev/null || true
          fi
          git tag "v${NEW_VERSION}"
          git push "${REMOTE_URL}" HEAD:main
          git push "${REMOTE_URL}" "v${NEW_VERSION}"
      # Creates a published (non-draft, non-prerelease) Gitea release for the new
      # tag and fails the job if the API does not answer with a 2xx status.
      - name: Create Gitea release
        env:
          BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
        run: |
          REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
          TAG="${{ steps.bump.outputs.tag }}"
          RELEASE_NAME="Local Transcription ${TAG}"
          # Capture the body and status separately so a rejected request is
          # detected instead of being reported as a created release.
          RESPONSE_FILE=$(mktemp)
          HTTP_CODE=$(curl -s -o "${RESPONSE_FILE}" -w "%{http_code}" -X POST \
            -H "Authorization: token ${BUILD_TOKEN}" \
            -H "Content-Type: application/json" \
            -d "{\"tag_name\": \"${TAG}\", \"name\": \"${RELEASE_NAME}\", \"body\": \"Automated build.\", \"draft\": false, \"prerelease\": false}" \
            "${REPO_API}/releases")
          cat "${RESPONSE_FILE}"
          rm -f "${RESPONSE_FILE}"
          case "${HTTP_CODE}" in
            2??)
              echo "Created release: ${RELEASE_NAME}"
              ;;
            *)
              echo "ERROR: Creating release ${TAG} failed (HTTP ${HTTP_CODE})."
              exit 1
              ;;
          esac
      # Dispatches the Linux, Windows and macOS app build workflows for the new tag
      # via the workflow-dispatch API. Every workflow is attempted; the job fails
      # afterwards if any dispatch did not return a 2xx status.
      - name: Trigger per-OS app builds
        env:
          BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
        run: |
          REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
          TAG="${{ steps.bump.outputs.tag }}"
          DISPATCH_FAILED=0
          for workflow in build-app-linux.yml build-app-windows.yml build-app-macos.yml; do
            echo "Dispatching ${workflow} for ${TAG}..."
            HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X POST \
              -H "Authorization: token ${BUILD_TOKEN}" \
              -H "Content-Type: application/json" \
              -d "{\"ref\": \"main\", \"inputs\": {\"tag\": \"${TAG}\"}}" \
              "${REPO_API}/actions/workflows/${workflow}/dispatches")
            echo "  -> HTTP ${HTTP_CODE}"
            # A non-2xx status means that platform's build never starts; record it
            # instead of letting the release silently ship without its artifacts.
            case "${HTTP_CODE}" in
              2??) ;;
              *)
                echo "ERROR: Dispatch of ${workflow} failed (HTTP ${HTTP_CODE})."
                DISPATCH_FAILED=1
                ;;
            esac
          done
          if [ "${DISPATCH_FAILED}" -ne 0 ]; then
            exit 1
          fi
      # Deletes old app releases and their tags: of the releases returned by the
      # API (first 50) whose tag starts with "v", the first KEEP entries are kept
      # and the rest are deleted, except PROTECT_TAG. Cleanup is best effort; if
      # the release list cannot be read, the step is skipped without failing.
      # NOTE(review): this relies on the API returning releases newest-first - confirm.
      - name: Clean up old app releases
        env:
          BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
        run: |
          # Other workflows install jq explicitly; do the same here when missing.
          command -v jq > /dev/null 2>&1 || sudo apt-get install -y jq
          REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
          KEEP=3
          PROTECT_TAG="v1.4.0"
          echo "Cleaning up old app releases (keeping latest ${KEEP} + ${PROTECT_TAG})..."
          # Get all app releases (v* tags). Sidecar tags start with "sidecar-" and so
          # never match the "v" prefix. A non-array answer (e.g. an API error object)
          # yields no output instead of being iterated as if it were a release list.
          RELEASES=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
            "${REPO_API}/releases?limit=50" | jq -c 'if type == "array" then [.[] | select(.tag_name | startswith("v"))] else empty end' || true)
          if [ -z "$RELEASES" ]; then
            echo "WARNING: Could not list releases; skipping cleanup"
            exit 0
          fi
          TOTAL=$(echo "$RELEASES" | jq 'length')
          echo "Found ${TOTAL} app releases"
          if [ "$TOTAL" -le "$KEEP" ]; then
            echo "Nothing to clean up"
            exit 0
          fi
          # Skip the newest KEEP releases, delete the rest (except protected)
          echo "$RELEASES" | jq -c ".[$KEEP:][]" | while read -r release; do
            ID=$(echo "$release" | jq -r '.id')
            TAG=$(echo "$release" | jq -r '.tag_name')
            if [ "$TAG" = "$PROTECT_TAG" ]; then
              echo "  Protecting ${TAG}"
              continue
            fi
            echo "  Deleting release ${TAG} (ID: ${ID})..."
            curl -s -X DELETE -H "Authorization: token ${BUILD_TOKEN}" \
              "${REPO_API}/releases/${ID}"
            # Also delete the tag
            curl -s -X DELETE -H "Authorization: token ${BUILD_TOKEN}" \
              "${REPO_API}/tags/${TAG}"
          done
          echo "Cleanup complete"
--- a/.gitea/workflows/sidecar-release.yml
+++ b/.gitea/workflows/sidecar-release.yml
@@ -0,0 +1,174 @@
 # Sidecar release coordinator. Runs on pushes to main that touch any of the
 # listed paths, or when started manually via workflow_dispatch.
 name: Sidecar Release
 on:
  push:
    branches: [main]
    paths:
      - 'client/**'
      - 'server/**'
      - 'backend/**'
      - 'pyproject.toml'
      - 'local-transcription-headless.spec'
  workflow_dispatch:
 jobs:
  # Gate job: runs the Python test suites; the version-bump job declares
  # `needs: test`, so it only runs after this one.
  test:
    name: Run Tests
    # Skipped when the head commit message contains "[skip ci]".
    if: "!contains(github.event.head_commit.message, '[skip ci]')"
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Install uv
        run: |
          curl -LsSf https://astral.sh/uv/install.sh | sh
          # Add the install directory to PATH for the following steps
          # (presumably where the installer places the uv binary — confirm).
          echo "$HOME/.local/bin" >> $GITHUB_PATH
      - name: Python tests
        run: |
          # Dedicated virtualenv holding only the packages listed below.
          uv venv .testvenv
          VIRTUAL_ENV=.testvenv uv pip install pytest httpx pytest-asyncio anyio fastapi pydantic pyyaml uvicorn requests
          .testvenv/bin/python -m pytest backend/tests/ client/tests/ -v --tb=short
  # Runs after the test job and is skipped for "[skip ci]" commits.
  bump-sidecar-version:
    name: Bump sidecar version and tag
    needs: test
    if: "!contains(github.event.head_commit.message, '[skip ci]')"
    runs-on: ubuntu-latest
    # Job outputs are taken from the steps with ids `bump` and `check_changes`.
    outputs:
      version: ${{ steps.bump.outputs.version }}
      tag: ${{ steps.bump.outputs.tag }}
      has_changes: ${{ steps.check_changes.outputs.has_changes }}
    steps:
      - uses: actions/checkout@v4
        with:
          # Two commits of history so the change check can diff HEAD~1 against HEAD.
          fetch-depth: 2
      # Sets the `has_changes` step output that gates every later step in this job.
      - name: Check for backend changes
        id: check_changes
        run: |
          # Manual runs always proceed, whatever the last commit touched.
          if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
            echo "has_changes=true" >> $GITHUB_OUTPUT
            exit 0
          fi
          # Files from the backend paths that differ between the last two commits;
          # a failing diff is treated the same as "nothing changed".
          CHANGED_FILES=$(git diff --name-only HEAD~1 HEAD -- client/ server/ backend/ pyproject.toml local-transcription-headless.spec 2>/dev/null || echo "")
          if [ -z "$CHANGED_FILES" ]; then
            echo "has_changes=false" >> $GITHUB_OUTPUT
            echo "No backend changes detected, skipping sidecar build"
            exit 0
          fi
          echo "has_changes=true" >> $GITHUB_OUTPUT
          echo "Backend changes detected: $CHANGED_FILES"
      # Sets the author identity used by the commit made in the "Commit and tag" step.
      - name: Configure git
        if: steps.check_changes.outputs.has_changes == 'true'
        run: |
          git config user.name "Gitea Actions"
          git config user.email "actions@gitea.local"
      # Increments the patch component of the version in pyproject.toml and
      # publishes the new version and tag name as step outputs.
      - name: Bump sidecar patch version
        if: steps.check_changes.outputs.has_changes == 'true'
        id: bump
        run: |
          # First line starting with `version = `; the quoted value is extracted.
          CURRENT=$(grep '^version = ' pyproject.toml | head -1 | sed 's/version = "\(.*\)"/\1/')
          echo "Current sidecar version: ${CURRENT}"
          # NOTE(review): assumes a MAJOR.MINOR.PATCH string with a purely numeric
          # PATCH; anything else breaks the arithmetic below — confirm.
          MAJOR=$(echo "${CURRENT}" | cut -d. -f1)
          MINOR=$(echo "${CURRENT}" | cut -d. -f2)
          PATCH=$(echo "${CURRENT}" | cut -d. -f3)
          NEW_PATCH=$((PATCH + 1))
          NEW_VERSION="${MAJOR}.${MINOR}.${NEW_PATCH}"
          echo "New sidecar version: ${NEW_VERSION}"
          # Rewrite only lines that match the current version exactly.
          sed -i "s/^version = \"${CURRENT}\"/version = \"${NEW_VERSION}\"/" pyproject.toml
          echo "version=${NEW_VERSION}" >> $GITHUB_OUTPUT
          echo "tag=sidecar-v${NEW_VERSION}" >> $GITHUB_OUTPUT
      # Commits the version bump, rebases it onto the latest main, then tags and
      # pushes. The tag is created only AFTER the rebase: a rebase rewrites the
      # bump commit, so a tag made earlier would point at a commit that never
      # lands on main.
      - name: Commit and tag
        if: steps.check_changes.outputs.has_changes == 'true'
        env:
          BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
        run: |
          NEW_VERSION="${{ steps.bump.outputs.version }}"
          TAG="${{ steps.bump.outputs.tag }}"
          git add pyproject.toml
          # The jobs in this workflow are skipped for head commits containing "[skip ci]".
          git commit -m "chore: bump sidecar version to ${NEW_VERSION} [skip ci]"
          # Embed the token in the remote URL so pull and push are authenticated.
          REMOTE_URL=$(git remote get-url origin | sed "s|://|://gitea-actions:${BUILD_TOKEN}@|")
          # Pick up commits pushed to main while this job was running. Still
          # best-effort, but a failed rebase is aborted so HEAD is the bump commit
          # again rather than a half-applied rebase state.
          if ! git pull --rebase "${REMOTE_URL}" main; then
            echo "WARNING: could not rebase onto main; continuing with the un-rebased commit"
            git rebase --abort 2>/dev/null || true
          fi
          # Tag whatever commit is about to be pushed to main.
          git tag "${TAG}"
          git push "${REMOTE_URL}" HEAD:main
          git push "${REMOTE_URL}" "${TAG}"
      # Creates the release for the new sidecar tag via the Gitea API and fails
      # the step when the API does not answer with a 2xx status, so later steps
      # do not run against a release that was never created.
      - name: Create Gitea release
        if: steps.check_changes.outputs.has_changes == 'true'
        env:
          BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
        run: |
          REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
          TAG="${{ steps.bump.outputs.tag }}"
          VERSION="${{ steps.bump.outputs.version }}"
          RELEASE_NAME="Sidecar v${VERSION}"
          # Keep the response body in a file so the status code can be captured
          # separately; `|| true` lets a transport failure reach the check below
          # (curl then reports status 000).
          RESPONSE_FILE=$(mktemp)
          HTTP_CODE=$(curl -s -o "${RESPONSE_FILE}" -w "%{http_code}" -X POST \
            -H "Authorization: token ${BUILD_TOKEN}" \
            -H "Content-Type: application/json" \
            -d "{\"tag_name\": \"${TAG}\", \"name\": \"${RELEASE_NAME}\", \"body\": \"Automated sidecar build.\", \"draft\": false, \"prerelease\": false}" \
            "${REPO_API}/releases") || true
          # The API response is still written to the job log, as before.
          cat "${RESPONSE_FILE}"
          echo
          case "${HTTP_CODE}" in
            2??)
              echo "Created release: ${RELEASE_NAME}"
              ;;
            *)
              echo "ERROR: failed to create release ${RELEASE_NAME} (HTTP ${HTTP_CODE})"
              exit 1
              ;;
          esac
      # Starts the three per-OS sidecar build workflows through the workflow
      # dispatch API, passing the new tag as the `tag` input.
      - name: Trigger per-OS sidecar builds
        if: steps.check_changes.outputs.has_changes == 'true'
        env:
          BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
        run: |
          REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
          TAG="${{ steps.bump.outputs.tag }}"
          for workflow in build-sidecar-linux.yml build-sidecar-windows.yml build-sidecar-macos.yml; do
            echo "Dispatching ${workflow} for ${TAG}..."
            # The response body is discarded; only the status code is captured.
            # The code is logged but not checked, so an HTTP error status does
            # not fail this step.
            HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X POST \
              -H "Authorization: token ${BUILD_TOKEN}" \
              -H "Content-Type: application/json" \
              -d "{\"ref\": \"main\", \"inputs\": {\"tag\": \"${TAG}\"}}" \
              "${REPO_API}/actions/workflows/${workflow}/dispatches")
            echo "  -> HTTP ${HTTP_CODE}"
          done
      # Prunes sidecar releases (and their git tags) through the Gitea API,
      # keeping only the first KEEP entries of the release list.
      - name: Clean up old sidecar releases
        if: steps.check_changes.outputs.has_changes == 'true'
        env:
          BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
        run: |
          REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
          # Number of sidecar releases to retain.
          KEEP=2
          echo "Cleaning up old sidecar releases (keeping latest ${KEEP})..."
          # Get all sidecar releases (sidecar-v* tags)
          # Only the first 50 releases returned by the API are considered (limit=50).
          RELEASES=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
            "${REPO_API}/releases?limit=50" | jq -c '[.[] | select(.tag_name | startswith("sidecar-v"))]')
          TOTAL=$(echo "$RELEASES" | jq 'length')
          echo "Found ${TOTAL} sidecar releases"
          if [ "$TOTAL" -le "$KEEP" ]; then
            echo "Nothing to clean up"
            exit 0
          fi
          # Skip the newest KEEP releases, delete the rest
          # NOTE(review): relies on the API returning releases newest-first — confirm.
          echo "$RELEASES" | jq -c ".[$KEEP:][]" | while read -r release; do
            ID=$(echo "$release" | jq -r '.id')
            TAG=$(echo "$release" | jq -r '.tag_name')
            echo "  Deleting sidecar release ${TAG} (ID: ${ID})..."
            # Neither DELETE response is inspected; an HTTP error status does not stop the loop.
            curl -s -X DELETE -H "Authorization: token ${BUILD_TOKEN}" \
              "${REPO_API}/releases/${ID}"
            # Also delete the tag
            curl -s -X DELETE -H "Authorization: token ${BUILD_TOKEN}" \
              "${REPO_API}/tags/${TAG}"
          done
          echo "Cleanup complete"
--- a/.gitea/workflows/test.yml
+++ b/.gitea/workflows/test.yml
@@ -0,0 +1,66 @@
 # Test workflow: runs on pushes and pull requests targeting main, or manually.
 name: Tests
 on:
  push:
    branches: [main]
  pull_request:
    branches: [main]
  workflow_dispatch:
 jobs:
  # Runs the pytest suites under backend/tests/ and client/tests/.
  python-tests:
    name: Python Backend Tests
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Install uv
        run: |
          # Reuse an existing uv when the runner already has one; install otherwise.
          if command -v uv &> /dev/null; then
            echo "uv already installed: $(uv --version)"
          else
            curl -LsSf https://astral.sh/uv/install.sh | sh
            # Add the install directory to PATH for the following steps
            # (presumably where the installer places the uv binary — confirm).
            echo "$HOME/.local/bin" >> $GITHUB_PATH
          fi
      - name: Run pytest
        run: |
          # Dedicated virtualenv holding only the packages listed below.
          uv venv .testvenv
          VIRTUAL_ENV=.testvenv uv pip install pytest httpx pytest-asyncio anyio fastapi pydantic pyyaml uvicorn requests
          .testvenv/bin/python -m pytest backend/tests/ client/tests/ -v --tb=short
  # Installs the npm dependencies on Node 20 and runs the Vitest suite.
  frontend-tests:
    name: Frontend Tests
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-node@v4
        with:
          node-version: 20
      - name: Install dependencies
        run: npm ci
      - name: Run Vitest
        run: npx vitest run
  # Runs `cargo test` for the Rust code under src-tauri.
  rust-tests:
    name: Rust Sidecar Tests
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Install Rust
        run: |
          # Non-interactive rustup install (-y) of the stable toolchain.
          curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable
          # Expose cargo/rustc to the following steps.
          echo "$HOME/.cargo/bin" >> $GITHUB_PATH
      - name: Install Tauri system dependencies
        run: |
          # System packages that CLAUDE.md lists as Tauri's Linux requirements.
          sudo apt-get update
          sudo apt-get install -y libgtk-3-dev libwebkit2gtk-4.1-dev libappindicator3-dev librsvg2-dev patchelf
      - name: Run cargo test
        working-directory: src-tauri
        run: cargo test
--- a/.gitignore
+++ b/.gitignore
@@ -10,8 +10,8 @@ dist/
 downloads/
 eggs/
 .eggs/
-lib/
+/lib/
-lib64/
+/lib64/
 parts/
 sdist/
 var/
@@ -54,3 +54,15 @@ models/
 # PyInstaller
 *.spec.lock
 # Node.js
 node_modules/
 # Vite / Svelte build output
 dist/
 # Tauri
 src-tauri/target/
 # Windows NTFS alternate data streams
 *:Zone.Identifier
--- a/2025-live-transcription-research.md:Zone.Identifier
+++ b/2025-live-transcription-research.md:Zone.Identifier
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -4,52 +4,114 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
 ## Project Overview
-Local Transcription is a desktop application for real-time speech-to-text transcription designed for streamers. It uses Whisper models (via faster-whisper) to transcribe audio locally with optional multi-user server synchronization.
+Local Transcription is a cross-platform desktop application for real-time speech-to-text transcription designed for streamers. It supports local Whisper models and cloud-based Deepgram transcription, with OBS browser source integration and optional multi-user sync.
 **Architecture:** Two-process model — a Tauri v2 shell (Svelte 5 frontend) communicates with a headless Python backend (sidecar) via REST API and WebSocket.
 **Key Features:**
- Standalone desktop GUI (PySide6/Qt)
+- Cross-platform desktop app (Windows, macOS, Linux) via Tauri v2 + Svelte 5
- Local transcription with CPU/GPU support
+- Headless Python backend with FastAPI control API
- Built-in web server for OBS browser source integration
+- Dual transcription modes: local Whisper or cloud Deepgram (managed/BYOK)
- Optional Node.js-based multi-user server for syncing transcriptions across users
+- Built-in web server for OBS browser source at `http://localhost:8080`
- Noise suppression and Voice Activity Detection (VAD)
+- Optional multi-user sync via Node.js server
- Cross-platform builds (Linux/Windows) with PyInstaller
+- CUDA, MPS (Apple Silicon), and CPU support
 - Auto-updates, custom fonts, configurable colors
 > **Legacy GUI:** The original PySide6/Qt GUI (`main.py`, `gui/`) still works during the transition. New features should target the Tauri frontend and headless backend.
 ## Project Structure
 ```
 local-transcription/
-├── client/                   # Core transcription logic
+├── src/                             # Svelte 5 frontend (Tauri UI)
-│   ├── audio_capture.py      # Audio input and buffering
+│   ├── App.svelte                   # Main app shell
-│   ├── transcription_engine.py # Whisper model integration
+│   ├── app.css                      # Global dark theme styles
-│   ├── noise_suppression.py  # VAD and noise reduction
+│   ├── main.ts                      # Svelte mount point
-│   ├── device_utils.py       # CPU/GPU device management
+│   ├── lib/components/              # UI components
-│   ├── config.py             # Configuration management
+│   │   ├── Header.svelte            # Title bar + settings button
-│   └── server_sync.py        # Multi-user server sync client
+│   │   ├── StatusBar.svelte         # State indicator, device, user info
-├── gui/                      # Desktop application UI
+│   │   ├── Controls.svelte          # Start/Stop, Clear, Save buttons
-│   ├── main_window_qt.py     # Main application window (PySide6)
+│   │   ├── TranscriptionDisplay.svelte  # Scrolling transcript view
-│   ├── settings_dialog_qt.py # Settings dialog (PySide6)
+│   │   └── Settings.svelte          # Full settings modal (all sections)
-│   └── transcription_display_qt.py # Display widget
+│   └── lib/stores/                  # Svelte 5 reactive stores ($state/$derived)
-├── server/                   # Web display servers
+│       ├── backend.ts               # WebSocket + REST API client
-│   ├── web_display.py        # FastAPI server for OBS browser source (local)
+│       ├── config.ts                # App configuration fetch/update
-│   └── nodejs/               # Optional multi-user Node.js server
+│       └── transcriptions.ts        # Transcript data management
-│       ├── server.js         # Multi-user sync server with WebSocket
+├── src-tauri/                       # Tauri v2 Rust shell
-│       ├── package.json      # Node.js dependencies
+│   ├── src/lib.rs                   # Plugin registration (shell, dialog, process)
-│       └── README.md         # Server deployment documentation
+│   ├── src/main.rs                  # Entry point
-├── config/                   # Example configuration files
+│   ├── tauri.conf.json              # Window, bundle, plugin config
-│   └── default_config.yaml   # Default settings template
+│   └── Cargo.toml                   # Rust dependencies
-├── main.py                   # GUI application entry point
+├── backend/                         # Headless Python backend (the sidecar)
-├── main_cli.py              # CLI version for testing
+│   ├── app_controller.py            # Core orchestration (engine, sync, config)
-└── pyproject.toml           # Dependencies and build config
+│   ├── api_server.py                # FastAPI REST endpoints + /ws/control
 │   └── main_headless.py             # Headless entry point (prints JSON to stdout)
 ├── client/                          # Core transcription modules (used by backend)
 │   ├── audio_capture.py             # Audio input handling
 │   ├── transcription_engine_realtime.py  # RealtimeSTT / Whisper engine
 │   ├── deepgram_transcription.py    # Deepgram WebSocket cloud transcription
 │   ├── noise_suppression.py         # VAD and noise reduction
 │   ├── device_utils.py              # CPU/GPU/MPS detection
 │   ├── config.py                    # YAML config management (~/.local-transcription/)
 │   ├── server_sync.py               # Multi-user server sync client
 │   ├── instance_lock.py             # Single-instance PID lock
 │   └── update_checker.py            # Gitea release update checker
 ├── gui/                             # Legacy PySide6/Qt GUI (still functional)
 │   ├── main_window_qt.py            # Main window (orchestration lives here in legacy)
 │   ├── settings_dialog_qt.py        # Settings dialog
 │   └── transcription_display_qt.py  # Display widget
 ├── server/
 │   ├── web_display.py               # FastAPI OBS display server (WebSocket + HTML)
 │   └── nodejs/                      # Optional multi-user sync server
 ├── .gitea/workflows/                # CI/CD
 │   ├── release.yml                  # Coordinator: version bump, tag, release creation
 │   ├── build-app-linux.yml          # Linux Tauri app build (triggered by v* tag)
 │   ├── build-app-windows.yml        # Windows Tauri app build (triggered by v* tag)
 │   ├── build-app-macos.yml          # macOS Tauri app build (triggered by v* tag)
 │   ├── sidecar-release.yml          # Sidecar coordinator: version bump, tag, release
 │   ├── build-sidecar-linux.yml      # Linux sidecar build (triggered by sidecar-v* tag)
 │   ├── build-sidecar-windows.yml    # Windows sidecar build (triggered by sidecar-v* tag)
 │   └── build-sidecar-macos.yml      # macOS sidecar build (triggered by sidecar-v* tag)
 ├── config/default_config.yaml       # Default settings template
 ├── main.py                          # Legacy PySide6 GUI entry point
 ├── main_cli.py                      # CLI version for testing
 ├── version.py                       # Version string (__version__)
 ├── local-transcription.spec         # PyInstaller config (legacy, includes PySide6)
 ├── local-transcription-headless.spec # PyInstaller config (headless sidecar, no Qt)
 ├── pyproject.toml                   # Python deps (uv, CUDA PyTorch index)
 ├── package.json                     # Node/Tauri deps
 └── vite.config.ts                   # Vite build config ($lib alias)
 ```
 ## Development Commands
-### Installation and Setup
+### Frontend (Tauri + Svelte)
 ```bash
-# Install dependencies (creates .venv automatically)
+# Install npm dependencies
 npm install
 # Run Tauri in development mode (hot-reload)
 npm run tauri dev
 # Build frontend only (for testing)
 npx vite build
 # Type-check Svelte
 npx svelte-check
 # Check Rust compiles
 cd src-tauri && cargo check
 ```
 ### Backend (Python)
 ```bash
 # Install Python dependencies
 uv sync
-# Run the GUI application
+# Run the headless backend standalone (for development)
 uv run python -m backend.main_headless --port 8080
 # Run the legacy PySide6 GUI
 uv run python main.py
 # Run CLI version (headless, for testing)
@@ -57,257 +119,163 @@ uv run python main_cli.py
 # List available audio devices
 uv run python main_cli.py --list-devices
 # Install with CUDA support (if needed)
 uv pip install torch --index-url https://download.pytorch.org/whl/cu121
 ```
-### Building Executables
+### Building
 ```bash
-# Linux (includes CUDA support - works on both GPU and CPU systems)
+# Build Tauri app (produces platform installer)
-./build.sh
+npm run tauri build
-# Windows (includes CUDA support - works on both GPU and CPU systems)
+# Build headless Python sidecar (no PySide6)
-build.bat
+uv run pyinstaller local-transcription-headless.spec
 # Output: dist/local-transcription-backend/
-# Manual build with PyInstaller
+# Build legacy PySide6 app
 uv sync                          # Install dependencies (includes CUDA PyTorch)
 uv pip uninstall -q enum34       # Remove incompatible enum34 package
 uv run pyinstaller local-transcription.spec
 # Or use: ./build.sh (Linux) / build.bat (Windows)
 ```
 **Important:** All builds include CUDA support via `pyproject.toml` configuration. CUDA builds can be created on systems without NVIDIA GPUs. The PyTorch CUDA runtime is bundled, and the app automatically falls back to CPU if no GPU is available.
 ### Testing
 ```bash
 # Run component tests
 uv run python test_components.py
 # Check CUDA availability
 uv run python check_cuda.py
 # Test web server manually
 uv run python -m uvicorn server.web_display:app --reload
 ```
-## Architecture
+## Architecture Details
-### Audio Processing Pipeline
+### Communication: Tauri <-> Python Backend
-1. **Audio Capture** ([client/audio_capture.py](client/audio_capture.py))
+The Svelte frontend connects to the Python backend via two channels:
   - Captures audio from microphone/system using sounddevice
   - Handles automatic sample rate detection and resampling
   - Uses chunking with overlap for better transcription quality
   - Default: 3-second chunks with 0.5s overlap
-2. **Noise Suppression** ([client/noise_suppression.py](client/noise_suppression.py))
+**REST API** (on port 8081 by default):
-   - Applies noisereduce for background noise reduction
+- `GET /api/status` — app state, device info, version
-   - Voice Activity Detection (VAD) using webrtcvad
+- `POST /api/start` / `POST /api/stop` — transcription control
-   - Skips silent segments to improve performance
+- `GET /api/config` / `PUT /api/config` — read/write settings (dot-notation keys)
 - `GET /api/audio-devices` / `GET /api/compute-devices` — device enumeration
 - `POST /api/reload-engine` — reload with new model/device
 - `GET /api/transcriptions` / `POST /api/clear` — transcript management
 - `POST /api/save-file` — write text to a file path
 - `GET /api/check-update` / `POST /api/skip-version` — update management
 - `POST /api/login` / `POST /api/register` / `GET /api/balance` — managed mode proxy
-3. **Transcription** ([client/transcription_engine.py](client/transcription_engine.py))
+**WebSocket** `/ws/control`:
-   - Uses faster-whisper for efficient inference
+- Pushes real-time events: `state_changed`, `transcription`, `preview`, `error`, `credits_low`
-   - Supports CPU, CUDA, and Apple MPS (Mac)
+- Client sends keepalive pings
   - Models: tiny, base, small, medium, large
   - Thread-safe model loading with locks
-4. **Display** ([gui/main_window_qt.py](gui/main_window_qt.py))
+The OBS display server runs separately on port 8080 (`GET /` for HTML, `WebSocket /ws` for transcriptions).
   - PySide6/Qt-based desktop GUI
   - Real-time transcription display with scrolling
   - Settings panel with live updates (no restart needed)
-### Web Server Architecture
+### Backend Process Lifecycle
-**Local Web Server** ([server/web_display.py](server/web_display.py))
+1. `main_headless.py` starts, acquires instance lock, creates `AppController`
- Always runs when GUI starts (port 8080 by default)
+2. `AppController.initialize()` starts the OBS web server (port 8080) and engine init thread
- FastAPI with WebSocket for real-time updates
+3. `APIServer` wraps the controller with FastAPI routes, runs on port 8081
- Used for OBS browser source integration
+4. Backend prints `{"event": "ready", "port": 8080}` to stdout for Tauri to discover
- Single-user (displays only local transcriptions)
+5. On shutdown: engine stopped, web server stopped, lock released
-**Multi-User Server** (Optional - for syncing across multiple users)
+### Headless Backend vs Legacy GUI
-**Node.js WebSocket Server** ([server/nodejs/](server/nodejs/)) - **RECOMMENDED**
+The `AppController` class (`backend/app_controller.py`) extracts all orchestration logic from `gui/main_window_qt.py` into a Qt-free class. The mapping:
 - Real-time WebSocket support (< 100ms latency)
 - Handles 100+ concurrent users
 - Easy deployment to VPS/cloud hosting (Railway, Heroku, DigitalOcean, or any VPS)
 - Configurable display options via URL parameters:
  - `timestamps=true/false` - Show/hide timestamps
  - `maxlines=50` - Maximum visible lines (prevents scroll bars in OBS)
  - `fontsize=16` - Font size in pixels
  - `fontfamily=Arial` - Font family
  - `fade=10` - Seconds before text fades (0 = never)
-See [server/nodejs/README.md](server/nodejs/README.md) for deployment instructions
+| Legacy (MainWindow) | Headless (AppController) |
 |---------------------|--------------------------|
 | `_initialize_components()` | `_initialize_engine()` |
 | `_start_transcription()` | `start_transcription()` |
 | `_stop_transcription()` | `stop_transcription()` |
 | `_on_settings_saved()` | `apply_settings()` |
 | `_reload_engine()` | `reload_engine()` |
 | `_start_web_server_if_enabled()` | `_start_web_server()` |
 | `_start_server_sync()` | `_start_server_sync()` |
 | Qt signals | Callbacks (`on_state_changed`, `on_transcription`, etc.) |
-### Configuration System
+### Threading Model (Headless)
- Config stored at `~/.local-transcription/config.yaml`
+- Main thread: Uvicorn (FastAPI) event loop
- Managed by [client/config.py](client/config.py)
+- Engine init thread: Downloads models, initializes VAD
- Settings apply immediately without restart (except model changes)
+- Web server thread: Separate asyncio loop for OBS display
- YAML format with nested keys (e.g., `transcription.model`)
+- Audio capture: Runs in engine callback threads
 - All results flow through `AppController` callbacks -> `APIServer` WebSocket broadcast
-### Device Management
+### Svelte Frontend
- [client/device_utils.py](client/device_utils.py) handles CPU/GPU detection
+Uses Svelte 5 runes throughout (`$state`, `$derived`, `$effect`, `$props`). No Svelte 4 patterns.
 - Auto-detects CUDA, MPS (Mac), or falls back to CPU
 - Compute types: float32 (best quality), float16 (GPU), int8 (fastest)
 - Thread-safe device selection
-## Key Implementation Details
+**Stores** (`src/lib/stores/`):
 - `backend.ts` — WebSocket connection + REST helpers (`apiGet`, `apiPost`, `apiPut`), auto-reconnect
 - `config.ts` — fetches/updates config from backend API
 - `transcriptions.ts` — manages transcript list, listens for `CustomEvent`s from backend store
-### PyInstaller Build Configuration
+**Key patterns:**
 - Backend store dispatches `CustomEvent`s on `window` for cross-store communication
 - Settings component collects all changed values into a `Record<string, any>` with dot-notation keys, sends via `PUT /api/config`
 - Controls use Tauri dialog plugin for native file save, falls back to blob download
- [local-transcription.spec](local-transcription.spec) controls build
+## CI/CD
 - UPX compression enabled for smaller executables
 - Hidden imports required for PySide6, faster-whisper, torch
 - Console mode enabled by default (set `console=False` to hide)
-### Threading Model
+Eight Gitea Actions workflows in `.gitea/workflows/`, split into coordinators and per-OS builders:
- Main thread: Qt GUI event loop
+**App release (Tauri):**
- Audio thread: Captures and processes audio chunks
+- **`release.yml`**: Coordinator. Triggers on push to `main`. Auto-bumps version in package.json/tauri.conf.json/Cargo.toml/version.py, commits, tags `v{VERSION}`, creates Gitea release.
- Web server thread: Runs FastAPI server
+- **`build-app-linux.yml`**: Triggers on `v*` tag push or `workflow_dispatch`. Builds Tauri app, uploads `.deb`/`.rpm`/`.AppImage`.
- Transcription: Runs in callback thread from audio capture
+- **`build-app-windows.yml`**: Triggers on `v*` tag push or `workflow_dispatch`. Builds Tauri app, uploads `.msi`/`*-setup.exe`.
- All transcription results communicated via Qt signals
+- **`build-app-macos.yml`**: Triggers on `v*` tag push or `workflow_dispatch`. Builds Tauri app, uploads `.dmg`.
-### Server Sync (Optional Multi-User Feature)
+**Sidecar release (Python backend):**
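 - **`sidecar-release.yml`**: Coordinator. Triggers on push to `main` with changes in `client/`, `server/`, `backend/`, `pyproject.toml`, or `local-transcription-headless.spec`, or manually via `workflow_dispatch`. Runs the Python tests, bumps the patch version in `pyproject.toml`, tags `sidecar-v{VERSION}`, creates the Gitea release, dispatches the per-OS sidecar builds, and prunes old sidecar releases.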
 - **`build-sidecar-linux.yml`**: Triggers on `sidecar-v*` tag push or `workflow_dispatch`. Builds CUDA + CPU sidecars via PyInstaller.
 - **`build-sidecar-windows.yml`**: Triggers on `sidecar-v*` tag push or `workflow_dispatch`. Builds CUDA + CPU sidecars via PyInstaller.
 - **`build-sidecar-macos.yml`**: Triggers on `sidecar-v*` tag push or `workflow_dispatch`. Builds CPU-only sidecar via PyInstaller.
- [client/server_sync.py](client/server_sync.py) handles server communication
+All per-OS build workflows can be re-run independently via `workflow_dispatch` with an optional `tag` input. All require a `BUILD_TOKEN` secret (Gitea API token with release write access).
 - Toggle in Settings: "Enable Server Sync"
 - Sends transcriptions to Node.js server via HTTP POST
 - Real-time updates via WebSocket to display page
 - Per-speaker font support (Web-Safe, Google Fonts, Custom uploads)
 - Falls back gracefully if server unavailable
 ## Common Patterns
 ### Adding a New Setting
-1. Add to [config/default_config.yaml](config/default_config.yaml)
+1. Add default to [config/default_config.yaml](config/default_config.yaml)
-2. Update [client/config.py](client/config.py) if validation needed
+2. Add UI control in [src/lib/components/Settings.svelte](src/lib/components/Settings.svelte)
-3. Add UI control in [gui/settings_dialog_qt.py](gui/settings_dialog_qt.py)
+3. Ensure the setting is included in the save handler's config update
-4. Apply setting in relevant component (no restart if possible)
+4. Apply in `AppController.apply_settings()` or the relevant component
-5. Emit signal to update display if needed
+5. For legacy GUI: also update [gui/settings_dialog_qt.py](gui/settings_dialog_qt.py)
 ### Adding a New API Endpoint
 1. Add route in [backend/api_server.py](backend/api_server.py) `_setup_routes()`
 2. Add supporting logic in [backend/app_controller.py](backend/app_controller.py) if needed
 3. Call from Svelte via `backendStore.apiGet/apiPost/apiPut`
 ### Modifying Transcription Display
- Local GUI: [gui/transcription_display_qt.py](gui/transcription_display_qt.py)
+- Tauri UI: [src/lib/components/TranscriptionDisplay.svelte](src/lib/components/TranscriptionDisplay.svelte)
- Local web display (OBS): [server/web_display.py](server/web_display.py) (HTML in `_get_html()`)
+- OBS display: [server/web_display.py](server/web_display.py) (HTML in `_get_html()`)
 - Multi-user display: [server/nodejs/server.js](server/nodejs/server.js) (display page in `/display` route)
 ### Adding a New Model Size
 - Update [client/transcription_engine.py](client/transcription_engine.py)
 - Add to model selector in [gui/settings_dialog_qt.py](gui/settings_dialog_qt.py)
 - Update CLI argument choices in [main_cli.py](main_cli.py)
 ## Dependencies
-**Core:**
+**Frontend:** Tauri v2, Svelte 5, Vite, TypeScript
- `faster-whisper`: Optimized Whisper inference
+**Backend:** Python 3.9+, FastAPI, Uvicorn, RealtimeSTT, faster-whisper, PyTorch (CUDA), sounddevice
- `torch`: ML framework (CUDA-enabled via special index)
+**Build:** PyInstaller (sidecar), Tauri CLI (app), uv (Python packages)
- `PySide6`: Qt6 bindings for GUI
+**CI:** Gitea Actions with platform-specific runners
 - `sounddevice`: Cross-platform audio I/O
 - `noisereduce`, `webrtcvad`: Audio preprocessing
 **Web Server:**
 - `fastapi`, `uvicorn`: Web server and ASGI
 - `websockets`: Real-time communication
 **Build:**
 - `pyinstaller`: Create standalone executables
 - `uv`: Fast package manager
 **PyTorch CUDA Index:**
 - Configured in [pyproject.toml](pyproject.toml) under `[[tool.uv.index]]`
 - Uses PyTorch's custom wheel repository for CUDA builds
 - Automatically installed with `uv sync` when using CUDA build scripts
 ## Platform-Specific Notes
 ### Linux
- Uses PulseAudio/ALSA for audio
+- Tauri needs: `libgtk-3-dev`, `libwebkit2gtk-4.1-dev`, `libappindicator3-dev`, `librsvg2-dev`, `patchelf`
- Build scripts use bash (`.sh` files)
+- Audio: PulseAudio/ALSA via sounddevice
 - Executable: `dist/LocalTranscription/LocalTranscription`
 ### Windows
- Uses Windows Audio/WASAPI
+- Tauri needs: WebView2 (usually pre-installed on Windows 10+)
- Build scripts use batch (`.bat` files)
+- Audio: WASAPI via sounddevice
 - Executable: `dist\LocalTranscription\LocalTranscription.exe`
 - Requires Visual C++ Redistributable on target systems
-### Cross-Building
+### macOS
- **Cannot cross-compile** - must build on target platform
+- Tauri needs: Xcode Command Line Tools
- CI/CD should use platform-specific runners
+- Audio: CoreAudio via sounddevice
-
+- GPU: MPS (Apple Silicon) detected by `device_utils.py`
-## Troubleshooting
+- `Info.plist` must include `NSMicrophoneUsageDescription` for mic access
-
+- No CUDA builds — CPU/MPS only
 ### Model Loading Issues
 - Models download to `~/.cache/huggingface/`
 - First run requires internet connection
 - Check disk space (models: 75MB-3GB depending on size)
 ### Audio Device Issues
 - Run `uv run python main_cli.py --list-devices`
 - Check permissions (microphone access)
 - Try different device indices in settings
 ### GPU Not Detected
 - Run `uv run python check_cuda.py`
 - Install CUDA drivers (not CUDA toolkit - bundled in build)
 - Verify PyTorch sees GPU: `python -c "import torch; print(torch.cuda.is_available())"`
 ### Web Server Port Conflicts
 - Default port: 8080
 - Change in [gui/main_window_qt.py](gui/main_window_qt.py) or config
 - Use `lsof -i :8080` (Linux) or `netstat -ano | findstr :8080` (Windows)
 ## OBS Integration
 ### Local Display (Single User)
 1. Start Local Transcription app
 2. In OBS: Add "Browser" source
 3. URL: `http://localhost:8080`
 4. Set dimensions (e.g., 1920x300)
 ### Multi-User Display (Node.js Server)
 1. Deploy Node.js server (see [server/nodejs/README.md](server/nodejs/README.md))
 2. Each user configures Server URL: `http://your-server:3000/api/send`
 3. Enter same room name and passphrase
 4. In OBS: Add "Browser" source
 5. URL: `http://your-server:3000/display?room=ROOM&fade=10&timestamps=true&maxlines=50&fontsize=16`
 6. Customize URL parameters as needed:
   - `timestamps=false` - Hide timestamps
   - `maxlines=30` - Show max 30 lines (prevents scroll bars)
   - `fontsize=18` - Larger font
   - `fontfamily=Courier` - Different font
 ## Performance Optimization
 **For Real-Time Transcription:**
 - Use `tiny` or `base` model (faster)
 - Enable GPU if available (5-10x faster)
 - Increase chunk_duration for better accuracy (higher latency)
 - Decrease chunk_duration for lower latency (less context)
 - Enable VAD to skip silent audio
 **For Build Size Reduction:**
 - Don't bundle models (download on demand)
 - Use CPU-only build if no GPU users
 - Enable UPX compression (already in spec)
 ## Phase Status
 - ✅ **Phase 1**: Standalone desktop application (complete)
 - ✅ **Web Server**: Local OBS integration (complete)
 - ✅ **Builds**: PyInstaller executables (complete)
 - ✅ **Phase 2**: Multi-user Node.js server (complete, optional)
 - ⏸️ **Phase 3+**: Advanced features (see [NEXT_STEPS.md](NEXT_STEPS.md))
 ## Related Documentation
- [README.md](README.md) - User-facing documentation
+- [README.md](README.md) — User-facing documentation
- [BUILD.md](BUILD.md) - Detailed build instructions
+- [BUILD.md](BUILD.md) — Detailed build instructions
- [INSTALL.md](INSTALL.md) - Installation guide
+- [INSTALL.md](INSTALL.md) — Installation guide
- [NEXT_STEPS.md](NEXT_STEPS.md) - Future enhancements
+- [server/nodejs/README.md](server/nodejs/README.md) — Node.js server setup
 - [server/nodejs/README.md](server/nodejs/README.md) - Node.js server setup and deployment
--- a/DEEPGRAM_PROXY_PLAN.md
+++ b/DEEPGRAM_PROXY_PLAN.md
@@ -0,0 +1,574 @@
 # Deepgram Proxy Service — Build Plan
 ## Project Overview
 Build a standalone hosted service that acts as a Deepgram proxy for the Local Transcription
 desktop app. Users can either provide their own Deepgram API key (BYOK) or use the managed
 service with prepaid credits purchased via Stripe.
 This is a **separate repository** from `local-transcription`. The desktop app will be updated
 in a second phase to support both modes.
 ---
 ## Repository Structure
 ```
 transcription-proxy/
 ├── src/
 │   ├── server.js              # Express app entry point
 │   ├── config.js              # Environment config loader
 │   ├── db/
 │   │   ├── index.js           # node-postgres pool setup
 │   │   └── migrations/        # SQL migration files (numbered)
 │   │       ├── 001_users.sql
 │   │       ├── 002_credits.sql
 │   │       ├── 003_sessions.sql
 │   │       └── 004_usage_ledger.sql
 │   ├── middleware/
 │   │   ├── auth.js            # JWT verification middleware
 │   │   └── rateLimit.js       # Per-user rate limiting
 │   ├── routes/
 │   │   ├── auth.js            # POST /auth/register, /auth/login, /auth/refresh
 │   │   ├── billing.js         # POST /billing/checkout, GET /billing/balance
 │   │   └── account.js         # GET /account/me, GET /account/usage
 │   ├── websocket/
 │   │   └── proxy.js           # WebSocket proxy handler (core feature)
 │   └── webhooks/
 │       └── stripe.js          # POST /webhooks/stripe
 ├── web/                       # Simple frontend dashboard
 │   ├── index.html             # Landing / login page
 │   ├── dashboard.html         # Balance, usage history, buy credits
 │   └── assets/
 │       ├── app.js
 │       └── style.css
 ├── .env.example
 ├── package.json
 ├── docker-compose.yml         # Postgres + app for local dev
 └── CLAUDE.md                  # This file (after renaming)
 ```
 ---
 ## Technology Stack
 - **Runtime**: Node.js 20+
 - **Framework**: Express 4
 - **WebSocket**: `ws` library (not socket.io — keep it lean)
 - **Database**: PostgreSQL 15+ via `pg` (node-postgres)
 - **Auth**: JWT via `jsonwebtoken`, passwords hashed with `bcrypt`
 - **Payments**: Stripe Node SDK (`stripe`)
 - **Environment**: `dotenv`
 - **Dev tooling**: `nodemon` for dev, no TypeScript (keep it simple)
 ---
 ## Database Schema
 Run migrations in order. Use a simple `schema_migrations` table to track applied migrations.
 ### 001_users.sql
 ```sql
 CREATE TABLE schema_migrations (
  version INTEGER PRIMARY KEY,
  applied_at TIMESTAMPTZ DEFAULT NOW()
 );
 CREATE TABLE users (
  id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
  email TEXT UNIQUE NOT NULL,
  password_hash TEXT NOT NULL,
  stripe_customer_id TEXT UNIQUE,
  created_at TIMESTAMPTZ DEFAULT NOW(),
  updated_at TIMESTAMPTZ DEFAULT NOW()
 );
 ```
 ### 002_credits.sql
 ```sql
 CREATE TABLE credit_balance (
  user_id UUID PRIMARY KEY REFERENCES users(id) ON DELETE CASCADE,
  seconds_remaining INTEGER NOT NULL DEFAULT 0,
  updated_at TIMESTAMPTZ DEFAULT NOW()
 );
 ```
 ### 003_sessions.sql
 ```sql
 CREATE TABLE transcription_sessions (
  id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
  user_id UUID NOT NULL REFERENCES users(id),
  mode TEXT NOT NULL CHECK (mode IN ('managed', 'byok')),
  started_at TIMESTAMPTZ DEFAULT NOW(),
  ended_at TIMESTAMPTZ,
  seconds_used INTEGER NOT NULL DEFAULT 0,
  deepgram_model TEXT,
  status TEXT NOT NULL DEFAULT 'active' CHECK (status IN ('active', 'completed', 'terminated'))
 );
 CREATE INDEX idx_sessions_user_id ON transcription_sessions(user_id);
 CREATE INDEX idx_sessions_started_at ON transcription_sessions(started_at);
 ```
 ### 004_usage_ledger.sql
 ```sql
 CREATE TABLE usage_ledger (
  id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
  user_id UUID NOT NULL REFERENCES users(id),
  session_id UUID REFERENCES transcription_sessions(id),
  recorded_at TIMESTAMPTZ DEFAULT NOW(),
  seconds INTEGER NOT NULL,
  description TEXT  -- e.g. 'session_usage', 'credit_purchase', 'manual_adjustment'
 );
 CREATE INDEX idx_ledger_user_id ON usage_ledger(user_id);
 ```
 ---
 ## Environment Variables (.env.example)
 ```env
 # Server
 PORT=3000
 NODE_ENV=development
 # Database
 DATABASE_URL=postgresql://user:password@localhost:5432/transcription_proxy
 # Auth
 JWT_SECRET=changeme_use_long_random_string
 JWT_EXPIRY=7d
 # Stripe
 STRIPE_SECRET_KEY=sk_test_...
 STRIPE_WEBHOOK_SECRET=whsec_...
 # Deepgram
 DEEPGRAM_API_KEY=your_deepgram_key_here
 # Pricing (seconds per dollar — adjust for your margin)
 # Default: 1000 seconds per $1 (= $0.06/min charged), which covers Deepgram's $0.006/min cost plus margin
 CREDITS_PER_DOLLAR=1000
 ```
 ---
 ## Phase 1 — Core Server & Auth
 ### Goals
 - Working Express app with Postgres connection
 - Migration runner
 - User registration and login
 - JWT middleware
 ### Tasks
 1. **Scaffold project**
   - `npm init`, install dependencies: `express ws pg jsonwebtoken bcrypt stripe dotenv`
   - Dev dependencies: `nodemon`
   - Add `start` and `dev` scripts to package.json
 2. **Database connection** (`src/db/index.js`)
   - Export a `pg.Pool` instance using `DATABASE_URL`
   - Export a `migrate()` function that reads `src/db/migrations/*.sql` in order,
     checks `schema_migrations` table, and applies unapplied ones
   - Call `migrate()` on server startup before listening
 3. **Auth routes** (`src/routes/auth.js`)
   - `POST /auth/register` — validate email/password, hash password with bcrypt (cost 12),
     insert user, insert empty credit_balance row, return JWT
   - `POST /auth/login` — verify credentials, return JWT + refresh token
   - `POST /auth/refresh` — validate refresh token, return new JWT
   - Passwords: minimum 8 characters, validate email format
 4. **JWT middleware** (`src/middleware/auth.js`)
   - Verify `Authorization: Bearer <token>` header
   - Attach `req.user = { id, email }` on success
   - Return 401 on failure
   - Export as `requireAuth` middleware
 5. **Basic health check**
   - `GET /health` returns `{ status: 'ok', db: 'connected' }`
 ---
 ## Phase 2 — Billing & Credits
 ### Goals
 - Stripe Checkout session creation for credit purchases
 - Webhook handler to fulfill purchases
 - Balance endpoint
 ### Payment Methods
 Use **Stripe Dynamic Payment Methods** — do NOT hardcode `payment_method_types` in the
 Checkout Session. Instead, leave it unset and manage everything from the Stripe Dashboard.
 Enable the following in the Stripe Dashboard under Settings → Payment Methods:
 - **Cards** (Visa, Mastercard, Amex, Discover) — on by default
 - **PayPal** — enable manually
 - **Apple Pay** — on by default, shows automatically on Safari/iOS
 - **Google Pay** — enable manually (one toggle)
 - **Cash App Pay** — enable manually (popular with streaming audiences)
 - **Link** — Stripe's saved payment network, on by default
 Stripe will automatically show the most relevant methods to each user based on their
 location and device. No code changes are needed to add or remove methods in future —
 it's all dashboard config.
 ### Credit Packages
 Define these as constants in `src/config.js`:
 ```javascript
 CREDIT_PACKAGES: [
  { id: 'pack_500',  label: '500 minutes',  seconds: 30000,  price_cents: 300  },
  { id: 'pack_1200', label: '1200 minutes', seconds: 72000,  price_cents: 600  },
  { id: 'pack_3000', label: '3000 minutes', seconds: 180000, price_cents: 1200 },
 ]
 ```
 Adjust pricing to cover Deepgram costs ($0.006/min = $0.0001/sec) plus margin and
 Stripe fees (~2.9% + $0.30).
 ### Tasks
 1. **Stripe customer creation**
   - On user registration, create a Stripe customer and store `stripe_customer_id`
   - Do this asynchronously (don't block registration response)
 2. **Billing routes** (`src/routes/billing.js`)
   - `GET /billing/packages` — return credit package list (no auth required)
   - `POST /billing/checkout` — requires auth, accepts `{ package_id }`,
     creates Stripe Checkout Session using dynamic payment methods (do NOT pass
     `payment_method_types` — omitting it enables dynamic methods automatically),
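     set the session's top-level `metadata` to contain `user_id` and `package_id` (not
     `payment_intent_data.metadata`, which is attached to the PaymentIntent and is not
     present on the `checkout.session.completed` event the webhook handles),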
     returns `{ checkout_url }`
   - `GET /billing/balance` — requires auth, returns `{ seconds_remaining, minutes_remaining }`
 3. **Stripe webhook** (`src/webhooks/stripe.js`)
   - Mount at `POST /webhooks/stripe` with raw body (use `express.raw()` for this route only)
   - Verify signature with `stripe.webhooks.constructEvent()`
   - Handle `checkout.session.completed`:
     - Extract `user_id` and `package_id` from metadata
     - Add seconds to `credit_balance`
     - Insert row into `usage_ledger` with description `'credit_purchase'`
   - Handle `payment_intent.payment_failed`: log it (no action needed for prepaid)
 4. **Success/cancel pages**
   - Stripe Checkout redirects to `GET /billing/success?session_id=...` and `/billing/cancel`
   - These can be simple HTML responses or redirects to the web dashboard
 ---
 ## Phase 3 — WebSocket Proxy (Core Feature)
 This is the most critical component. The proxy sits between the desktop client and Deepgram,
 forwarding audio while tracking usage in real time.
 ### Connection Flow
 ```
 Client connects → validate JWT → check credit balance → open Deepgram upstream
     ↓
 Audio chunks arrive → forward to Deepgram → record usage every 10 seconds
     ↓
 Transcription arrives from Deepgram → forward to client
     ↓
 Client disconnects (or credits exhausted) → close upstream → finalize session
 ```
 ### WebSocket Protocol
 **Client connects to**: `wss://your-domain/ws/transcribe`
 **Client sends as first message** (JSON):
 ```json
 {
  "type": "auth",
  "token": "<JWT>",
  "config": {
    "model": "nova-2",
    "language": "en-US",
    "interim_results": true,
    "endpointing": 300
  }
 }
 ```
 **After auth success, client sends**: raw audio binary frames (PCM 16kHz mono)
 **Server sends to client**:
 ```json
 { "type": "ready" }
 { "type": "transcript", "text": "...", "is_final": true, "confidence": 0.98 }
 { "type": "error", "code": "insufficient_credits", "message": "..." }
 { "type": "credits_low", "seconds_remaining": 300 }
 { "type": "session_end", "seconds_used": 120 }
 ```
 ### Tasks (`src/websocket/proxy.js`)
 1. **Upgrade handler**
   - Attach to the HTTP server using `ws.Server({ noServer: true })`
   - In `server.on('upgrade', ...)`, route `/ws/transcribe` to this handler
 2. **Auth handshake**
   - First message must be `{ type: 'auth', token: '...' }` — received within 5 seconds
     or connection is terminated
   - Verify JWT, load user's credit balance from DB
   - If balance is 0 or negative, send `insufficient_credits` error and close
 3. **Deepgram upstream connection**
   - Open a WebSocket to Deepgram's streaming API:
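     `wss://api.deepgram.com/v1/listen?model=nova-2&language=en-US&interim_results=true&encoding=linear16&sample_rate=16000&channels=1`
     (`encoding` and `sample_rate` are required because the client sends raw, headerless PCM)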
   - Auth header: `Authorization: Token <DEEPGRAM_API_KEY>`
   - Use query params from client's `config` object (whitelist allowed params)
 4. **Audio forwarding**
   - All binary messages from client → forward directly to Deepgram upstream
   - All messages from Deepgram → parse JSON, reformat, forward to client
 5. **Usage tracking**
   - Create a `transcription_sessions` row on connection
   - Maintain an in-memory `secondsUsed` counter per connection
   - Deepgram sends `{ type: 'Results', duration: X }` in responses — use this for
     accurate second counting
   - Every 10 seconds (or on disconnect), write current `secondsUsed` to DB:
     - Update `transcription_sessions.seconds_used`
     - Decrement `credit_balance.seconds_remaining`
     - Insert into `usage_ledger`
   - If `seconds_remaining` hits 0: send `insufficient_credits`, close connection
 6. **Cleanup on disconnect**
   - Mark session as `completed`, set `ended_at`
   - Do final usage flush to DB
   - Close Deepgram upstream if still open
 7. **Error handling**
   - If Deepgram upstream closes unexpectedly, notify client and close
   - If client sends malformed data, log and continue (don't crash)
 ---
 ## Phase 4 — Account Routes & Rate Limiting
 ### Tasks
 1. **Account routes** (`src/routes/account.js`)
   - `GET /account/me` — returns `{ email, credits: { seconds_remaining, minutes_remaining }, created_at }`
   - `GET /account/usage` — returns last 30 days of `usage_ledger` entries grouped by day,
     plus list of last 10 sessions with duration
 2. **Rate limiting** (`src/middleware/rateLimit.js`)
   - Use in-memory rate limiting (no Redis needed at this scale)
   - Auth endpoints: max 10 requests per minute per IP
   - WebSocket connections: max 2 concurrent connections per user
     (store active connections in a `Map<userId, Set<ws>>`)
 ---
 ## Phase 5 — Web Dashboard
 A simple, functional HTML/CSS/JS dashboard. No framework — vanilla JS is fine.
 This is a developer-friendly streamer tool, not a consumer SaaS, so clean and
 functional beats flashy.
 ### Pages
 **`/` (Landing / Login)**
 - Brief product description (what this is, why it exists)
 - Login form and link to register
 - Link to GitHub/Gitea repo
 **`/dashboard` (Post-login)**
 - Current credit balance (minutes remaining, prominently displayed)
 - "Buy Credits" section showing the three packages with Stripe Checkout buttons
 - Usage chart: last 30 days bar chart (vanilla canvas or a small CDN chart lib)
 - Recent sessions table: date, duration, status
 **`/register`**
 - Registration form
 ### Implementation Notes
 - Store the JWT in `localStorage` and attach it as an `Authorization: Bearer <token>` header on API calls
 - Redirect to `/` if the JWT is missing or expired
 - Keep CSS minimal but readable — this is a utility dashboard
 ---
 ## Phase 6 — Desktop App Integration
 Changes needed in the `local-transcription` Python repo.
 ### New file: `client/remote_transcription.py`
 This module replaces `transcription_engine_realtime.py` when remote mode is active.
 ```python
 # Pseudocode / spec for Claude Code to implement
 class RemoteTranscriptionEngine:
    """
    Connects to the transcription proxy WebSocket and streams audio.
    Provides the same callback interface as the local engine so the
    rest of the app doesn't need to change.
    """
    def __init__(self, config, on_transcript_callback):
        # config contains: server_url, auth_token (or byok_api_key), model
        ...
    def start(self):
        # Open WebSocket connection
        # Send auth message
        # Start audio capture thread (reuse existing audio_capture.py)
        ...
    def stop(self):
        # Close WebSocket gracefully
        ...
    def _on_audio_chunk(self, audio_data):
        # Called by audio_capture.py with raw PCM data
        # Send as binary WebSocket frame
        ...
    def _on_server_message(self, message):
        # Parse JSON from server
        # On type='transcript': call on_transcript_callback
        # On type='credits_low': trigger UI warning
        # On type='error': surface to user
        ...
 ```
 ### BYOK Mode
 When user provides their own Deepgram key, connect directly to Deepgram instead of the proxy:
 - Endpoint: `wss://api.deepgram.com/v1/listen?...`
 - Auth: `Authorization: Token <user_key>`
 - No session tracking (Deepgram handles billing directly to the user)
 - Same `RemoteTranscriptionEngine` class, just different URL and auth header
 ### Settings Changes (`gui/settings_dialog_qt.py`)
 Add a new "Transcription Mode" section:
 ```
 Transcription Mode:
  ○ Local (Whisper)          [existing behavior]
  ○ Remote - Managed         [requires login]
  ○ Remote - BYOK            [requires Deepgram API key]
 [If Managed selected]:
  Server URL: [____________]
  [Login / Register]  [View Balance: 420 min remaining]
 [If BYOK selected]:
  Deepgram API Key: [____________]
  Model: [nova-2 ▼]
 ```
 ### Config additions (`config/default_config.yaml`)
 ```yaml
 remote:
  mode: local           # local | managed | byok
  server_url: ""        # proxy server URL for managed mode
  auth_token: ""        # JWT stored after login
  byok_api_key: ""      # Deepgram key for BYOK mode
  deepgram_model: nova-2
  language: en-US
 ```
 ---
 ## Build & Deployment Notes
 ### Docker Compose (local dev)
 ```yaml
 version: '3.8'
 services:
  db:
    image: postgres:15
    environment:
      POSTGRES_DB: transcription_proxy
      POSTGRES_USER: user
      POSTGRES_PASSWORD: password
    ports:
      - "5432:5432"
    volumes:
      - pgdata:/var/lib/postgresql/data
  app:
    build: .
    ports:
      - "3000:3000"
    environment:
      DATABASE_URL: postgresql://user:password@db:5432/transcription_proxy
    depends_on:
      - db
    volumes:
      - .:/app
      - /app/node_modules
 volumes:
  pgdata:
 ```
 ### Production Deployment
 This service is a good fit for deployment on AnHonestHost WHP as a containerized app,
 or on a small DigitalOcean/Linode VPS. Requirements are light:
 - 512MB RAM is sufficient
 - Postgres can be the same instance as other services or managed (e.g., Supabase free tier)
 - Needs a public domain with SSL for WebSocket (`wss://`) to work from desktop clients
 Reverse proxy config (Nginx or HAProxy) should:
 - Proxy HTTP → `localhost:3000`
 - Pass `Upgrade` and `Connection` headers for WebSocket support
 - Set `proxy_read_timeout 3600` (sessions can be long)
 ---
 ## Implementation Order
 Build and test in this sequence:
 1. Project scaffold + DB connection + migrations
 2. Auth (register/login/JWT) — test with curl
 3. Stripe billing + webhook — test with Stripe CLI (`stripe listen`)
 4. WebSocket proxy — test with a simple browser WebSocket client first
 5. Usage tracking and credit decrement
 6. Account/usage routes
 7. Web dashboard
 8. Desktop app integration (separate PR in local-transcription repo)
 ---
 ## Key Decisions & Rationale
 | Decision | Choice | Reason |
 |---|---|---|
 | Credits model | Prepaid | No surprise charges, simpler billing, better for irregular streamer usage |
 | WebSocket library | `ws` | Lightweight, no abstraction overhead, plays well with raw binary audio |
 | Auth | JWT (stateless) | Desktop app holds token locally; no session store needed |
 | DB driver | `node-postgres` (pg) | No ORM overhead; schema is simple enough for raw SQL |
 | Migrations | Raw SQL files | No dependency on Knex/Prisma; easy to inspect and reason about |
 | Rate limiting | In-memory | Redis is overkill for this scale; single-process Node is fine initially |
 | Frontend | Vanilla JS | Dashboard is simple utility UI; no framework justified |
 ---
 ## What This Plan Does NOT Cover (Future Work)
 - OAuth / social login
 - Admin panel for managing users
 - Refund / credit adjustment tooling
 - Email verification
 - Password reset flow
 - Multi-language support beyond Deepgram's defaults
 - Analytics / aggregated usage reporting
 - Self-hosted Whisper inference as a third backend option
--- a/README.md
+++ b/README.md
@@ -1,494 +1,318 @@
-# Local Transcription for Streamers
+# Local Transcription
-A local speech-to-text application designed for streamers that provides real-time transcription using Whisper or similar models. Multiple users can run the application locally and sync their transcriptions to a centralized web stream that can be easily captured in OBS or other streaming software.
+A real-time speech-to-text desktop application for streamers. Runs locally on your machine with GPU or CPU, displays transcriptions via OBS browser source, and optionally syncs with other users through a multi-user server.
 **Version 1.4.0**
 ## Features
- **Standalone Desktop Application**: Use locally with built-in GUI display - no server required
+- **Real-Time Transcription**: Live speech-to-text using Whisper models with minimal latency
- **Local Transcription**: Run Whisper (or compatible models) locally on your machine
+- **Cross-Platform**: Native desktop app for Windows, macOS, and Linux via [Tauri](https://tauri.app/)
- **CPU/GPU Support**: Choose between CPU or GPU processing based on your hardware
+- **Dual Transcription Modes**: Local (Whisper) or cloud (Deepgram) with managed billing or BYOK
- **Real-time Processing**: Live audio transcription with minimal latency
+- **CPU & GPU Support**: Automatic detection of CUDA (NVIDIA), MPS (Apple Silicon), or CPU fallback
 - **Advanced Voice Detection**: Dual-layer VAD (WebRTC + Silero) for accurate speech detection
 - **OBS Integration**: Built-in web server for browser source capture at `http://localhost:8080`
 - **Multi-User Sync**: Optional Node.js server to sync transcriptions across multiple users
 - **Custom Fonts**: Support for system fonts, web-safe fonts, Google Fonts, and custom font files
 - **Customizable Colors**: User-configurable colors for name, text, and background
 - **Noise Suppression**: Built-in audio preprocessing to reduce background noise
- **User Configuration**: Set your display name and preferences through the GUI
+- **Auto-Updates**: Automatic update checking with release notes display
- **Optional Multi-user Sync**: Connect to a server to sync transcriptions with other users
+
- **OBS Integration**: Web-based output designed for easy browser source capture
+## Architecture
- **Privacy-First**: All processing happens locally; only transcription text is shared
+
- **Customizable**: Configure model size, language, and streaming settings
+The application uses a two-process architecture:
 1. **Tauri Shell** (Svelte 5 frontend) — lightweight native window (~50MB) rendering the UI
 2. **Python Backend** (sidecar) — headless process running transcription, audio capture, and the OBS web server
 The Tauri frontend communicates with the Python backend via REST API and WebSocket, following the same pattern as [voice-to-notes](https://repo.anhonesthost.net/MacroPad/voice-to-notes).
 ```
 Tauri App (user launches this)
  └─ Spawns Python backend as sidecar
       ├─ FastAPI REST API (control endpoints)
       ├─ WebSocket /ws/control (real-time state + transcriptions)
       ├─ OBS web display at http://localhost:8080
       └─ Transcription engine (Whisper or Deepgram)
 ```
 > **Legacy GUI**: The original PySide6/Qt desktop GUI (`main.py`) still works alongside the new Tauri frontend during the transition period.
 ## Quick Start
 ### Running from Source
 ```bash
-# Install dependencies
+# Install Python dependencies
 uv sync
-# Run the application
+# Run the Tauri app (frontend + backend)
 npm install
 npm run tauri dev
 # Or run just the headless backend (for development)
 uv run python -m backend.main_headless
 # Or run the legacy PySide6 GUI
 uv run python main.py
 ```
-### Building Standalone Executables
+### Using Pre-Built Executables
-To create standalone executables for distribution:
+Download the latest release from the [releases page](https://repo.anhonesthost.net/streamer-tools/local-transcription/releases):
 - **App installer** (Tauri shell): `.msi` (Windows), `.dmg` (macOS), `.deb`/`.rpm`/`.AppImage` (Linux)
 - **Sidecar** (Python backend): Download the matching `sidecar-*` zip for your platform (CUDA or CPU)
 ### Building from Source
 **Linux:**
 ```bash
-./build.sh
+# Build the Tauri app
-```
+npm install
 npm run tauri build
 # Output: src-tauri/target/release/bundle/
-**Windows:**
+# Build the Python sidecar (headless, no Qt)
-```cmd
+uv sync
 uv run pyinstaller local-transcription-headless.spec
 # Output: dist/local-transcription-backend/
 # Build the legacy PySide6 app (Linux)
 ./build.sh
 # Build the legacy PySide6 app (Windows)
 build.bat
 ```
 For detailed build instructions, see [BUILD.md](BUILD.md).
-## Architecture Overview
+## Usage
-The application can run in two modes:
+### Standalone Mode
-### Standalone Mode (No Server Required):
+1. Launch the application
-1. **Desktop Application**: Captures audio, performs speech-to-text, and displays transcriptions locally in a GUI window
+2. Select your microphone from the audio device dropdown
 3. Choose a Whisper model (smaller = faster, larger = more accurate):
   - `tiny.en` / `tiny` — Fastest, good for quick captions
   - `base.en` / `base` — Balanced speed and accuracy
   - `small.en` / `small` — Better accuracy
   - `medium.en` / `medium` — High accuracy
   - `large-v3` — Best accuracy (requires more resources)
 4. Click **Start** to begin transcription
 5. Transcriptions appear in the main window and at `http://localhost:8080`
-### Multi-user Sync Mode (Optional):
+### Remote Transcription (Deepgram)
 1. **Local Transcription Client**: Captures audio, performs speech-to-text, and sends results to the web server
 2. **Centralized Web Server**: Aggregates transcriptions from multiple clients and serves a web stream
 3. **Web Stream Interface**: Browser-accessible page displaying synchronized transcriptions (for OBS capture)
-## Use Cases
+Instead of local Whisper models, you can use cloud-based transcription:
- **Multi-language Streams**: Multiple translators transcribing in different languages
+- **Managed mode**: Sign up via the transcription proxy for metered billing
- **Accessibility**: Provide real-time captions for viewers
+- **BYOK mode**: Bring your own Deepgram API key for direct access
 - **Collaborative Podcasts**: Multiple hosts with separate transcriptions
 - **Gaming Commentary**: Track who said what in multiplayer sessions
---
+Configure in Settings > Remote Transcription.
-## Implementation Plan
+### OBS Browser Source Setup
-### Phase 1: Standalone Desktop Application
+1. Start the Local Transcription app
 2. In OBS, add a **Browser** source
 3. Set URL to `http://localhost:8080`
 4. Set dimensions (e.g., 1920x300)
 5. Check "Shutdown source when not visible" for performance
-**Objective**: Build a fully functional standalone transcription app with GUI that works without any server
+### Multi-User Mode (Optional)
-#### Components:
+For syncing transcriptions across multiple users (e.g., multi-host streams or translation teams):
 1. **Audio Capture Module**
   - Capture system audio or microphone input
   - Support multiple audio sources (virtual audio cables, physical devices)
   - Real-time audio buffering with configurable chunk sizes
   - **Noise Suppression**: Preprocess audio to reduce background noise
   - Libraries: `pyaudio`, `sounddevice`, `noisereduce`, `webrtcvad`
-2. **Noise Suppression Engine**
+1. Deploy the Node.js server (see [server/nodejs/README.md](server/nodejs/README.md))
-   - Real-time noise reduction using RNNoise or noisereduce
+2. In the app settings, enable **Server Sync**
-   - Adjustable noise reduction strength
+3. Enter the server URL (e.g., `http://your-server:3000/api/send`)
-   - Optional VAD (Voice Activity Detection) to skip silent segments
+4. Set a room name and passphrase (shared with other users)
-   - Libraries: `noisereduce`, `rnnoise-python`, `webrtcvad`
+5. In OBS, use the server's display URL with your room name:
   ```
   http://your-server:3000/display?room=YOURROOM&timestamps=true&maxlines=50
   ```
-3. **Transcription Engine**
+## Configuration
   - Integrate OpenAI Whisper (or alternatives: faster-whisper, whisper.cpp)
   - Support multiple model sizes (tiny, base, small, medium, large)
   - CPU and GPU inference options
   - Model management and automatic downloading
   - Libraries: `openai-whisper`, `faster-whisper`, `torch`
-4. **Device Selection**
+Settings are stored at `~/.local-transcription/config.yaml` and can be modified through the GUI settings panel or the REST API.
   - Auto-detect available compute devices (CPU, CUDA, MPS for Mac)
   - Allow user to specify preferred device via GUI
   - Graceful fallback if GPU unavailable
   - Display device status and performance metrics
-5. **Desktop GUI Application**
+### Key Settings
   - Cross-platform GUI using PyQt6, Tkinter, or CustomTkinter
   - Main transcription display window (scrolling text area)
   - Settings panel for configuration
   - User name input field
   - Audio input device selector
   - Model size selector
   - CPU/GPU toggle
   - Start/Stop transcription button
   - Optional: System tray integration
   - Libraries: `PyQt6`, `customtkinter`, or `tkinter`
-6. **Local Display**
+| Setting | Description | Default |
-   - Real-time transcription display in GUI window
+|---------|-------------|---------|
-   - Scrolling text with timestamps
+| `transcription.model` | Whisper model to use | `base.en` |
-   - User name/label shown with transcriptions
+| `transcription.device` | Processing device (auto/cuda/cpu) | `auto` |
-   - Copy transcription to clipboard
+| `transcription.enable_realtime_transcription` | Show preview while speaking | `false` |
-   - Optional: Save transcription to file (TXT, SRT, VTT)
+| `transcription.silero_sensitivity` | VAD sensitivity (0-1, lower = more sensitive) | `0.4` |
 | `transcription.post_speech_silence_duration` | Silence before finalizing (seconds) | `0.3` |
 | `transcription.continuous_mode` | Fast speaker mode for quick talkers | `false` |
 | `remote.mode` | Transcription mode (local/managed/byok) | `local` |
 | `display.show_timestamps` | Show timestamps with transcriptions | `true` |
 | `display.fade_after_seconds` | Fade out time (0 = never) | `10` |
 | `display.font_source` | Font type (System Font/Web-Safe/Google Font/Custom File) | `System Font` |
 | `web_server.port` | Local web server port | `8080` |
-#### Tasks:
+See [config/default_config.yaml](config/default_config.yaml) for all available options.
 - [ ] Set up project structure and dependencies
 - [ ] Implement audio capture with device selection
 - [ ] Add noise suppression and VAD preprocessing
 - [ ] Integrate Whisper model loading and inference
 - [ ] Add CPU/GPU device detection and selection logic
 - [ ] Create real-time audio buffer processing pipeline
 - [ ] Design and implement GUI layout (main window)
 - [ ] Add settings panel with user name configuration
 - [ ] Implement local transcription display area
 - [ ] Add start/stop controls and status indicators
 - [ ] Test transcription accuracy and latency
 - [ ] Test noise suppression effectiveness
 ---
 ### Phase 2: Web Server and Sync System
 **Objective**: Create a centralized server to aggregate and serve transcriptions
 #### Components:
 1. **Web Server**
   - FastAPI or Flask-based REST API
   - WebSocket support for real-time updates
   - User/client registration and management
   - Libraries: `fastapi`, `uvicorn`, `websockets`
 2. **Transcription Aggregator**
   - Receive transcription chunks from multiple clients
   - Associate transcriptions with user IDs/names
   - Timestamp management and synchronization
   - Buffer management for smooth streaming
 3. **Database/Storage** (Optional)
   - Store transcription history (SQLite for simplicity)
   - Session management
   - Export functionality (SRT, VTT, TXT formats)
 #### API Endpoints:
 - `POST /api/register` - Register a new client
 - `POST /api/transcription` - Submit transcription chunk
 - `WS /api/stream` - WebSocket for real-time transcription stream
 - `GET /stream` - Web page for OBS browser source
 #### Tasks:
 - [ ] Set up FastAPI server with CORS support
 - [ ] Implement WebSocket handler for real-time streaming
 - [ ] Create client registration system
 - [ ] Build transcription aggregation logic
 - [ ] Add timestamp synchronization
 - [ ] Create data models for clients and transcriptions
 ---
 ### Phase 3: Client-Server Communication (Optional Multi-user Mode)
 **Objective**: Add optional server connectivity to enable multi-user transcription sync
 #### Components:
 1. **HTTP/WebSocket Client**
   - Register client with server on startup
   - Send transcription chunks as they're generated
   - Handle connection drops and reconnection
   - Libraries: `requests`, `websockets`
 2. **Configuration System**
   - Config file for server URL, API keys, user settings
   - Model preferences (size, language)
   - Audio input settings
   - Format: YAML or JSON
 3. **Status Monitoring**
   - Connection status indicator
   - Transcription queue health
   - Error handling and logging
 #### Tasks:
 - [ ] Add "Enable Server Sync" toggle to GUI
 - [ ] Add server URL configuration field in settings
 - [ ] Implement WebSocket client for sending transcriptions
 - [ ] Add configuration file support (YAML/JSON)
 - [ ] Create connection management with auto-reconnect
 - [ ] Add local logging and error handling
 - [ ] Add server connection status indicator to GUI
 - [ ] Allow app to function normally if server is unavailable
 ---
 ### Phase 4: Web Stream Interface (OBS Integration)
 **Objective**: Create a web page that displays synchronized transcriptions for OBS
 #### Components:
 1. **Web Frontend**
   - HTML/CSS/JavaScript page for displaying transcriptions
   - Responsive design with customizable styling
   - Auto-scroll with configurable retention window
   - Libraries: Vanilla JS or lightweight framework (Alpine.js, htmx)
 2. **Styling Options**
   - Customizable fonts, colors, sizes
   - Background transparency for OBS chroma key
   - User name/ID display options
   - Timestamp display (optional)
 3. **Display Modes**
   - Scrolling captions (like live TV captions)
   - Multi-user panel view (separate sections per user)
   - Overlay mode (minimal UI for transparency)
 #### Tasks:
 - [ ] Create HTML template for transcription display
 - [ ] Implement WebSocket client in JavaScript
 - [ ] Add CSS styling with OBS-friendly transparency
 - [ ] Create customization controls (URL parameters or UI)
 - [ ] Test with OBS browser source
 - [ ] Add configurable retention/scroll behavior
 ---
 ### Phase 5: Advanced Features
 **Objective**: Enhance functionality and user experience
 #### Features:
 1. **Language Detection**
   - Auto-detect spoken language
   - Multi-language support in single stream
   - Language selector in GUI
 2. **Speaker Diarization** (Optional)
   - Identify different speakers
   - Label transcriptions by speaker
   - Useful for multi-host streams
 3. **Profanity Filtering**
   - Optional word filtering/replacement
   - Customizable filter lists
   - Toggle in GUI settings
 4. **Advanced Noise Profiles**
   - Save and load custom noise profiles
   - Adaptive noise suppression
   - Different profiles for different environments
 5. **Export Functionality**
   - Save transcriptions in multiple formats (TXT, SRT, VTT, JSON)
   - Export button in GUI
   - Automatic session saving
 6. **Hotkey Support**
   - Global hotkeys to start/stop transcription
   - Mute/unmute hotkey
   - Quick save hotkey
 7. **Docker Support**
   - Containerized server deployment
   - Docker Compose for easy multi-component setup
   - Pre-built images for easy deployment
 8. **Themes and Customization**
   - Dark/light theme toggle
   - Customizable font sizes and colors for display
   - OBS-friendly transparent overlay mode
 #### Tasks:
 - [ ] Add language detection and multi-language support
 - [ ] Implement speaker diarization
 - [ ] Create optional profanity filter
 - [ ] Add export functionality (SRT, VTT, plain text, JSON)
 - [ ] Implement global hotkey support
 - [ ] Create Docker containers for server component
 - [ ] Add theme customization options
 - [ ] Create advanced noise profile management
 ---
 ## Technology Stack
 ### Local Client:
 - **Python 3.9+**
 - **GUI**: PyQt6 / CustomTkinter / tkinter
 - **Audio**: PyAudio / sounddevice
 - **Noise Suppression**: noisereduce / rnnoise-python
 - **VAD**: webrtcvad
 - **ML Framework**: PyTorch (for Whisper)
 - **Transcription**: openai-whisper / faster-whisper
 - **Networking**: websockets, requests (optional for server sync)
 - **Config**: PyYAML / json
 ### Server:
 - **Backend**: FastAPI / Flask
 - **WebSocket**: python-websockets / FastAPI WebSockets
 - **Server**: Uvicorn / Gunicorn
 - **Database** (optional): SQLite / PostgreSQL
 - **CORS**: FastAPI's built-in `CORSMiddleware` (`fastapi.middleware.cors`)
 ### Web Interface:
 - **Frontend**: HTML5, CSS3, JavaScript (ES6+)
 - **Real-time**: WebSocket API
 - **Styling**: CSS Grid/Flexbox for layout
 ---
 ## Project Structure
 ```
 local-transcription/
- client/                      # Local transcription client
+├── src/                             # Svelte 5 frontend (Tauri UI)
-    __init__.py
+│   ├── App.svelte                   # Main app shell
-    audio_capture.py         # Audio input handling
+│   ├── lib/components/              # UI components
-    transcription_engine.py  # Whisper integration
+│   │   ├── Header.svelte
-    network_client.py        # Server communication
+│   │   ├── StatusBar.svelte
-    config.py                # Configuration management
+│   │   ├── Controls.svelte
-    main.py                  # Client entry point
+│   │   ├── TranscriptionDisplay.svelte
- server/                      # Centralized web server
+│   │   └── Settings.svelte
-    __init__.py
+│   └── lib/stores/                  # Reactive state management
-    api.py                   # FastAPI routes
+│       ├── backend.ts               # WebSocket + REST API client
-    websocket_handler.py     # WebSocket management
+│       ├── config.ts                # App configuration
-    models.py                # Data models
+│       └── transcriptions.ts        # Transcription data
-    database.py              # Optional DB layer
+├── src-tauri/                       # Tauri v2 Rust shell
-    main.py                  # Server entry point
+│   ├── src/main.rs
- web/                         # Web stream interface
+│   └── tauri.conf.json
-    index.html               # OBS browser source page
+├── backend/                         # Headless Python backend (sidecar)
-    styles.css               # Customizable styling
+│   ├── app_controller.py            # Orchestration logic (engine, sync, config)
-    app.js                   # WebSocket client & UI logic
+│   ├── api_server.py                # FastAPI REST + WebSocket control API
- config/
+│   └── main_headless.py             # Headless entry point
-    client_config.example.yaml
+├── client/                          # Core transcription modules
-    server_config.example.yaml
+│   ├── audio_capture.py             # Audio input handling
- tests/
+│   ├── transcription_engine_realtime.py  # RealtimeSTT / Whisper
-    test_audio.py
+│   ├── deepgram_transcription.py    # Deepgram cloud transcription
-    test_transcription.py
+│   ├── noise_suppression.py         # VAD and noise reduction
-    test_server.py
+│   ├── device_utils.py              # CPU/GPU/MPS detection
- requirements.txt             # Python dependencies
+│   ├── config.py                    # Configuration management
- README.md
+│   ├── server_sync.py               # Multi-user server client
- main.py                      # Combined launcher (optional)
+│   └── update_checker.py            # Auto-update functionality
 ├── gui/                             # Legacy PySide6/Qt GUI
 │   ├── main_window_qt.py
 │   ├── settings_dialog_qt.py
 │   └── transcription_display_qt.py
 ├── server/                          # Web servers
 │   ├── web_display.py               # Local FastAPI server for OBS
 │   └── nodejs/                      # Multi-user sync server
 ├── .gitea/workflows/                # CI/CD
 │   ├── release.yml                  # Tauri app builds (all platforms)
 │   └── build-sidecar.yml            # Python sidecar builds (CUDA + CPU)
 ├── config/
 │   └── default_config.yaml          # Default settings template
 ├── main.py                          # Legacy GUI entry point
 ├── main_cli.py                      # CLI version (for testing)
 ├── local-transcription.spec         # PyInstaller config (legacy, with PySide6)
 ├── local-transcription-headless.spec # PyInstaller config (headless sidecar)
 ├── pyproject.toml                   # Python dependencies
 └── package.json                     # Node.js / Tauri dependencies
 ```
---
+## Technology Stack
-## Installation (Planned)
+### Frontend (Tauri)
 - **Tauri v2** — Native cross-platform shell (Rust)
 - **Svelte 5** — Reactive UI framework (TypeScript)
 - **Vite** — Frontend build tool
-### Prerequisites:
+### Backend (Python Sidecar)
- Python 3.9 or higher
+- **Python 3.9+**
- CUDA-capable GPU (optional, for GPU acceleration)
+- **FastAPI + Uvicorn** — REST API and WebSocket server
- FFmpeg (required by Whisper)
+- **RealtimeSTT** — Real-time speech-to-text with advanced VAD
 - **faster-whisper** — Optimized Whisper model inference (CTranslate2)
 - **PyTorch** — ML framework (CUDA-enabled builds available)
 - **sounddevice** — Cross-platform audio capture
 - **webrtcvad + silero_vad** — Voice activity detection
-### Steps:
+### Multi-User Server (Optional)
 - **Node.js + Express + WebSocket** — Real-time sync server
-1. **Clone the repository**
+### Build & CI/CD
-   ```bash
+- **PyInstaller** — Python sidecar packaging
-   git clone <repository-url>
+- **Tauri CLI** — App bundling (.msi, .dmg, .deb, .rpm, .AppImage)
-   cd local-transcription
+- **Gitea Actions** — Automated cross-platform builds
-   ```
+- **uv** — Fast Python package manager
-2. **Install dependencies**
+## CI/CD
   ```bash
   pip install -r requirements.txt
   ```
-3. **Download Whisper models**
+Two Gitea Actions workflows in `.gitea/workflows/`:
   ```bash
   # Models will be auto-downloaded on first run
   # Or manually download:
   python -c "import whisper; whisper.load_model('base')"
   ```
-4. **Configure client**
+| Workflow | Trigger | Produces |
-   ```bash
+|----------|---------|----------|
-   cp config/client_config.example.yaml config/client_config.yaml
+| `release.yml` | Push to `main` | Tauri app installers for all platforms |
-   # Edit config/client_config.yaml with your settings
+| `build-sidecar.yml` | Changes to `client/`, `server/`, `backend/`, or `pyproject.toml` | Python sidecar zips (CUDA + CPU) |
   ```
-5. **Run the server** (one instance)
+Both workflows require a `BUILD_TOKEN` secret in the repo settings (Gitea API token with release write access).
   ```bash
   python server/main.py
   ```
-6. **Run the client** (on each user's machine)
+### Release Artifacts
   ```bash
   python client/main.py
   ```
-7. **Add to OBS**
+| Platform | App Installer | Sidecar (CUDA) | Sidecar (CPU) |
-   - Add a Browser Source
+|----------|--------------|----------------|---------------|
-   - URL: `http://<server-ip>:8000/stream`
+| Linux x86_64 | `.deb`, `.rpm`, `.AppImage` | `sidecar-linux-x86_64-cuda.zip` | `sidecar-linux-x86_64-cpu.zip` |
-   - Set width/height as needed
+| Windows x86_64 | `.msi`, `-setup.exe` | `sidecar-windows-x86_64-cuda.zip` | `sidecar-windows-x86_64-cpu.zip` |
-   - Check "Shutdown source when not visible" for performance
+| macOS ARM64 | `.dmg` | — | `sidecar-macos-aarch64-cpu.zip` |
---
+## System Requirements
-## Configuration (Planned)
+### Minimum
 - 4GB RAM
 - Any modern CPU
-### Client Configuration:
+### Recommended (for local real-time transcription)
-```yaml
+- 8GB+ RAM
-user:
+- NVIDIA GPU with CUDA support (for GPU acceleration)
  name: "Streamer1"          # Display name for transcriptions
  id: "unique-user-id"       # Optional unique identifier
-audio:
+### For Building
-  input_device: "default"    # or specific device index
+- **Tauri app**: Node.js 20+, Rust stable, platform SDK (see [Tauri prerequisites](https://tauri.app/start/prerequisites/))
-  sample_rate: 16000
+- **Python sidecar**: Python 3.9+, uv, PyInstaller
-  chunk_duration: 2.0        # seconds
+- **Linux**: `libgtk-3-dev`, `libwebkit2gtk-4.1-dev`, `libappindicator3-dev`, `librsvg2-dev`, `patchelf`
 - **Windows**: Visual Studio Build Tools, WebView2
 - **macOS**: Xcode Command Line Tools
-noise_suppression:
+## Troubleshooting
  enabled: true              # Enable/disable noise reduction
  strength: 0.7              # 0.0 to 1.0 - reduction strength
  method: "noisereduce"      # "noisereduce" or "rnnoise"
-transcription:
+### Model Loading Issues
-  model: "base"              # tiny, base, small, medium, large
+- Models download automatically on first use to `~/.cache/huggingface/`
-  device: "cuda"             # cpu, cuda, mps
+- First run requires internet connection
-  language: "en"             # or "auto" for detection
+- Check disk space (models range from 75MB to 3GB)
  task: "transcribe"         # or "translate"
-processing:
+### Audio Device Issues
-  use_vad: true              # Voice Activity Detection
+```bash
-  min_confidence: 0.5        # Minimum transcription confidence
+# List available audio devices
-
+uv run python main_cli.py --list-devices
 server_sync:
  enabled: false             # Enable multi-user server sync
  url: "ws://localhost:8000" # Server URL (when enabled)
  api_key: ""                # Optional API key
 display:
  show_timestamps: true      # Show timestamps in local display
  max_lines: 100             # Maximum lines to keep in display
  font_size: 12              # GUI font size
 ```
 - Ensure microphone permissions are granted (especially on macOS)
 - Try different device indices in settings
-### Server Configuration:
+### GPU Not Detected
-```yaml
+```bash
-server:
+# Check CUDA availability
-  host: "0.0.0.0"
+uv run python -c "import torch; print(torch.cuda.is_available())"
  port: 8000
  api_key_required: false
 stream:
  max_clients: 10
  buffer_size: 100         # messages to buffer
  retention_time: 300      # seconds
 database:
  enabled: false
  path: "transcriptions.db"
 ```
 - Install NVIDIA drivers (CUDA toolkit is bundled in CUDA sidecar builds)
 - The app automatically falls back to CPU if no GPU is available
---
+### Web Server Port Conflicts
 - Default port is 8080; the app tries ports 8080-8084 automatically
 - Change in settings or edit config file
 - Check for conflicts: `lsof -i :8080` (Linux/macOS) or `netstat -ano | findstr :8080` (Windows)
-## Roadmap
+## Use Cases
- [x] Project planning and architecture design
+- **Live Streaming Captions**: Add real-time captions to your Twitch/YouTube streams
- [ ] Phase 1: Standalone desktop application with GUI
+- **Multi-Language Translation**: Multiple translators transcribing in different languages
- [ ] Phase 2: Web server and sync system (optional multi-user mode)
+- **Accessibility**: Provide captions for hearing-impaired viewers
- [ ] Phase 3: Client-server communication (optional)
+- **Podcast Recording**: Real-time transcription for multi-host shows
- [ ] Phase 4: Web stream interface for OBS (optional)
+- **Gaming Commentary**: Track who said what in multiplayer sessions
 - [ ] Phase 5: Advanced features (hotkeys, themes, Docker, etc.)
 ---
 ## Contributing
-Contributions are welcome! Please feel free to submit issues or pull requests.
+Contributions are welcome! Please feel free to submit issues or pull requests at the [repository](https://repo.anhonesthost.net/streamer-tools/local-transcription).
 ---
 ## License
-[Choose appropriate license - MIT, Apache 2.0, etc.]
+MIT License
 ---
 ## Acknowledgments
- OpenAI Whisper for the excellent speech recognition model
+- [OpenAI Whisper](https://github.com/openai/whisper) for the speech recognition model
- The streaming community for inspiration and use cases
+- [RealtimeSTT](https://github.com/KoljaB/RealtimeSTT) for real-time transcription capabilities
 - [faster-whisper](https://github.com/SYSTRAN/faster-whisper) for optimized inference
 - [Tauri](https://tauri.app/) for the cross-platform desktop framework
 - [Deepgram](https://deepgram.com/) for cloud transcription API
--- a/backend/__init__.py
+++ b/backend/__init__.py
@@ -0,0 +1 @@
 """Backend package for headless transcription service."""
--- a/backend/api_server.py
+++ b/backend/api_server.py
@@ -0,0 +1,335 @@
 """FastAPI control API server for the headless transcription backend.
 Extends the existing OBS display server with REST endpoints and a
 control WebSocket channel so that a Tauri (or any other) frontend
 can drive the application.
 """
 import asyncio
 import json
 from datetime import datetime
 from typing import List, Optional
 from fastapi import FastAPI, WebSocket, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
 from backend.app_controller import AppController
 # ── Request / Response Models ──────────────────────────────────────
 class ConfigUpdate(BaseModel):
     """Batch config update payload. Keys use dot-notation.
     Request body of ``PUT /api/config``; ``settings`` is handed unchanged to
     ``AppController.apply_settings``.
     """
     # Mapping of dot-notation config key -> new value,
     # e.g. {"user.name": "Alice", "transcription.model": "small.en"}
     settings: dict  # e.g. {"user.name": "Alice", "transcription.model": "small.en"}
 class LoginRequest(BaseModel):
     """Request body of ``POST /api/login``.
     ``email`` and ``password`` are forwarded as JSON to
     ``{server_url}/api/auth/login`` on the remote server.
     """
     email: str
     password: str
     # Base URL of the remote server the login is proxied to.
     server_url: str
 class RegisterRequest(BaseModel):
     """Request body of ``POST /api/register``.
     ``email`` and ``password`` are forwarded as JSON to
     ``{server_url}/api/auth/register`` on the remote server.
     """
     email: str
     password: str
     # Base URL of the remote server the registration is proxied to.
     server_url: str
 class SkipVersionRequest(BaseModel):
     """Request body of ``POST /api/skip-version``."""
     # Version string passed to ``AppController.skip_version``.
     version: str
 class SaveFileRequest(BaseModel):
     """Request body of ``POST /api/save-file``."""
     # Destination path on the machine running this backend.
     path: str
     # Content written to ``path`` (UTF-8 encoded by the endpoint).
     text: str
 # ── API Server ─────────────────────────────────────────────────────
 class APIServer:
     """Wraps AppController with a FastAPI application exposing control endpoints.
     REST routes delegate to the controller, and controller callbacks are
     re-published as JSON messages to every client connected on ``/ws/control``.
     """
     def __init__(self, controller: AppController):
         """Build the FastAPI app, register the routes and hook controller callbacks.
         Args:
             controller: Application controller that every endpoint delegates to.
         """
         self.controller = controller
         self.control_connections: List[WebSocket] = []
         # Event loop that owns the WebSocket connections. It is only known once
         # the server has started, so it begins as None and broadcasts are
         # skipped until set_event_loop() has been called.
         self._event_loop: Optional[asyncio.AbstractEventLoop] = None
         self.app = FastAPI(title="Local Transcription API", version="1.0.0")
         # Allow Tauri webview origin
         # NOTE(review): wildcard origins combined with /api/save-file means any
         # origin may ask this server to write files -- consider restricting
         # allow_origins to the Tauri origins listed below.
         self.app.add_middleware(
             CORSMiddleware,
             allow_origins=["*"],  # Tauri uses tauri://localhost or https://tauri.localhost
             allow_credentials=True,
             allow_methods=["*"],
             allow_headers=["*"],
         )
         self._setup_routes()
         self._wire_controller_callbacks()
     def _wire_controller_callbacks(self):
         """Wire AppController callbacks to broadcast over /ws/control.
         An ``on_state_changed`` callback that is already installed keeps being
         invoked first; the other callbacks are replaced outright.
         """
         original_state_cb = self.controller.on_state_changed
         def on_state_changed(state: str, message: str):
             if original_state_cb:
                 original_state_cb(state, message)
             self._broadcast_control({"type": "state_changed", "state": state, "message": message})
         self.controller.on_state_changed = on_state_changed
         def on_transcription(data: dict):
             self._broadcast_control({"type": "transcription", **data})
         self.controller.on_transcription = on_transcription
         def on_preview(data: dict):
             self._broadcast_control({"type": "preview", **data})
         self.controller.on_preview = on_preview
         def on_error(msg: str):
             self._broadcast_control({"type": "error", "message": msg})
         self.controller.on_error = on_error
         def on_credits_low(seconds: int):
             self._broadcast_control({"type": "credits_low", "seconds_remaining": seconds})
         self.controller.on_credits_low = on_credits_low
     def set_event_loop(self, loop: asyncio.AbstractEventLoop):
         """Set the event loop used for broadcasting (call from uvicorn startup)."""
         self._event_loop = loop
     def _drop_connection(self, ws):
         """Forget a control connection; a no-op if it was already removed."""
         try:
             self.control_connections.remove(ws)
         except ValueError:
             # Another code path (handler cleanup or a failed send) got there first.
             pass
     async def _send_or_drop(self, ws, message: str):
         """Send ``message`` to one client and prune the client if the send fails."""
         try:
             await ws.send_text(message)
         except Exception:
             self._drop_connection(ws)
     def _broadcast_control(self, data: dict):
         """Send a message to all connected /ws/control clients.
         Safe to call from any thread: each send is scheduled onto the server's
         event loop. Nothing is sent while no loop has been registered or no
         client is connected.
         Args:
             data: JSON-serialisable message payload.
         """
         if not self.control_connections:
             return
         loop = self._event_loop
         if loop is None:
             return
         message = json.dumps(data)
         # Iterate over a snapshot: the loop thread may add/remove connections
         # while a callback thread is broadcasting.
         for ws in list(self.control_connections):
             # Send failures happen later, inside the scheduled coroutine, so
             # the pruning of dead sockets lives in _send_or_drop rather than
             # in an except clause around the scheduling call.
             coro = self._send_or_drop(ws, message)
             try:
                 asyncio.run_coroutine_threadsafe(coro, loop)
             except Exception:
                 # Scheduling itself failed (e.g. the loop is closed).
                 coro.close()
                 self._drop_connection(ws)
     @staticmethod
     async def _run_blocking(func, *args, **kwargs):
         """Run a blocking callable in the default executor and await its result.
         Keeps slow synchronous calls (such as outbound HTTP requests) from
         stalling the event loop that also serves the WebSocket clients.
         Exceptions raised by ``func`` propagate to the awaiting caller.
         """
         loop = asyncio.get_running_loop()
         return await loop.run_in_executor(None, lambda: func(*args, **kwargs))
     @staticmethod
     def _remote_url(server_url: str, path: str) -> str:
         """Join a remote base URL and an absolute path without doubling the slash."""
         return f"{server_url.rstrip('/')}{path}"
     def _setup_routes(self):
         """Register all API routes."""
         app = self.app
         ctrl = self.controller
         @app.on_event("startup")
         async def on_startup():
             # Runs inside the server's loop, so the running loop is the one
             # broadcasts must be scheduled on.
             self.set_event_loop(asyncio.get_running_loop())
         # ── Status ─────────────────────────────────────────────
         @app.get("/api/status")
         async def get_status():
             return ctrl.get_status()
         @app.get("/api/version")
         async def get_version():
             from version import __version__
             return {"version": __version__}
         # ── Transcription Control ──────────────────────────────
         @app.post("/api/start")
         async def start_transcription():
             success, message = ctrl.start_transcription()
             if not success:
                 raise HTTPException(status_code=400, detail=message)
             return {"status": "ok", "message": message}
         @app.post("/api/stop")
         async def stop_transcription():
             success, message = ctrl.stop_transcription()
             if not success:
                 raise HTTPException(status_code=400, detail=message)
             return {"status": "ok", "message": message}
         @app.post("/api/clear")
         async def clear_transcriptions():
             count = ctrl.clear_transcriptions()
             return {"status": "ok", "cleared": count}
         @app.get("/api/transcriptions")
         async def get_transcriptions():
             show_timestamps = ctrl.config.get('display.show_timestamps', True)
             return {
                 "count": len(ctrl.transcriptions),
                 "text": ctrl.get_transcriptions_text(include_timestamps=show_timestamps),
                 "items": [
                     {
                         "text": r.text,
                         "user_name": r.user_name,
                         "timestamp": r.timestamp.strftime("%H:%M:%S") if r.timestamp else None,
                     }
                     for r in ctrl.transcriptions
                 ],
             }
         @app.post("/api/save-file")
         async def save_file(req: SaveFileRequest):
             """Save text to a file (used by Tauri frontend after dialog)."""
             from pathlib import Path
             # NOTE(review): the path comes straight from the request body and is
             # not validated or confined to any directory.
             try:
                 Path(req.path).write_text(req.text, encoding="utf-8")
                 return {"status": "ok", "path": req.path}
             except Exception as e:
                 raise HTTPException(status_code=500, detail=str(e))
         # ── Configuration ──────────────────────────────────────
         @app.get("/api/config")
         async def get_config():
             return ctrl.config.config
         @app.put("/api/config")
         async def update_config(update: ConfigUpdate):
             engine_reloaded, message = ctrl.apply_settings(update.settings)
             return {
                 "status": "ok",
                 "message": message,
                 "engine_reloaded": engine_reloaded,
             }
         # ── Devices ────────────────────────────────────────────
         @app.get("/api/audio-devices")
         async def get_audio_devices():
             return {"devices": ctrl.get_audio_devices()}
         @app.get("/api/compute-devices")
         async def get_compute_devices():
             return {"devices": ctrl.get_compute_devices()}
         # ── Engine ─────────────────────────────────────────────
         @app.post("/api/reload-engine")
         async def reload_engine():
             success, message = ctrl.reload_engine()
             if not success:
                 raise HTTPException(status_code=500, detail=message)
             return {"status": "ok", "message": message}
         # ── Updates ────────────────────────────────────────────
         @app.get("/api/check-update")
         async def check_update():
             return ctrl.check_for_updates()
         @app.post("/api/skip-version")
         async def skip_version(req: SkipVersionRequest):
             ctrl.skip_version(req.version)
             return {"status": "ok"}
         # ── Managed Mode Auth Proxy ────────────────────────────
         # The outbound HTTP calls below are synchronous (requests), so they are
         # pushed to the executor instead of being run on the event loop.
         @app.post("/api/login")
         async def login(req: LoginRequest):
             """Proxy login to the transcription proxy server."""
             import requests as http_requests
             try:
                 resp = await self._run_blocking(
                     http_requests.post,
                     self._remote_url(req.server_url, "/api/auth/login"),
                     json={"email": req.email, "password": req.password},
                     timeout=10,
                 )
                 if resp.status_code == 200:
                     data = resp.json()
                     # Persist the session so /api/balance can authenticate later.
                     ctrl.config.set('remote.auth_token', data.get('token', ''))
                     ctrl.config.set('remote.server_url', req.server_url)
                     return {"status": "ok", "token": data.get('token', '')}
                 else:
                     raise HTTPException(status_code=resp.status_code, detail=resp.text)
             except http_requests.RequestException as e:
                 raise HTTPException(status_code=502, detail=str(e))
         @app.post("/api/register")
         async def register(req: RegisterRequest):
             """Proxy registration to the transcription proxy server."""
             import requests as http_requests
             try:
                 resp = await self._run_blocking(
                     http_requests.post,
                     self._remote_url(req.server_url, "/api/auth/register"),
                     json={"email": req.email, "password": req.password},
                     timeout=10,
                 )
                 if resp.status_code in (200, 201):
                     return {"status": "ok", "data": resp.json()}
                 else:
                     raise HTTPException(status_code=resp.status_code, detail=resp.text)
             except http_requests.RequestException as e:
                 raise HTTPException(status_code=502, detail=str(e))
         @app.get("/api/balance")
         async def get_balance():
             """Proxy balance check to the transcription proxy server."""
             import requests as http_requests
             server_url = ctrl.config.get('remote.server_url', '')
             token = ctrl.config.get('remote.auth_token', '')
             if not server_url or not token:
                 raise HTTPException(status_code=400, detail="Not logged in to managed service")
             try:
                 resp = await self._run_blocking(
                     http_requests.get,
                     self._remote_url(server_url, "/api/billing/balance"),
                     headers={"Authorization": f"Bearer {token}"},
                     timeout=10,
                 )
                 if resp.status_code == 200:
                     return resp.json()
                 else:
                     raise HTTPException(status_code=resp.status_code, detail=resp.text)
             except http_requests.RequestException as e:
                 raise HTTPException(status_code=502, detail=str(e))
         # ── Control WebSocket ──────────────────────────────────
         @app.websocket("/ws/control")
         async def websocket_control(websocket: WebSocket):
             """WebSocket channel for real-time state and transcription push."""
             await websocket.accept()
             self.control_connections.append(websocket)
             try:
                 # Send current status on connect
                 try:
                     await websocket.send_json({
                         "type": "state_changed",
                         "state": ctrl.state,
                         "message": "Connected",
                     })
                 except Exception:
                     # Best effort; a dead socket will fail again in the loop below.
                     pass
                 while True:
                     # Keep alive -- client sends pings
                     await websocket.receive_text()
             except Exception:
                 # A disconnect surfaces as an exception from receive_text();
                 # it ends the handler normally.
                 pass
             finally:
                 # Also runs on cancellation, so the connection never lingers
                 # in the broadcast list.
                 self._drop_connection(websocket)
         # ── Mount the existing OBS display routes ──────────────
         # The OBS display (GET / and /ws) is handled by the
         # TranscriptionWebServer which shares the same Uvicorn
         # instance. We mount it as a sub-application so the
         # existing OBS URLs continue to work.
         if ctrl.web_server:
             app.mount("/obs", ctrl.web_server.app)
--- a/backend/app_controller.py
+++ b/backend/app_controller.py
@@ -0,0 +1,692 @@
 """Headless application controller for transcription backend.
 Extracts orchestration logic from gui/main_window_qt.py into a
 Qt-free class that manages engine lifecycle, web server, server sync,
 and configuration -- all accessible via callbacks instead of Qt signals.
 """
 import asyncio
 import time
 from datetime import datetime
 from pathlib import Path
 from threading import Thread, Lock
 from typing import Callable, List, Optional
 import sys
 # Add project root to path
 sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
 from client.config import Config
 from client.device_utils import DeviceManager
 from client.transcription_engine_realtime import RealtimeTranscriptionEngine, TranscriptionResult
 from client.deepgram_transcription import DeepgramTranscriptionEngine
 from client.server_sync import ServerSyncClient
 from server.web_display import TranscriptionWebServer
 from version import __version__
 class AppState:
    """Enum-like class for application states.

    The members are plain strings (not ``enum.Enum`` values); AppController
    stores them in ``_state`` and passes them to ``on_state_changed``.
    """
    # Initial state; also reported while initialize() starts the web server and engine.
    INITIALIZING = "initializing"
    # Engine initialized successfully, or transcription was stopped.
    READY = "ready"
    # Recording/transcription is active.
    TRANSCRIBING = "transcribing"
    # reload_engine() is replacing the engine.
    RELOADING = "reloading"
    # Engine initialization or reload failed.
    ERROR = "error"
 class WebServerThread(Thread):
    """Daemon thread that runs the web server on its own asyncio event loop.

    After ``run`` has started, ``loop`` holds the thread's event loop; if
    starting the server raises, the exception is kept in ``error``.
    """
    def __init__(self, web_server: TranscriptionWebServer):
        Thread.__init__(self, daemon=True)
        self.web_server = web_server
        self.loop: Optional[asyncio.AbstractEventLoop] = None
        self.error: Optional[Exception] = None
    def run(self):
        """Create the loop, make it current for this thread, and run the server."""
        try:
            event_loop = asyncio.new_event_loop()
            self.loop = event_loop
            asyncio.set_event_loop(event_loop)
            event_loop.run_until_complete(self.web_server.start())
        except Exception as exc:
            # Remember the failure so the starter can inspect it.
            self.error = exc
            print(f"ERROR: Web server failed to start: {exc}")
 class EngineInitThread(Thread):
    """Daemon thread that runs ``engine.initialize()`` off the caller's thread.

    The outcome is reported through ``on_complete(success, message)``.
    """
    def __init__(self, engine, on_complete: Callable[[bool, str], None]):
        Thread.__init__(self, daemon=True)
        self.engine = engine
        self.on_complete = on_complete
    def run(self):
        """Initialize the engine and report success, failure, or the raised error."""
        try:
            initialized = self.engine.initialize()
            outcome = (
                (True, "Engine initialized successfully")
                if initialized
                else (False, "Failed to initialize engine")
            )
            # Kept inside the try so an exception raised by the callback is
            # reported through the same error path as an initialize() failure.
            self.on_complete(*outcome)
        except Exception as exc:
            self.on_complete(False, f"Error initializing engine: {exc}")
 class AppController:
    """Headless controller managing the transcription application lifecycle.
    This replaces the orchestration logic that previously lived in MainWindow.
    It manages:
    - Transcription engine lifecycle (init, start, stop, reload)
    - Web server for OBS display
    - Server sync for multi-user mode
    - Configuration
    - Update checking
    All state changes are communicated via callbacks, making it UI-agnostic.
    """
    def __init__(self, config: Optional[Config] = None):
        """Create the controller without starting anything.

        Args:
            config: Configuration object to use; a new ``Config()`` is created
                when omitted (or when the given object is falsy).

        Only attributes are set up here; ``initialize()`` starts the web
        server and the engine.
        """
        self.config = config or Config()
        self.device_manager = DeviceManager()
        # State: _state holds an AppState value and is read/written under _state_lock.
        self._state = AppState.INITIALIZING
        self._state_lock = Lock()
        self.is_transcribing = False
        # Engine: created by _initialize_engine(); model/device config are
        # remembered so apply_settings() can detect changes that need a reload.
        self.transcription_engine = None
        self._engine_init_thread: Optional[EngineInitThread] = None
        self.current_model_size: Optional[str] = None
        self.current_device_config: Optional[str] = None
        # Web server: actual_web_port is the port that was really bound,
        # which may differ from the configured one.
        self.web_server: Optional[TranscriptionWebServer] = None
        self.web_server_thread: Optional[WebServerThread] = None
        self.actual_web_port: Optional[int] = None
        # Server sync: only set while a sync client is running.
        self.server_sync_client: Optional[ServerSyncClient] = None
        # Transcription storage: final results appended by _on_final_transcription().
        self.transcriptions: List[TranscriptionResult] = []
        # Callbacks for state notifications (set by the frontend / API server)
        self.on_state_changed: Optional[Callable[[str, str], None]] = None  # called with (state, message)
        self.on_transcription: Optional[Callable[[dict], None]] = None  # final transcription payload
        self.on_preview: Optional[Callable[[dict], None]] = None  # realtime preview payload
        self.on_error: Optional[Callable[[str], None]] = None  # error message from the remote engine
        self.on_credits_low: Optional[Callable[[int], None]] = None  # seconds of credit remaining
    @property
    def state(self) -> str:
        with self._state_lock:
            return self._state
    def _set_state(self, state: str, message: str = ""):
        with self._state_lock:
            self._state = state
        if self.on_state_changed:
            self.on_state_changed(state, message)
    # ── Lifecycle ──────────────────────────────────────────────────
    def initialize(self):
        """Bring up the web server, then kick off engine initialization.

        Intended to be called once at startup. A state notification with a
        progress message is emitted before each step. The engine itself is
        initialized on a background thread, so this call does not wait for it.
        """
        startup_steps = (
            ("Starting web server...", self._start_web_server),
            ("Loading transcription engine...", self._initialize_engine),
        )
        for progress_message, run_step in startup_steps:
            self._set_state(AppState.INITIALIZING, progress_message)
            run_step()
    def shutdown(self):
        """Gracefully shut down all components."""
        # Stop transcription
        if self.is_transcribing:
            self.stop_transcription()
        # Stop web server
        if self.web_server_thread and self.web_server_thread.is_alive():
            try:
                if self.web_server_thread.loop:
                    self.web_server_thread.loop.call_soon_threadsafe(
                        self.web_server_thread.loop.stop
                    )
            except Exception as e:
                print(f"Warning: Error stopping web server: {e}")
        # Stop transcription engine
        if self.transcription_engine:
            try:
                self.transcription_engine.stop()
            except Exception as e:
                print(f"Warning: Error stopping engine: {e}")
        # Wait for engine init thread
        if self._engine_init_thread and self._engine_init_thread.is_alive():
            self._engine_init_thread.join(timeout=5)
    # ── Web Server ─────────────────────────────────────────────────
    def _start_web_server(self):
        """Start the FastAPI web server for OBS display.

        Tries the configured port and then the next four ports. The next
        port is only tried when the failure is an "address already in use"
        bind error; any other startup error aborts the search. On success
        ``actual_web_port`` records the port that was used. On failure
        ``web_server`` and ``web_server_thread`` are left as None and a
        message is printed; no exception is raised.
        """
        import errno  # local import: only needed for the bind-error check below

        def _is_port_in_use(error: Exception) -> bool:
            """Return True when ``error`` reports that the port is already bound."""
            # errno.EADDRINUSE carries the platform's own numeric code, so this
            # also matches on macOS and Windows, where the text/number checks
            # below (Linux wording, errno 98) do not apply.
            if isinstance(error, OSError) and error.errno == errno.EADDRINUSE:
                return True
            # Fallback for errors that were re-wrapped and lost their errno.
            text = str(error).lower()
            return (
                "address already in use" in text
                or "errno 98" in text
                or "only one usage of each socket address" in text
            )

        try:
            host = self.config.get('web_server.host', '127.0.0.1')
            port = self.config.get('web_server.port', 8080)
            # Gather display settings
            ws_kwargs = self._get_web_server_kwargs(host, port)
            # Try up to 5 ports
            for try_port in range(port, port + 5):
                print(f"Attempting to start web server at http://{host}:{try_port}")
                ws_kwargs['port'] = try_port
                self.web_server = TranscriptionWebServer(**ws_kwargs)
                self.web_server_thread = WebServerThread(self.web_server)
                self.web_server_thread.start()
                # Give the thread a moment to either bind or record its error.
                time.sleep(0.5)
                startup_error = self.web_server_thread.error
                if not startup_error:
                    self.actual_web_port = try_port
                    print(f"Web server started at http://{host}:{try_port}")
                    return
                self.web_server = None
                self.web_server_thread = None
                if _is_port_in_use(startup_error):
                    print(f"Port {try_port} is in use, trying next port...")
                    continue
                print(f"Web server failed to start: {startup_error}")
                break
            print("WARNING: Could not start web server on any port")
        except Exception as e:
            print(f"ERROR: Failed to initialize web server: {e}")
            self.web_server = None
            self.web_server_thread = None
    def _get_web_server_kwargs(self, host: str, port: int) -> dict:
        """Build kwargs dict for TranscriptionWebServer from config."""
        return dict(
            host=host,
            port=port,
            show_timestamps=self.config.get('display.show_timestamps', True),
            fade_after_seconds=self.config.get('display.fade_after_seconds', 10),
            max_lines=self.config.get('display.max_lines', 50),
            font_family=self.config.get('display.font_family', 'Arial'),
            font_size=self.config.get('display.font_size', 16),
            fonts_dir=self.config.fonts_dir,
            font_source=self.config.get('display.font_source', 'System Font'),
            websafe_font=self.config.get('display.websafe_font', 'Arial'),
            google_font=self.config.get('display.google_font', 'Roboto'),
            user_color=self.config.get('display.user_color', '#4CAF50'),
            text_color=self.config.get('display.text_color', '#FFFFFF'),
            background_color=self.config.get('display.background_color', '#000000B3'),
        )
    # ── Transcription Engine ───────────────────────────────────────
    def _initialize_engine(self):
        """Initialize the transcription engine in a background thread.

        Reads the transcription, audio, user and remote settings from config,
        builds either a DeepgramTranscriptionEngine (``remote.mode`` of
        ``managed`` or ``byok``) or a RealtimeTranscriptionEngine (any other
        mode), wires the realtime/final callbacks, and starts an
        EngineInitThread that reports back through ``_on_engine_ready``.
        """
        device_config = self.config.get('transcription.device', 'auto')
        self.device_manager.set_device(device_config)
        # 'default' means "no explicit device index"; anything else is parsed as an int.
        # NOTE(review): a non-numeric value other than 'default' would raise ValueError here.
        audio_device_str = self.config.get('audio.input_device', 'default')
        audio_device = None if audio_device_str == 'default' else int(audio_device_str)
        model = self.config.get('transcription.model', 'base.en')
        language = self.config.get('transcription.language', 'en')
        device = self.device_manager.get_device_for_whisper()
        compute_type = self.config.get('transcription.compute_type', 'default')
        # Remembered so apply_settings() can tell whether a later change needs a reload.
        self.current_model_size = model
        self.current_device_config = device_config
        user_name = self.config.get('user.name', 'User')
        continuous_mode = self.config.get('transcription.continuous_mode', False)
        if continuous_mode:
            # Continuous mode ignores the configured timing values and uses these fixed ones.
            post_speech_silence = 0.15
            min_gap = 0.0
            min_recording = 0.3
        else:
            post_speech_silence = self.config.get('transcription.post_speech_silence_duration', 0.3)
            min_gap = self.config.get('transcription.min_gap_between_recordings', 0.0)
            min_recording = self.config.get('transcription.min_length_of_recording', 0.5)
        remote_mode = self.config.get('remote.mode', 'local')
        if remote_mode in ('managed', 'byok'):
            # Remote engine: takes the whole config object plus the audio device.
            self.transcription_engine = DeepgramTranscriptionEngine(
                config=self.config,
                user_name=user_name,
                input_device_index=audio_device,
            )
            self.transcription_engine.set_callbacks(
                realtime_callback=self._on_realtime_transcription,
                final_callback=self._on_final_transcription,
            )
            # Only the remote engine gets the error and low-credit callbacks.
            self.transcription_engine.set_error_callback(self._on_remote_error)
            self.transcription_engine.set_credits_low_callback(self._on_credits_low)
        else:
            # Local engine: every tuning option is passed explicitly.
            self.transcription_engine = RealtimeTranscriptionEngine(
                model=model,
                device=device,
                language=language,
                compute_type=compute_type,
                enable_realtime_transcription=self.config.get('transcription.enable_realtime_transcription', False),
                realtime_model=self.config.get('transcription.realtime_model', 'tiny.en'),
                realtime_processing_pause=self.config.get('transcription.realtime_processing_pause', 0.1),
                silero_sensitivity=self.config.get('transcription.silero_sensitivity', 0.4),
                silero_use_onnx=self.config.get('transcription.silero_use_onnx', True),
                webrtc_sensitivity=self.config.get('transcription.webrtc_sensitivity', 3),
                post_speech_silence_duration=post_speech_silence,
                min_length_of_recording=min_recording,
                min_gap_between_recordings=min_gap,
                pre_recording_buffer_duration=self.config.get('transcription.pre_recording_buffer_duration', 0.2),
                beam_size=self.config.get('transcription.beam_size', 5),
                initial_prompt=self.config.get('transcription.initial_prompt', ''),
                no_log_file=self.config.get('transcription.no_log_file', True),
                input_device_index=audio_device,
                user_name=user_name,
            )
            self.transcription_engine.set_callbacks(
                realtime_callback=self._on_realtime_transcription,
                final_callback=self._on_final_transcription,
            )
        # Start init in background thread
        self._engine_init_thread = EngineInitThread(
            self.transcription_engine,
            self._on_engine_ready,
        )
        self._engine_init_thread.start()
    def _on_engine_ready(self, success: bool, message: str):
        """Completion callback for EngineInitThread.

        On failure the state becomes ERROR with ``message``. On success the
        state becomes READY with a message naming the device in use:
        the Deepgram mode for remote engines, otherwise the engine's device
        and compute type.
        """
        if not success:
            self._set_state(AppState.ERROR, message)
            return
        remote_mode = self.config.get('remote.mode', 'local')
        engine = self.transcription_engine
        if remote_mode in ('managed', 'byok'):
            mode_label = 'BYOK' if remote_mode != 'managed' else 'Managed'
            device_display = f"Deepgram ({mode_label})"
        elif engine:
            device_name = engine.device
            precision = engine.compute_type
            device_display = f"{device_name.upper()} ({precision})"
        else:
            device_display = "Unknown"
        self._set_state(AppState.READY, f"Ready | Device: {device_display}")
    # ── Transcription Control ──────────────────────────────────────
    def start_transcription(self) -> tuple[bool, str]:
        """Start transcription. Returns (success, message)."""
        if self.is_transcribing:
            return False, "Already transcribing"
        if not self.transcription_engine or not self.transcription_engine.is_ready():
            return False, "Transcription engine not ready"
        try:
            success = self.transcription_engine.start_recording()
            if not success:
                return False, "Failed to start recording"
            # Start server sync if enabled
            if self.config.get('server_sync.enabled', False):
                self._start_server_sync()
            self.is_transcribing = True
            self._set_state(AppState.TRANSCRIBING, "Transcribing...")
            return True, "Transcription started"
        except Exception as e:
            return False, f"Failed to start transcription: {e}"
    def stop_transcription(self) -> tuple[bool, str]:
        """Stop transcription. Returns (success, message)."""
        if not self.is_transcribing:
            return False, "Not transcribing"
        try:
            if self.transcription_engine:
                self.transcription_engine.stop_recording()
            if self.server_sync_client:
                self.server_sync_client.stop()
                self.server_sync_client = None
            self.is_transcribing = False
            self._set_state(AppState.READY, "Ready")
            return True, "Transcription stopped"
        except Exception as e:
            return False, f"Failed to stop transcription: {e}"
    def clear_transcriptions(self) -> int:
        """Clear stored transcriptions. Returns count of cleared items."""
        count = len(self.transcriptions)
        self.transcriptions.clear()
        return count
    def get_transcriptions_text(self, include_timestamps: bool = True) -> str:
        """Get all transcriptions as formatted text."""
        lines = []
        for result in self.transcriptions:
            parts = []
            if include_timestamps:
                parts.append(f"[{result.timestamp.strftime('%H:%M:%S')}]")
            if result.user_name and result.user_name.strip():
                parts.append(f"{result.user_name}:")
            parts.append(result.text)
            lines.append(" ".join(parts))
        return "\n".join(lines)
    def reload_engine(self) -> tuple[bool, str]:
        """Tear down the current engine and start initializing a new one.

        Stops an active transcription first, waits up to 10 seconds for a
        pending engine init thread, stops the old engine (errors are only
        printed), then re-runs engine initialization from current config.

        Returns:
            ``(True, "Engine reload initiated")`` once the new init has been
            started, or ``(False, error_text)`` after setting the ERROR state.
        """
        try:
            if self.is_transcribing:
                self.stop_transcription()
            self._set_state(AppState.RELOADING, "Reloading engine...")
            # Wait for any existing init thread
            pending_init = self._engine_init_thread
            if pending_init and pending_init.is_alive():
                pending_init.join(timeout=10)
            # Stop current engine
            old_engine = self.transcription_engine
            if old_engine:
                try:
                    old_engine.stop()
                except Exception as stop_error:
                    print(f"Warning: Error stopping engine: {stop_error}")
            # Re-initialize
            self._initialize_engine()
        except Exception as exc:
            self._set_state(AppState.ERROR, f"Engine reload failed: {exc}")
            return False, str(exc)
        return True, "Engine reload initiated"
    # ── Transcription Callbacks ────────────────────────────────────
    def _on_realtime_transcription(self, result: TranscriptionResult):
        """Handle realtime (preview) transcription."""
        if not self.is_transcribing:
            return
        try:
            # Broadcast to web server
            if self.web_server and self.web_server_thread and self.web_server_thread.loop:
                asyncio.run_coroutine_threadsafe(
                    self.web_server.broadcast_preview(
                        result.text, result.user_name, result.timestamp
                    ),
                    self.web_server_thread.loop,
                )
            # Send to server sync
            if self.server_sync_client:
                self.server_sync_client.send_preview(result.text, result.timestamp)
            # Notify frontend
            if self.on_preview:
                self.on_preview({
                    "text": result.text,
                    "user_name": result.user_name,
                    "timestamp": result.timestamp.strftime("%H:%M:%S") if result.timestamp else None,
                    "is_preview": True,
                })
        except Exception as e:
            print(f"Error handling realtime transcription: {e}")
    def _on_final_transcription(self, result: TranscriptionResult):
        """Handle final transcription."""
        if not self.is_transcribing:
            return
        try:
            self.transcriptions.append(result)
            # Broadcast to web server
            if self.web_server and self.web_server_thread and self.web_server_thread.loop:
                asyncio.run_coroutine_threadsafe(
                    self.web_server.broadcast_transcription(
                        result.text, result.user_name, result.timestamp
                    ),
                    self.web_server_thread.loop,
                )
            # Send to server sync
            if self.server_sync_client:
                self.server_sync_client.send_transcription(
                    result.text, result.timestamp
                )
            # Notify frontend
            if self.on_transcription:
                self.on_transcription({
                    "text": result.text,
                    "user_name": result.user_name,
                    "timestamp": result.timestamp.strftime("%H:%M:%S") if result.timestamp else None,
                    "is_preview": False,
                })
        except Exception as e:
            print(f"Error handling final transcription: {e}")
    def _on_remote_error(self, error_msg: str):
        """Handle error from remote transcription service."""
        print(f"Remote transcription error: {error_msg}")
        if self.on_error:
            self.on_error(error_msg)
    def _on_credits_low(self, seconds_remaining: int):
        """Handle low credits warning from proxy."""
        if self.on_credits_low:
            self.on_credits_low(seconds_remaining)
    # ── Server Sync ────────────────────────────────────────────────
    def _start_server_sync(self):
        """Start server sync client."""
        try:
            url = self.config.get('server_sync.url', '')
            if not url:
                print("Server sync enabled but no URL configured")
                return
            room = self.config.get('server_sync.room', 'default')
            passphrase = self.config.get('server_sync.passphrase', '')
            user_name = self.config.get('user.name', 'User')
            fonts_dir = self.config.fonts_dir
            font_source = self.config.get('display.font_source', 'System Font')
            if font_source == "System Font":
                font_source = "None"
            self.server_sync_client = ServerSyncClient(
                url=url,
                room=room,
                passphrase=passphrase,
                user_name=user_name,
                fonts_dir=fonts_dir,
                font_source=font_source,
                websafe_font=self.config.get('display.websafe_font', '') or None,
                google_font=self.config.get('display.google_font', '') or None,
                custom_font_file=self.config.get('display.custom_font_file', '') or None,
                user_color=self.config.get('display.user_color', '#4CAF50'),
                text_color=self.config.get('display.text_color', '#FFFFFF'),
                background_color=self.config.get('display.background_color', '#000000B3'),
            )
            self.server_sync_client.start()
        except Exception as e:
            print(f"Error starting server sync: {e}")
    # ── Configuration ──────────────────────────────────────────────
    def apply_settings(self, new_config: Optional[dict] = None) -> tuple[bool, str]:
        """Apply settings changes. If new_config is provided, merge it first.
        Returns (engine_reload_needed, message).
        """
        if new_config:
            for key, value in new_config.items():
                self.config.set(key, value)
        # Update web server display settings
        if self.web_server:
            self.web_server.show_timestamps = self.config.get('display.show_timestamps', True)
            self.web_server.fade_after_seconds = self.config.get('display.fade_after_seconds', 10)
            self.web_server.max_lines = self.config.get('display.max_lines', 50)
            self.web_server.font_family = self.config.get('display.font_family', 'Arial')
            self.web_server.font_size = self.config.get('display.font_size', 16)
            self.web_server.font_source = self.config.get('display.font_source', 'System Font')
            self.web_server.websafe_font = self.config.get('display.websafe_font', 'Arial')
            self.web_server.google_font = self.config.get('display.google_font', 'Roboto')
            self.web_server.user_color = self.config.get('display.user_color', '#4CAF50')
            self.web_server.text_color = self.config.get('display.text_color', '#FFFFFF')
            self.web_server.background_color = self.config.get('display.background_color', '#000000B3')
        # Restart server sync if running
        if self.is_transcribing and self.server_sync_client:
            self.server_sync_client.stop()
            self.server_sync_client = None
            if self.config.get('server_sync.enabled', False):
                self._start_server_sync()
        # Check if model/device changed
        new_model = self.config.get('transcription.model', 'base.en')
        new_device = self.config.get('transcription.device', 'auto')
        engine_reload_needed = (
            self.current_model_size != new_model
            or self.current_device_config != new_device
        )
        if engine_reload_needed:
            self.reload_engine()
            return True, "Settings applied. Engine reloading with new model/device."
        else:
            return False, "Settings applied successfully."
    def get_status(self) -> dict:
        """Get current application status as a dict.

        Returns:
            A dict with the current state, transcribing flag, application
            version, a human-readable engine device label, web server
            host/port/url/running info, the number of stored transcriptions,
            the remote mode, and whether server sync is enabled.
        """
        host = self.config.get('web_server.host', '127.0.0.1')
        # Prefer the port that was actually bound over the configured one.
        port = self.actual_web_port or self.config.get('web_server.port', 8080)
        remote_mode = self.config.get('remote.mode', 'local')
        if remote_mode in ('managed', 'byok') and self.transcription_engine:
            mode_label = 'Managed' if remote_mode == 'managed' else 'BYOK'
            engine_device = f"Deepgram ({mode_label})"
        elif self.transcription_engine and hasattr(self.transcription_engine, 'device'):
            engine_device = f"{self.transcription_engine.device.upper()} ({self.transcription_engine.compute_type})"
        else:
            engine_device = "Not initialized"
        return {
            "state": self.state,
            "is_transcribing": self.is_transcribing,
            "version": __version__,
            "engine_device": engine_device,
            "web_server": {
                "host": host,
                "port": port,
                "url": f"http://{host}:{port}",
                "running": self.web_server_thread is not None and self.web_server_thread.is_alive(),
            },
            "transcription_count": len(self.transcriptions),
            "remote_mode": remote_mode,
            "server_sync_enabled": self.config.get('server_sync.enabled', False),
        }
    def get_audio_devices(self) -> list[dict]:
        """List available audio input devices."""
        import sounddevice as sd
        devices = []
        try:
            device_list = sd.query_devices()
            for i, device in enumerate(device_list):
                if device['max_input_channels'] > 0:
                    devices.append({"index": i, "name": device['name']})
        except Exception:
            pass
        if not devices:
            devices = [{"index": 0, "name": "Default"}]
        return devices
    def get_compute_devices(self) -> list[dict]:
        """List available compute devices."""
        device_info = self.device_manager.get_device_info()
        devices = [{"id": "auto", "name": "Auto-detect"}]
        for dev_id, dev_name in device_info:
            devices.append({"id": dev_id, "name": dev_name})
        return devices
    # ── Update Checking ────────────────────────────────────────────
    def check_for_updates(self) -> dict:
        """Check for updates synchronously. Returns update info or None."""
        from client.update_checker import UpdateChecker
        gitea_url = self.config.get('updates.gitea_url', 'https://repo.anhonesthost.net')
        owner = self.config.get('updates.owner', 'streamer-tools')
        repo = self.config.get('updates.repo', 'local-transcription')
        if not gitea_url or not owner or not repo:
            return {"available": False, "error": "Update checking not configured"}
        checker = UpdateChecker(
            current_version=__version__,
            gitea_url=gitea_url,
            owner=owner,
            repo=repo,
        )
        try:
            release_info = checker.check_for_update()
            self.config.set('updates.last_check', datetime.now().isoformat())
            if release_info:
                skipped = self.config.get('updates.skipped_versions', [])
                return {
                    "available": True,
                    "version": release_info.version,
                    "download_url": release_info.download_url,
                    "release_notes": release_info.release_notes,
                    "skipped": release_info.version in skipped,
                }
            else:
                return {"available": False, "current_version": __version__}
        except Exception as e:
            return {"available": False, "error": str(e)}
    def skip_version(self, version: str):
        """Mark a version as skipped for update notifications."""
        skipped = self.config.get('updates.skipped_versions', [])
        if version not in skipped:
            skipped.append(version)
            self.config.set('updates.skipped_versions', skipped)
--- a/backend/main_headless.py
+++ b/backend/main_headless.py
@@ -0,0 +1,131 @@
 #!/usr/bin/env python3
 """Headless entry point for the Local Transcription backend.
 Runs the transcription engine + API server without any GUI (no PySide6).
 Designed to be launched as a Tauri sidecar or run standalone for development.
 Usage:
    python -m backend.main_headless [--port PORT] [--host HOST]
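 The backend prints a ready event to stdout as JSON on startup; "port" is the
 API server port (OBS display port + 1) and "obs_port" is the OBS display port:
    {"event": "ready", "port": 8081, "obs_port": 8080}
 This allows the Tauri shell to discover which ports the backend bound to.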
 """
 import argparse
 import json
 import multiprocessing
 import os
 import signal
 import sys
 from pathlib import Path
 # Must be called before anything else for PyInstaller compatibility
 # (runs at import time, before the project imports below).
 multiprocessing.freeze_support()
 if __name__ == "__main__":
    # Force the 'spawn' start method when run as a script; a RuntimeError
    # from set_start_method is deliberately ignored.
    try:
        multiprocessing.set_start_method('spawn', force=True)
    except RuntimeError:
        pass
 # Add project root to path
 # (two levels up from this file) so the `client` and `backend` imports resolve.
 project_root = Path(__file__).resolve().parent.parent
 sys.path.insert(0, str(project_root))
 # Also make the project root the working directory for the rest of the process.
 os.chdir(project_root)
 from client.instance_lock import InstanceLock
 def main():
    """Run the headless backend: OBS web server, engine, and API server.

    Parses ``--host``/``--port``, takes the single-instance lock, builds the
    controller, and serves the API with uvicorn on the OBS port + 1.
    Progress is reported as JSON lines on stdout (``state``, ``ready``,
    ``shutdown``, ``error`` events).

    Exits with status 1 when another instance holds the lock or startup
    fails, and with status 0 on SIGTERM/SIGINT. Cleanup (controller shutdown
    and lock release) runs exactly once, in the ``finally`` block.
    """
    parser = argparse.ArgumentParser(description="Local Transcription headless backend")
    parser.add_argument("--host", default="127.0.0.1", help="API server host (default: 127.0.0.1)")
    parser.add_argument("--port", type=int, default=8080, help="API server port (default: 8080)")
    args = parser.parse_args()
    instance_lock = InstanceLock()
    if not instance_lock.acquire():
        print(json.dumps({"event": "error", "message": "Another instance is already running"}),
              flush=True)
        sys.exit(1)
    # Bound before any handler can run, so cleanup never sees an unbound name.
    controller = None
    def handle_shutdown(signum, frame):
        # Only announce and exit. SystemExit unwinds through the try/finally
        # below, which performs the actual cleanup; doing it here as well
        # would shut the controller down and release the lock twice.
        print(json.dumps({"event": "shutdown"}), flush=True)
        sys.exit(0)
    try:
        # Installed inside the try so a signal arriving right after
        # registration still reaches the cleanup in `finally`.
        signal.signal(signal.SIGTERM, handle_shutdown)
        signal.signal(signal.SIGINT, handle_shutdown)
        from backend.app_controller import AppController
        from backend.api_server import APIServer
        # Override web server port from CLI arg
        from client.config import Config
        config = Config()
        config.set('web_server.host', args.host)
        config.set('web_server.port', args.port)
        # Create controller and initialize
        controller = AppController(config=config)
        # Wire a state callback that prints each state change as a JSON event
        def on_state_changed(state, message):
            event = {"event": "state", "state": state, "message": message}
            print(json.dumps(event), flush=True)
        controller.on_state_changed = on_state_changed
        # Initialize engine + web server
        controller.initialize()
        # Create API server wrapping the controller
        api_server = APIServer(controller)
        # OBS display runs on the configured port, API server on port+1
        # (falls back to the requested port if the OBS server did not start).
        obs_port = controller.actual_web_port or args.port
        api_port = obs_port + 1
        # Print ready event so Tauri can discover the API port
        print(json.dumps({
            "event": "ready",
            "port": api_port,
            "obs_port": obs_port,
        }), flush=True)
        # Run the API server (blocks)
        import uvicorn
        import logging
        logging.getLogger("uvicorn").setLevel(logging.ERROR)
        logging.getLogger("uvicorn.access").setLevel(logging.ERROR)
        uvicorn.run(
            api_server.app,
            host=args.host,
            port=api_port,
            log_level="error",
            access_log=False,
        )
    except KeyboardInterrupt:
        print(json.dumps({"event": "shutdown", "reason": "keyboard_interrupt"}), flush=True)
    except Exception as e:
        print(json.dumps({"event": "error", "message": str(e)}), flush=True)
        import traceback
        traceback.print_exc()
        sys.exit(1)
    finally:
        # Single cleanup point for every exit path, including sys.exit().
        if controller:
            controller.shutdown()
        instance_lock.release()
 # Script entry point: only run the backend when executed directly, not on import.
 if __name__ == "__main__":
    main()
--- a/backend/tests/init.py
+++ b/backend/tests/init.py
--- a/backend/tests/conftest.py
+++ b/backend/tests/conftest.py
@@ -0,0 +1,159 @@
 """Shared fixtures for backend tests.
 Heavy third-party modules (torch, sounddevice, numpy, RealtimeSTT, etc.) are
 stubbed at the *sys.modules* level before any backend code is imported.  This
 lets the test suite run on a plain Python install without GPU drivers, audio
 hardware, or heavyweight ML libraries.
 """
 import sys
 import types
 from pathlib import Path
 from unittest.mock import MagicMock
 import pytest
 # ── Project root on sys.path ────────────────────────────────────────
 # This file is backend/tests/conftest.py, so three ``parent`` hops from the
 # resolved path land on the repository root.
 project_root = Path(__file__).resolve().parent.parent.parent
 # Put the root at the front of sys.path so it is searched first; the
 # membership test avoids inserting a duplicate entry.
 if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))
 # ── Stub heavy modules before anything imports them ─────────────────
 def _stub(name: str) -> types.ModuleType:
    """Create a stub module and register it in sys.modules if not already present."""
    if name in sys.modules:
        return sys.modules[name]
    mod = types.ModuleType(name)
    sys.modules[name] = mod
    return mod
 # numpy -- must behave like a real module for `import numpy as np`
 # NOTE(review): _stub() returns an already-imported module unchanged, so if the
 # real numpy is in sys.modules when this runs, the assignments below overwrite
 # attributes on the real package -- confirm that cannot happen in the test env.
 _np = _stub("numpy")
 # Scalar dtypes are stood in for by plain Python builtins.
 _np.float32 = float
 _np.float64 = float
 _np.int16 = int
 # ndarray is the MagicMock class itself (not an instance).
 _np.ndarray = MagicMock
 # Array constructors are mocks that return a MagicMock.
 _np.array = MagicMock(return_value=MagicMock())
 _np.zeros = MagicMock(return_value=MagicMock())
 _np.frombuffer = MagicMock(return_value=MagicMock())
 # torch + sub-modules
 _torch = _stub("torch")
 # Both accelerator probes report "not available".
 _torch.cuda = MagicMock()
 _torch.cuda.is_available = MagicMock(return_value=False)
 _torch.backends = MagicMock()
 _torch.backends.mps = MagicMock()
 _torch.backends.mps.is_available = MagicMock(return_value=False)
 # Register the dotted names in sys.modules as well. These are plain empty
 # modules created by _stub(), separate objects from the MagicMock attributes
 # assigned on _torch above.
 _stub("torch.cuda")
 _stub("torch.backends")
 _stub("torch.backends.mps")
 _stub("torchaudio")
 # sounddevice
 _sd = _stub("sounddevice")
 # Device enumeration returns an empty list.
 _sd.query_devices = MagicMock(return_value=[])
 # RealtimeSTT (imported by transcription_engine_realtime)
 _rtstt = _stub("RealtimeSTT")
 # The recorder is the MagicMock class, so instantiating it yields a mock.
 _rtstt.AudioToTextRecorder = MagicMock
 # faster_whisper (sometimes imported transitively)
 _stub("faster_whisper")
 # noisereduce
 _stub("noisereduce")
 # scipy
 # NOTE(review): `_scipy` is bound but not referenced again in this file.
 _scipy = _stub("scipy")
 _stub("scipy.signal")
 _stub("scipy.io")
 _stub("scipy.io.wavfile")
 # webrtcvad
 _stub("webrtcvad")
 # openwakeword
 _stub("openwakeword")
 # pvporcupine
 _stub("pvporcupine")
 # PySide6 (should not be needed, but just in case)
 _stub("PySide6")
 _stub("PySide6.QtWidgets")
 _stub("PySide6.QtCore")
 _stub("PySide6.QtGui")
 # websockets
 _ws = _stub("websockets")
 # NOTE(review): connect is the MagicMock class, so calling it returns a
 # MagicMock rather than an awaitable connection -- confirm no test awaits it.
 _ws.connect = MagicMock
 # deepgram  (cloud transcription)
 _stub("deepgram")
 # ── Fixtures ────────────────────────────────────────────────────────
@pytest.fixture
 def mock_config(tmp_path):
    """Return a Config object backed by a temporary file.
    This avoids touching the real user config at ~/.local-transcription/.
    """
    config_file = tmp_path / "test_config.yaml"
    from client.config import Config
    config = Config(config_path=str(config_file))
    return config
@pytest.fixture
 def controller(mock_config):
    """Return an AppController wired to *mock_config* without starting heavy
    subsystems (engine, web server, device manager).
    The transcription engine, web server thread, and DeviceManager are all
    replaced with lightweight mocks so the test suite can run without a GPU,
    audio hardware, or a free network port.
    """
    from unittest.mock import patch
    with patch("backend.app_controller.DeviceManager") as MockDM, \
         patch("backend.app_controller.RealtimeTranscriptionEngine"), \
         patch("backend.app_controller.DeepgramTranscriptionEngine"), \
         patch("backend.app_controller.TranscriptionWebServer"), \
         patch("backend.app_controller.ServerSyncClient"):
        # DeviceManager stub
        dm_instance = MagicMock()
        dm_instance.get_device_info.return_value = [("cpu", "CPU")]
        dm_instance.get_device_for_whisper.return_value = "cpu"
        MockDM.return_value = dm_instance
        from backend.app_controller import AppController
        ctrl = AppController(config=mock_config)
        yield ctrl
@pytest.fixture
 def api_client(controller):
    """Return an httpx.AsyncClient speaking ASGI to the APIServer's FastAPI app.
    Usage in tests::
        async def test_something(api_client):
            resp = await api_client.get("/api/status")
            assert resp.status_code == 200
    """
    from backend.api_server import APIServer
    import httpx
    api = APIServer(controller)
    transport = httpx.ASGITransport(app=api.app)
    client = httpx.AsyncClient(transport=transport, base_url="http://testserver")
    return client
--- a/backend/tests/test_api_server.py
+++ b/backend/tests/test_api_server.py
@@ -0,0 +1,150 @@
 """Tests for backend.api_server.APIServer REST endpoints."""
 import sys
 from pathlib import Path
 from unittest.mock import MagicMock, patch
 import pytest
 import pytest_asyncio
 # Ensure project root is on path
 project_root = Path(__file__).resolve().parent.parent.parent
 sys.path.insert(0, str(project_root))
 # ── GET /api/status ─────────────────────────────────────────────────
@pytest.mark.asyncio
 async def test_get_status(api_client):
    resp = await api_client.get("/api/status")
    assert resp.status_code == 200
    data = resp.json()
    assert "state" in data
    assert "is_transcribing" in data
    assert "version" in data
    assert "web_server" in data
 # ── GET /api/config ─────────────────────────────────────────────────
@pytest.mark.asyncio
 async def test_get_config(api_client):
    resp = await api_client.get("/api/config")
    assert resp.status_code == 200
    data = resp.json()
    # The config should be a dict (the raw config mapping)
    assert isinstance(data, dict)
 # ── PUT /api/config ─────────────────────────────────────────────────
@pytest.mark.asyncio
 async def test_put_config(api_client, controller):
    """Updating config via PUT should persist and return success."""
    # Patch reload_engine to avoid heavy lifting
    controller.reload_engine = MagicMock(return_value=(True, "ok"))
    controller.current_model_size = "base.en"
    controller.current_device_config = "auto"
    controller.config.set("transcription.model", "base.en")
    controller.config.set("transcription.device", "auto")
    resp = await api_client.put(
        "/api/config",
        json={"settings": {"display.font_size": 24}},
    )
    assert resp.status_code == 200
    body = resp.json()
    assert body["status"] == "ok"
    # Verify the value was actually saved
    assert controller.config.get("display.font_size") == 24
 # ── POST /api/start (engine not ready) ─────────────────────────────
@pytest.mark.asyncio
 async def test_start_when_not_ready(api_client, controller):
    """Starting transcription without an engine should return 400."""
    controller.transcription_engine = None
    resp = await api_client.post("/api/start")
    assert resp.status_code == 400
 # ── POST /api/clear ─────────────────────────────────────────────────
@pytest.mark.asyncio
 async def test_clear(api_client, controller):
    from client.transcription_engine_realtime import TranscriptionResult
    from datetime import datetime
    controller.transcriptions = [
        TranscriptionResult(text="One", is_final=True, timestamp=datetime.now(), user_name="U"),
    ]
    resp = await api_client.post("/api/clear")
    assert resp.status_code == 200
    body = resp.json()
    assert body["cleared"] == 1
    assert len(controller.transcriptions) == 0
 # ── GET /api/audio-devices ──────────────────────────────────────────
@pytest.mark.asyncio
 async def test_get_audio_devices(api_client, controller):
    """Audio devices endpoint should return a list, even when mocked."""
    # Mock sounddevice so the test works without audio hardware
    with patch("backend.app_controller.AppController.get_audio_devices",
               return_value=[{"index": 0, "name": "Mock Mic"}]):
        resp = await api_client.get("/api/audio-devices")
    assert resp.status_code == 200
    data = resp.json()
    assert "devices" in data
    assert len(data["devices"]) >= 1
 # ── GET /api/compute-devices ────────────────────────────────────────
@pytest.mark.asyncio
 async def test_get_compute_devices(api_client, controller):
    resp = await api_client.get("/api/compute-devices")
    assert resp.status_code == 200
    data = resp.json()
    assert "devices" in data
    # At minimum we get the "Auto-detect" entry
    assert any(d["id"] == "auto" for d in data["devices"])
 # ── GET /api/check-update ──────────────────────────────────────────
@pytest.mark.asyncio
 async def test_check_update(api_client, controller):
    """check-update should return a dict with an 'available' key."""
    with patch.object(controller, "check_for_updates",
                      return_value={"available": False, "current_version": "1.0.0"}):
        resp = await api_client.get("/api/check-update")
    assert resp.status_code == 200
    data = resp.json()
    assert "available" in data
 # ── GET /api/version ────────────────────────────────────────────────
@pytest.mark.asyncio
 async def test_version(api_client):
    resp = await api_client.get("/api/version")
    assert resp.status_code == 200
    data = resp.json()
    assert "version" in data
    # Should be a non-empty string
    assert isinstance(data["version"], str)
    assert len(data["version"]) > 0
--- a/backend/tests/test_app_controller.py
+++ b/backend/tests/test_app_controller.py
@@ -0,0 +1,181 @@
 """Tests for backend.app_controller.AppController."""
 import sys
 from datetime import datetime
 from pathlib import Path
 from unittest.mock import MagicMock, patch
 import pytest
 # Ensure project root is on path
 project_root = Path(__file__).resolve().parent.parent.parent
 sys.path.insert(0, str(project_root))
 from backend.app_controller import AppState
 # ── basic state ─────────────────────────────────────────────────────
 def test_initial_state(controller):
    """A new controller reports ``AppState.INITIALIZING`` and is not transcribing."""
    assert controller.is_transcribing is False
    assert controller.state == AppState.INITIALIZING
 # ── start / stop ────────────────────────────────────────────────────
 def test_start_transcription_without_engine(controller):
    """Starting transcription before the engine is ready should fail gracefully."""
    controller.transcription_engine = None
    success, message = controller.start_transcription()
    assert success is False
    assert "not ready" in message.lower()
 def test_start_stop_cycle(controller):
    """Start then stop transcription against a mocked engine that reports ready."""
    fake_engine = MagicMock()
    fake_engine.is_ready.return_value = True
    fake_engine.start_recording.return_value = True
    controller.transcription_engine = fake_engine
    # Start: the controller flips into the transcribing state.
    started, _ = controller.start_transcription()
    assert started is True
    assert controller.is_transcribing is True
    assert controller.state == AppState.TRANSCRIBING
    # Stop: the flag is cleared and the engine is stopped exactly once.
    stopped, _ = controller.stop_transcription()
    assert stopped is True
    assert controller.is_transcribing is False
    fake_engine.stop_recording.assert_called_once()
 def test_double_start_rejected(controller):
    """Calling start_transcription twice should reject the second call."""
    engine = MagicMock()
    engine.is_ready.return_value = True
    engine.start_recording.return_value = True
    controller.transcription_engine = engine
    controller.start_transcription()
    success, message = controller.start_transcription()
    assert success is False
    assert "already" in message.lower()
 # ── transcription storage ───────────────────────────────────────────
 def test_clear_transcriptions(controller):
    """``clear_transcriptions`` empties the stored list and returns how many were removed."""
    from client.transcription_engine_realtime import TranscriptionResult
    first = TranscriptionResult(text="Hello", is_final=True, timestamp=datetime.now(), user_name="Alice")
    second = TranscriptionResult(text="World", is_final=True, timestamp=datetime.now(), user_name="Bob")
    controller.transcriptions = [first, second]
    removed = controller.clear_transcriptions()
    assert removed == 2
    assert len(controller.transcriptions) == 0
 def test_get_transcriptions_text_with_timestamps(controller):
    """With ``include_timestamps=True`` the text carries the [HH:MM:SS] stamp, speaker and line."""
    from client.transcription_engine_realtime import TranscriptionResult
    # A fixed timestamp makes the expected "[10:30:45]" prefix deterministic.
    stamp = datetime(2025, 1, 15, 10, 30, 45)
    controller.transcriptions = [
        TranscriptionResult(text="Test line", is_final=True, timestamp=stamp, user_name="User"),
    ]
    rendered = controller.get_transcriptions_text(include_timestamps=True)
    for fragment in ("[10:30:45]", "User:", "Test line"):
        assert fragment in rendered
 # ── settings / engine reload ────────────────────────────────────────
 def test_apply_settings_triggers_reload_on_model_change(controller):
    """Changing the transcription model should trigger an engine reload."""
    controller.current_model_size = "base.en"
    controller.current_device_config = "auto"
    # Patch reload_engine so it doesn't actually try to spin up threads
    controller.reload_engine = MagicMock(return_value=(True, "reloaded"))
    reloaded, msg = controller.apply_settings({
        "transcription.model": "small.en",
    })
    assert reloaded is True
    controller.reload_engine.assert_called_once()
 def test_apply_settings_no_reload_when_same(controller):
    """If model and device haven't changed, no reload should happen."""
    controller.current_model_size = "base.en"
    controller.current_device_config = "auto"
    # Ensure config returns the same values
    controller.config.set("transcription.model", "base.en")
    controller.config.set("transcription.device", "auto")
    controller.reload_engine = MagicMock(return_value=(True, "reloaded"))
    reloaded, msg = controller.apply_settings({
        "display.font_size": 20,
    })
    assert reloaded is False
    controller.reload_engine.assert_not_called()
 # ── transcription callbacks ─────────────────────────────────────────
 def test_on_final_transcription_callback_fires(controller):
    """``_on_final_transcription`` stores the result and forwards it to ``on_transcription``."""
    from client.transcription_engine_realtime import TranscriptionResult
    received = []

    def _capture(data):
        received.append(data)

    controller.on_transcription = _capture
    # Put the controller into the transcribing state before feeding a result.
    controller.is_transcribing = True
    controller._set_state(AppState.TRANSCRIBING)
    final_result = TranscriptionResult(
        text="Hello world",
        is_final=True,
        timestamp=datetime.now(),
        user_name="Tester",
    )
    controller._on_final_transcription(final_result)
    assert len(controller.transcriptions) == 1
    assert len(received) == 1
    delivered = received[0]
    assert delivered["text"] == "Hello world"
    assert delivered["user_name"] == "Tester"
    assert delivered["is_preview"] is False
 def test_on_final_transcription_ignored_when_not_transcribing(controller):
    """A final result delivered while ``is_transcribing`` is False is not stored."""
    from client.transcription_engine_realtime import TranscriptionResult
    controller.is_transcribing = False
    stray_result = TranscriptionResult(
        text="Should be ignored",
        is_final=True,
        timestamp=datetime.now(),
        user_name="Ghost",
    )
    controller._on_final_transcription(stray_result)
    assert len(controller.transcriptions) == 0
--- a/backend/tests/test_main_headless.py
+++ b/backend/tests/test_main_headless.py
@@ -0,0 +1,56 @@
 """Tests for backend.main_headless ready-event JSON format."""
 import sys
 from pathlib import Path
 import pytest
 # Ensure project root is on path
 project_root = Path(__file__).resolve().parent.parent.parent
 sys.path.insert(0, str(project_root))
 def test_ready_event_reports_api_port_not_obs_port():
    """The ready JSON printed by main_headless must set ``port`` to
    ``obs_port + 1`` (the API port), not the OBS display port.
    From main_headless.py::
        obs_port = controller.actual_web_port or args.port
        api_port = obs_port + 1
        print(json.dumps({
            "event": "ready",
            "port": api_port,
            "obs_port": obs_port,
        }), flush=True)
    We verify this contract by reading the source and checking the
    structure directly (running main() would start a real server).
    """
    import ast
    import textwrap
    source_path = project_root / "backend" / "main_headless.py"
    source = source_path.read_text()
    # Verify the key relationships exist in the source:
    # 1. api_port = obs_port + 1
    assert "api_port = obs_port + 1" in source, (
        "Expected `api_port = obs_port + 1` in main_headless.py"
    )
    # 2. The ready event JSON uses api_port for "port", not obs_port
    assert '"port": api_port' in source or "'port': api_port" in source, (
        "The ready event should report api_port as 'port'"
    )
    # 3. obs_port is also included separately
    assert '"obs_port": obs_port' in source or "'obs_port': obs_port" in source, (
        "The ready event should also include 'obs_port'"
    )
    # 4. Verify the event name
    assert '"event": "ready"' in source or "'event': 'ready'" in source, (
        "The ready event should have event='ready'"
    )
--- a/client/config.py
+++ b/client/config.py
@@ -48,6 +48,25 @@ class Config:
            # Save the default configuration
            self.save()
        # Migrate remote_processing -> remote
        self._migrate_remote_config()
    def _migrate_remote_config(self):
        """Migrate old remote_processing config to new remote config."""
        if 'remote_processing' in self.config and 'remote' not in self.config:
            old = self.config['remote_processing']
            self.config['remote'] = {
                'mode': 'managed' if old.get('enabled', False) else 'local',
                'server_url': old.get('server_url', ''),
                'auth_token': '',
                'byok_api_key': old.get('api_key', ''),
                'deepgram_model': 'nova-2',
                'language': 'en-US',
                'fallback_to_local': old.get('fallback_to_local', True),
            }
            del self.config['remote_processing']
            self.save()
    def save(self) -> None:
        """Save current configuration to file."""
        with open(self.config_path, 'w') as f:
--- a/client/deepgram_transcription.py
+++ b/client/deepgram_transcription.py
@@ -0,0 +1,528 @@
 """Deepgram-based transcription engine using WebSocket streaming.
 Supports two modes:
  - Managed mode: connects to a proxy server that handles Deepgram credentials
  - BYOK mode: connects directly to the Deepgram API with a user-provided key
 Implements the same duck-type interface as RealtimeTranscriptionEngine so
 MainWindow can use it as a drop-in replacement.
 """
 import asyncio
 import json
 import logging
 import numpy as np
 import threading
 from datetime import datetime
 from queue import Queue, Empty
 from typing import Optional, Callable
 from client.transcription_engine_realtime import TranscriptionResult
 logger = logging.getLogger(__name__)
 class DeepgramTranscriptionEngine:
    """
    Transcription engine that streams audio to Deepgram via WebSocket.
    In managed mode the connection goes through a proxy at
    ``wss://<server>/ws/transcribe`` which handles authentication and
    Deepgram credentials.  In BYOK (bring-your-own-key) mode the
    connection goes directly to the Deepgram API.
    """
    # ------------------------------------------------------------------ #
    #  Construction / configuration
    # ------------------------------------------------------------------ #
    def __init__(self, config, user_name: str = "User", input_device_index: Optional[int] = None):
        """
        Initialise the engine from a :class:`client.config.Config` object.
        Args:
            config: Application ``Config`` instance.
            user_name: Display name attached to transcriptions.
            input_device_index: Index of the audio input device to use
                (``None`` for the system default).
        """
        self.config = config
        self.user_name = user_name
        self.input_device_index = input_device_index
        # Mode: 'managed' (proxy) or 'byok' (direct Deepgram)
        self.mode: str = config.get("remote.mode", "managed")
        # Managed-mode settings
        self.server_url: str = config.get("remote.server_url", "")
        self.auth_token: str = config.get("remote.auth_token", "")
        # BYOK-mode settings
        self.byok_api_key: str = config.get("remote.byok_api_key", "")
        # Deepgram model / language (used in both modes)
        self.deepgram_model: str = config.get("remote.deepgram_model", "nova-2")
        self.language: str = config.get("remote.language", "en-US")
        # Audio parameters
        self.sample_rate: int = 16000
        self.channels: int = 1
        self.blocksize: int = 4096
        # Callbacks
        self.realtime_callback: Optional[Callable[[TranscriptionResult], None]] = None
        self.final_callback: Optional[Callable[[TranscriptionResult], None]] = None
        self._on_error: Optional[Callable[[str], None]] = None
        self._on_credits_low: Optional[Callable[[int], None]] = None
        # Internal state
        self._is_initialized: bool = False
        self._is_recording: bool = False
        self._stop_event: threading.Event = threading.Event()
        self._audio_queue: Queue = Queue()
        # Asyncio event loop running in a daemon thread
        self._loop: Optional[asyncio.AbstractEventLoop] = None
        self._thread: Optional[threading.Thread] = None
        # WebSocket handle (set inside the async context)
        self._ws = None
        # sounddevice InputStream
        self._stream = None
    # ------------------------------------------------------------------ #
    #  Callback setters
    # ------------------------------------------------------------------ #
    def set_callbacks(
        self,
        realtime_callback: Optional[Callable[[TranscriptionResult], None]] = None,
        final_callback: Optional[Callable[[TranscriptionResult], None]] = None,
    ):
        """Install the result callbacks (same API as RealtimeTranscriptionEngine).

        Both slots are overwritten on every call, so omitting an argument
        resets that callback to ``None``.
        """
        self.realtime_callback, self.final_callback = realtime_callback, final_callback
    def set_error_callback(self, fn: Optional[Callable[[str], None]]):
        """Register the error handler; ``fn`` is called with a message string.

        Passing ``None`` removes the handler.
        """
        self._on_error = fn
    def set_credits_low_callback(self, fn: Optional[Callable[[int], None]]):
        """Register the low-credit handler; ``fn`` is called with the seconds remaining.

        Passing ``None`` removes the handler.
        """
        self._on_credits_low = fn
    # ------------------------------------------------------------------ #
    #  Public interface (duck-typed with RealtimeTranscriptionEngine)
    # ------------------------------------------------------------------ #
    def initialize(self) -> bool:
        """Check that the settings for the selected mode are present.

        No connection is opened here. Managed mode needs a server URL and an
        auth token, BYOK mode needs an API key, and any other mode is
        rejected. Each failure is logged.

        Returns ``True`` when the engine is ready to start recording (also
        when it was already initialised), ``False`` otherwise.
        """
        if self._is_initialized:
            return True

        mode = self.mode
        if mode not in ("managed", "byok"):
            logger.error("Unknown remote mode: %s (expected 'managed' or 'byok')", self.mode)
            return False

        if mode == "managed":
            # The server URL is checked before the token.
            if not self.server_url:
                logger.error("Managed mode requires a server URL (remote.server_url)")
                return False
            if not self.auth_token:
                logger.error("Managed mode requires an auth token (remote.auth_token)")
                return False
        elif not self.byok_api_key:
            logger.error("BYOK mode requires an API key (remote.byok_api_key)")
            return False

        self._is_initialized = True
        logger.info("DeepgramTranscriptionEngine initialised in %s mode", self.mode)
        return True
    def start_recording(self) -> bool:
        """Begin a recording session.

        Starts the daemon thread that runs the event loop, then opens the
        audio input stream. Calling this while already recording is a no-op.

        Returns ``True`` on success (or if already recording) and ``False``
        when the engine is not initialised or the audio stream fails to open.
        """
        if not self._is_initialized:
            logger.error("Engine not initialised -- call initialize() first")
            return False
        if self._is_recording:
            return True

        self._stop_event.clear()
        self._is_recording = True

        # The event-loop thread owns the WebSocket send/receive side.
        worker = threading.Thread(target=self._run_event_loop, daemon=True)
        self._thread = worker
        worker.start()

        # Audio capture is opened after the worker thread has been started.
        try:
            self._start_audio_stream()
        except Exception as exc:
            logger.error("Failed to open audio stream: %s", exc)
            # Undo the recording flag and set the stop event.
            self._is_recording = False
            self._stop_event.set()
            return False

        logger.info("Recording started")
        return True
    def stop_recording(self):
        """End the recording session: stop audio, close the WebSocket, join the thread.

        Does nothing when no recording is in progress.
        """
        if not self._is_recording:
            return

        self._is_recording = False
        self._stop_event.set()

        # Stop audio capture first.
        self._stop_audio_stream()

        # The socket is closed by scheduling _close_ws() on the worker's
        # event loop, which is only possible while that loop is still open.
        if self._ws is not None and self._loop is not None:
            if not self._loop.is_closed():
                asyncio.run_coroutine_threadsafe(self._close_ws(), self._loop)

        # Wait up to five seconds for the worker thread to finish.
        if self._thread is not None:
            self._thread.join(timeout=5)
            self._thread = None

        logger.info("Recording stopped")
    def stop(self):
        """Shut the engine down completely.

        Stops recording and marks the engine as uninitialised, so
        ``initialize()`` must be called again before the next recording.
        """
        self.stop_recording()
        self._is_initialized = False
        logger.info("DeepgramTranscriptionEngine shut down")
    def is_ready(self) -> bool:
        """Report whether ``initialize()`` has succeeded (the ``_is_initialized`` flag)."""
        return self._is_initialized
    # ------------------------------------------------------------------ #
    #  Audio capture (sounddevice)
    # ------------------------------------------------------------------ #
    def _start_audio_stream(self):
        """Open a ``sounddevice.InputStream`` that feeds the audio queue."""
        import sounddevice as sd
        def _audio_callback(indata, frames, time_info, status):  # noqa: ARG001
            if status:
                logger.warning("Audio stream status: %s", status)
            if self._is_recording:
                # float32 -> int16 PCM bytes
                pcm = (indata * 32767).astype(np.int16).tobytes()
                self._audio_queue.put(pcm)
        self._stream = sd.InputStream(
            samplerate=self.sample_rate,
            blocksize=self.blocksize,
            channels=self.channels,
            dtype="float32",
            device=self.input_device_index,
            callback=_audio_callback,
        )
        self._stream.start()
    def _stop_audio_stream(self):
        """Close the audio input stream."""
        if self._stream is not None:
            try:
                self._stream.stop()
                self._stream.close()
            except Exception as exc:
                logger.debug("Error closing audio stream: %s", exc)
            finally:
                self._stream = None
    # ------------------------------------------------------------------ #
    #  Asyncio event-loop (runs in daemon thread)
    # ------------------------------------------------------------------ #
    def _run_event_loop(self):
        """Entry point for the daemon thread -- runs the async event loop."""
        self._loop = asyncio.new_event_loop()
        asyncio.set_event_loop(self._loop)
        try:
            self._loop.run_until_complete(self._ws_lifecycle())
        except Exception as exc:
            logger.error("Event-loop error: %s", exc)
        finally:
            try:
                self._loop.run_until_complete(self._loop.shutdown_asyncgens())
            except Exception:
                pass
            self._loop.close()
            self._loop = None
    async def _ws_lifecycle(self):
        """Connect, authenticate (if managed), then run send/receive loops."""
        import websockets
        try:
            ws_url, extra_headers = self._build_ws_url_and_headers()
            logger.info("Connecting to %s", ws_url)
            self._ws = await websockets.connect(
                ws_url,
                additional_headers=extra_headers,
                ping_interval=20,
                ping_timeout=10,
            )
            # Managed mode: send auth message and wait for ready
            if self.mode == "managed":
                if not await self._managed_handshake():
                    return
            # Run send and receive concurrently
            await asyncio.gather(
                self._send_loop(),
                self._receive_loop(),
            )
        except asyncio.CancelledError:
            pass
        except Exception as exc:
            msg = f"WebSocket error: {exc}"
            logger.error(msg)
            if self._on_error:
                self._on_error(msg)
        finally:
            await self._close_ws()
    def _build_ws_url_and_headers(self):
        """Return ``(url, headers)`` depending on the current mode."""
        if self.mode == "managed":
            # Ensure the server URL uses wss:// and append the path
            url = self.server_url.rstrip("/")
            if not url.startswith("ws://") and not url.startswith("wss://"):
                url = f"wss://{url}"
            url = f"{url}/ws/transcribe"
            return url, {}
        # BYOK -- connect directly to Deepgram
        params = (
            f"model={self.deepgram_model}"
            f"&language={self.language}"
            "&interim_results=true"
            "&encoding=linear16"
            f"&sample_rate={self.sample_rate}"
            f"&channels={self.channels}"
        )
        url = f"wss://api.deepgram.com/v1/listen?{params}"
        headers = {"Authorization": f"Token {self.byok_api_key}"}
        return url, headers
    # -- managed-mode handshake ---------------------------------------- #
    async def _managed_handshake(self) -> bool:
        """Send auth message and wait for ``ready`` (managed mode).
        Returns ``True`` on success.
        """
        auth_msg = {
            "type": "auth",
            "token": self.auth_token,
            "config": {
                "model": self.deepgram_model,
                "language": self.language,
                "sample_rate": self.sample_rate,
                "channels": self.channels,
                "encoding": "linear16",
                "interim_results": True,
            },
        }
        await self._ws.send(json.dumps(auth_msg))
        try:
            raw = await asyncio.wait_for(self._ws.recv(), timeout=15)
            data = json.loads(raw)
            if data.get("type") == "ready":
                logger.info("Managed proxy is ready")
                return True
            if data.get("type") == "error":
                err = data.get("message", "unknown error")
                logger.error("Auth error from proxy: %s", err)
                if self._on_error:
                    self._on_error(f"Proxy auth error: {err}")
                return False
            logger.warning("Unexpected handshake message: %s", data)
            return False
        except asyncio.TimeoutError:
            logger.error("Timed out waiting for proxy ready message")
            if self._on_error:
                self._on_error("Timed out waiting for proxy ready message")
            return False
    # -- send loop ----------------------------------------------------- #
    async def _send_loop(self):
        """Drain the audio queue and push raw PCM bytes over the WebSocket."""
        while not self._stop_event.is_set():
            try:
                pcm_bytes = self._audio_queue.get(timeout=0.1)
            except Empty:
                continue
            try:
                await self._ws.send(pcm_bytes)
            except Exception as exc:
                if not self._stop_event.is_set():
                    logger.error("Send error: %s", exc)
                break
    # -- receive loop -------------------------------------------------- #
    async def _receive_loop(self):
        """Listen for messages from the WebSocket and dispatch them."""
        while not self._stop_event.is_set():
            try:
                raw = await asyncio.wait_for(self._ws.recv(), timeout=1.0)
            except asyncio.TimeoutError:
                continue
            except Exception as exc:
                if not self._stop_event.is_set():
                    logger.error("Receive error: %s", exc)
                break
            try:
                data = json.loads(raw)
            except (json.JSONDecodeError, TypeError):
                logger.debug("Non-JSON message received, ignoring")
                continue
            if self.mode == "managed":
                self._handle_managed_message(data)
            else:
                self._handle_byok_message(data)
    # ------------------------------------------------------------------ #
    #  Message handlers
    # ------------------------------------------------------------------ #
    def _handle_managed_message(self, data: dict):
        """Process a message from the managed proxy."""
        msg_type = data.get("type", "")
        if msg_type == "transcript":
            text = data.get("text", "")
            is_final = data.get("is_final", False)
            if text.strip():
                result = TranscriptionResult(
                    text=text,
                    is_final=is_final,
                    timestamp=datetime.now(),
                    user_name=self.user_name,
                )
                if is_final:
                    if self.final_callback:
                        self.final_callback(result)
                else:
                    if self.realtime_callback:
                        self.realtime_callback(result)
        elif msg_type == "credits_low":
            seconds_remaining = data.get("seconds_remaining", 0)
            logger.warning("Credits low -- %d seconds remaining", seconds_remaining)
            if self._on_credits_low:
                self._on_credits_low(int(seconds_remaining))
        elif msg_type == "error":
            code = data.get("code", "")
            message = data.get("message", "Unknown error")
            logger.error("Proxy error [%s]: %s", code, message)
            if self._on_error:
                self._on_error(f"[{code}] {message}" if code else message)
        elif msg_type == "session_end":
            seconds_used = data.get("seconds_used", 0)
            logger.info("Session ended -- %d seconds used", seconds_used)
        elif msg_type == "ready":
            # May arrive again after reconnects; safe to ignore.
            logger.debug("Received ready message (already connected)")
        else:
            logger.debug("Unhandled managed message type: %s", msg_type)
    def _handle_byok_message(self, data: dict):
        """Process a message received directly from the Deepgram API."""
        msg_type = data.get("type", "")
        if msg_type == "Results":
            channel = data.get("channel", {})
            alternatives = channel.get("alternatives", [])
            if not alternatives:
                return
            transcript = alternatives[0].get("transcript", "")
            is_final = data.get("is_final", False)
            if transcript.strip():
                result = TranscriptionResult(
                    text=transcript,
                    is_final=is_final,
                    timestamp=datetime.now(),
                    user_name=self.user_name,
                )
                if is_final:
                    if self.final_callback:
                        self.final_callback(result)
                else:
                    if self.realtime_callback:
                        self.realtime_callback(result)
        elif msg_type == "Metadata":
            logger.debug("Deepgram metadata: %s", data)
        elif msg_type == "UtteranceEnd":
            logger.debug("Deepgram utterance end")
        else:
            logger.debug("Unhandled Deepgram message type: %s", msg_type)
    # ------------------------------------------------------------------ #
    #  Helpers
    # ------------------------------------------------------------------ #
    async def _close_ws(self):
        """Close the WebSocket connection if open."""
        if self._ws is not None:
            try:
                await self._ws.close()
            except Exception:
                pass
            self._ws = None
    def set_user_name(self, user_name: str):
        """Store ``user_name``; results built after this call carry the new value."""
        self.user_name = user_name
    def is_recording_active(self) -> bool:
        """Report the engine's recording flag (``True`` while recording)."""
        return self._is_recording
    def __repr__(self) -> str:
        return (
            f"DeepgramTranscriptionEngine(mode={self.mode}, "
            f"recording={self._is_recording})"
        )
    def __del__(self):
        """Best-effort cleanup."""
        try:
            self.stop()
        except Exception:
            pass
--- a/client/tests/init.py
+++ b/client/tests/init.py
--- a/client/tests/test_config.py
+++ b/client/tests/test_config.py
@@ -0,0 +1,78 @@
 """Tests for client.config.Config."""
 import sys
 from pathlib import Path
 import pytest
 # Ensure project root is on path
 project_root = Path(__file__).resolve().parent.parent.parent
 sys.path.insert(0, str(project_root))
 from client.config import Config
@pytest.fixture
 def cfg(tmp_path):
    """Build a Config pointed at a file under pytest's ``tmp_path`` so tests
    never read or write the real user configuration."""
    config_file = tmp_path / "test_config.yaml"
    return Config(config_path=str(config_file))
 # ── dot-notation get ────────────────────────────────────────────────
 def test_dot_notation_get(cfg):
    """Config.get should traverse nested dicts using dot-separated keys."""
    cfg.config = {"audio": {"sample_rate": 16000}}
    assert cfg.get("audio.sample_rate") == 16000
 # ── dot-notation set ────────────────────────────────────────────────
 def test_dot_notation_set(cfg):
    """Config.set should create/update nested values via dot-separated keys."""
    cfg.set("audio.sample_rate", 44100)
    assert cfg.config["audio"]["sample_rate"] == 44100
    # Also readable via .get
    assert cfg.get("audio.sample_rate") == 44100
 # ── missing key returns default ─────────────────────────────────────
 def test_missing_key_returns_default(cfg):
    """Accessing a nonexistent key should return the supplied default."""
    assert cfg.get("nonexistent.path", "fallback") == "fallback"
    assert cfg.get("also.missing") is None  # default default is None
 # ── nested set creates intermediate dicts ───────────────────────────
 def test_nested_set_creates_path(cfg):
    """Setting a deeply nested key should create all intermediate dicts."""
    cfg.config = {}
    cfg.set("a.b.c.d", 42)
    assert cfg.config["a"]["b"]["c"]["d"] == 42
    assert cfg.get("a.b.c.d") == 42
 # ── save and reload round-trip ──────────────────────────────────────
 def test_save_and_reload(tmp_path):
    """Values written through one Config are visible to a second Config opened
    on the same file.

    No explicit ``save()`` call is made here, so the test relies on ``set()``
    persisting to disk.
    """
    config_file = str(tmp_path / "roundtrip.yaml")
    expected = {"user.name": "TestUser", "transcription.model": "tiny.en"}

    # Populate through the first instance
    writer = Config(config_path=config_file)
    writer.set("user.name", expected["user.name"])
    writer.set("transcription.model", expected["transcription.model"])

    # Read back through a brand-new instance backed by the same file
    reader = Config(config_path=config_file)
    assert reader.get("user.name") == expected["user.name"]
    assert reader.get("transcription.model") == expected["transcription.model"]
--- a/config/default_config.yaml
+++ b/config/default_config.yaml
@@ -68,11 +68,14 @@ web_server:
  port: 8080
  host: "127.0.0.1"
-remote_processing:
+remote:
-  enabled: false  # Enable remote transcription offloading
+  mode: local  # local | managed | byok
-  server_url: ""  # WebSocket URL of remote transcription service (e.g., ws://your-server:8765/ws/transcribe)
+  server_url: ""  # Proxy server URL for managed mode (e.g., wss://your-proxy.com)
-  api_key: ""  # API key for authentication
+  auth_token: ""  # JWT stored after login (managed mode)
-  fallback_to_local: true  # Fall back to local processing if remote fails
+  byok_api_key: ""  # Deepgram API key for BYOK mode
  deepgram_model: nova-2  # Deepgram model to use
  language: en-US  # Language code
  fallback_to_local: true  # Fall back to local Whisper if remote fails
 updates:
  auto_check: true  # Check for updates on startup
--- a/gui/main_window_qt.py
+++ b/gui/main_window_qt.py
@@ -18,6 +18,7 @@ sys.path.append(str(Path(__file__).resolve().parent.parent))
 from client.config import Config
 from client.device_utils import DeviceManager
 from client.transcription_engine_realtime import RealtimeTranscriptionEngine, TranscriptionResult
 from client.deepgram_transcription import DeepgramTranscriptionEngine
 from client.server_sync import ServerSyncClient
 from gui.settings_dialog_qt import SettingsDialog
 from server.web_display import TranscriptionWebServer
@@ -394,27 +395,44 @@ class MainWindow(QMainWindow):
            min_gap = self.config.get('transcription.min_gap_between_recordings', 0.0)
            min_recording = self.config.get('transcription.min_length_of_recording', 0.5)
-        self.transcription_engine = RealtimeTranscriptionEngine(
+        remote_mode = self.config.get('remote.mode', 'local')
-            model=model,
+
-            device=device,
+        if remote_mode in ('managed', 'byok'):
-            language=language,
+            # Use Deepgram-based remote transcription
-            compute_type=compute_type,
+            self.transcription_engine = DeepgramTranscriptionEngine(
-            enable_realtime_transcription=self.config.get('transcription.enable_realtime_transcription', False),
+                config=self.config,
-            realtime_model=self.config.get('transcription.realtime_model', 'tiny.en'),
+                user_name=user_name,
-            realtime_processing_pause=self.config.get('transcription.realtime_processing_pause', 0.1),
+                input_device_index=audio_device
-            silero_sensitivity=self.config.get('transcription.silero_sensitivity', 0.4),
+            )
-            silero_use_onnx=self.config.get('transcription.silero_use_onnx', True),
+            self.transcription_engine.set_callbacks(
-            webrtc_sensitivity=self.config.get('transcription.webrtc_sensitivity', 3),
+                realtime_callback=self._on_realtime_transcription,
-            post_speech_silence_duration=post_speech_silence,
+                final_callback=self._on_final_transcription
-            min_length_of_recording=min_recording,
+            )
-            min_gap_between_recordings=min_gap,
+            self.transcription_engine.set_error_callback(self._on_remote_error)
-            pre_recording_buffer_duration=self.config.get('transcription.pre_recording_buffer_duration', 0.2),
+            self.transcription_engine.set_credits_low_callback(self._on_credits_low)
-            beam_size=self.config.get('transcription.beam_size', 5),
+        else:
-            initial_prompt=self.config.get('transcription.initial_prompt', ''),
+            # Use local Whisper transcription
-            no_log_file=self.config.get('transcription.no_log_file', True),
+            self.transcription_engine = RealtimeTranscriptionEngine(
-            input_device_index=audio_device,
+                model=model,
-            user_name=user_name
+                device=device,
-        )
+                language=language,
                compute_type=compute_type,
                enable_realtime_transcription=self.config.get('transcription.enable_realtime_transcription', False),
                realtime_model=self.config.get('transcription.realtime_model', 'tiny.en'),
                realtime_processing_pause=self.config.get('transcription.realtime_processing_pause', 0.1),
                silero_sensitivity=self.config.get('transcription.silero_sensitivity', 0.4),
                silero_use_onnx=self.config.get('transcription.silero_use_onnx', True),
                webrtc_sensitivity=self.config.get('transcription.webrtc_sensitivity', 3),
                post_speech_silence_duration=post_speech_silence,
                min_length_of_recording=min_recording,
                min_gap_between_recordings=min_gap,
                pre_recording_buffer_duration=self.config.get('transcription.pre_recording_buffer_duration', 0.2),
                beam_size=self.config.get('transcription.beam_size', 5),
                initial_prompt=self.config.get('transcription.initial_prompt', ''),
                no_log_file=self.config.get('transcription.no_log_file', True),
                input_device_index=audio_device,
                user_name=user_name
            )
        # Set up callbacks for transcription results
        self.transcription_engine.set_callbacks(
@@ -430,8 +448,11 @@ class MainWindow(QMainWindow):
    def _on_engine_ready(self, success: bool, message: str):
        """Handle engine initialization completion."""
        if success:
-            # Update device label with actual device used
+            remote_mode = self.config.get('remote.mode', 'local')
-            if self.transcription_engine:
+            if remote_mode in ('managed', 'byok'):
                mode_label = 'Managed' if remote_mode == 'managed' else 'BYOK'
                self.device_label.setText(f"Device: Deepgram ({mode_label})")
            elif self.transcription_engine:
                actual_device = self.transcription_engine.device
                compute_type = self.transcription_engine.compute_type
                device_display = f"{actual_device.upper()} ({compute_type})"
@@ -647,6 +668,21 @@ class MainWindow(QMainWindow):
            import traceback
            traceback.print_exc()
    def _on_remote_error(self, error_msg: str):
        """Handle error from remote transcription service."""
        print(f"Remote transcription error: {error_msg}")
        self.status_label.setText(f"⚠ Remote error: {error_msg}")
        # Fallback to local if enabled
        if self.config.get('remote.fallback_to_local', True) and self.is_transcribing:
            print("Falling back to local transcription...")
            self.status_label.setText("⚠ Remote failed — falling back to local")
    def _on_credits_low(self, seconds_remaining: int):
        """Handle low credits warning from proxy."""
        minutes = seconds_remaining // 60
        self.status_label.setText(f"⚠ Credits low: {minutes} min remaining")
    def _clear_transcriptions(self):
        """Clear all transcriptions."""
        if not self.transcriptions:
--- a/gui/settings_dialog_qt.py
+++ b/gui/settings_dialog_qt.py
@@ -4,7 +4,7 @@ from PySide6.QtWidgets import (
    QDialog, QVBoxLayout, QHBoxLayout, QFormLayout,
    QLabel, QLineEdit, QComboBox, QCheckBox, QSlider,
    QPushButton, QMessageBox, QGroupBox, QScrollArea, QWidget,
-    QFileDialog, QColorDialog
+    QFileDialog, QColorDialog, QRadioButton
 )
 from PySide6.QtCore import Qt
 from PySide6.QtGui import QScreen, QFontDatabase, QColor
@@ -487,46 +487,91 @@ class SettingsDialog(QDialog):
        server_group.setLayout(server_layout)
        content_layout.addWidget(server_group)
-        # Remote Processing Group
+        # Transcription Mode Group
-        remote_group = QGroupBox("Remote Processing (GPU Offload)")
+        mode_group = QGroupBox("Transcription Mode")
-        remote_layout = QFormLayout()
+        mode_layout = QVBoxLayout()
-        remote_layout.setSpacing(10)
+        mode_layout.setSpacing(10)
-        self.remote_enabled_check = QCheckBox()
+        # Radio buttons for mode selection
-        self.remote_enabled_check.setToolTip(
+        self.mode_local_radio = QRadioButton("Local (Whisper)")
-            "Enable remote transcription processing:\n"
+        self.mode_local_radio.setToolTip("Transcribe locally using Whisper models")
-            "• Offload transcription to a GPU-equipped server\n"
+        self.mode_managed_radio = QRadioButton("Remote - Managed")
-            "• Reduces local CPU/GPU usage\n"
+        self.mode_managed_radio.setToolTip("Use the transcription proxy service with prepaid credits")
-            "• Requires running the remote transcription service"
+        self.mode_byok_radio = QRadioButton("Remote - BYOK (Bring Your Own Key)")
-        )
+        self.mode_byok_radio.setToolTip("Connect directly to Deepgram with your own API key")
        remote_layout.addRow("Enable Remote Processing:", self.remote_enabled_check)
-        self.remote_url_input = QLineEdit()
+        mode_layout.addWidget(self.mode_local_radio)
-        self.remote_url_input.setPlaceholderText("ws://your-server:8765/ws/transcribe")
+        mode_layout.addWidget(self.mode_managed_radio)
-        self.remote_url_input.setToolTip(
+        mode_layout.addWidget(self.mode_byok_radio)
            "WebSocket URL of the remote transcription service:\n"
            "• Format: ws://host:port/ws/transcribe\n"
            "• Use wss:// for secure connections"
        )
        remote_layout.addRow("Server URL:", self.remote_url_input)
-        self.remote_api_key_input = QLineEdit()
+        # Managed mode fields (shown when managed radio selected)
-        self.remote_api_key_input.setEchoMode(QLineEdit.Password)
+        self.managed_widget = QWidget()
-        self.remote_api_key_input.setPlaceholderText("your-api-key")
+        managed_layout = QFormLayout()
-        self.remote_api_key_input.setToolTip(
+        managed_layout.setSpacing(8)
            "API key for authentication with the remote service"
        )
        remote_layout.addRow("API Key:", self.remote_api_key_input)
-        self.remote_fallback_check = QCheckBox("Enable")
+        self.managed_server_url = QLineEdit()
-        self.remote_fallback_check.setChecked(True)
+        self.managed_server_url.setPlaceholderText("wss://your-proxy-server.com")
-        self.remote_fallback_check.setToolTip(
+        managed_layout.addRow("Server URL:", self.managed_server_url)
            "Fall back to local transcription if remote service is unavailable"
        )
        remote_layout.addRow("Fallback to Local:", self.remote_fallback_check)
-        remote_group.setLayout(remote_layout)
+        # Login/Register buttons in a row
-        content_layout.addWidget(remote_group)
+        auth_widget = QWidget()
        auth_layout = QHBoxLayout()
        auth_layout.setContentsMargins(0, 0, 0, 0)
        self.managed_login_btn = QPushButton("Login")
        self.managed_login_btn.clicked.connect(self._managed_login)
        self.managed_register_btn = QPushButton("Register")
        self.managed_register_btn.clicked.connect(self._managed_register)
        auth_layout.addWidget(self.managed_login_btn)
        auth_layout.addWidget(self.managed_register_btn)
        auth_layout.addStretch()
        auth_widget.setLayout(auth_layout)
        managed_layout.addRow("Account:", auth_widget)
        self.managed_balance_label = QLabel("Not logged in")
        managed_layout.addRow("Balance:", self.managed_balance_label)
        self.managed_fallback_check = QCheckBox("Enable")
        self.managed_fallback_check.setChecked(True)
        self.managed_fallback_check.setToolTip("Fall back to local Whisper if remote fails")
        managed_layout.addRow("Fallback to Local:", self.managed_fallback_check)
        self.managed_widget.setLayout(managed_layout)
        mode_layout.addWidget(self.managed_widget)
        # BYOK mode fields (shown when BYOK radio selected)
        self.byok_widget = QWidget()
        byok_layout = QFormLayout()
        byok_layout.setSpacing(8)
        self.byok_api_key_input = QLineEdit()
        self.byok_api_key_input.setEchoMode(QLineEdit.Password)
        self.byok_api_key_input.setPlaceholderText("your-deepgram-api-key")
        byok_layout.addRow("Deepgram API Key:", self.byok_api_key_input)
        self.byok_model_combo = QComboBox()
        self.byok_model_combo.addItems(["nova-2", "nova-2-general", "nova-2-meeting", "nova-2-phonecall", "whisper-large", "whisper-medium", "whisper-small"])
        byok_layout.addRow("Model:", self.byok_model_combo)
        self.byok_language_input = QLineEdit()
        self.byok_language_input.setText("en-US")
        self.byok_language_input.setPlaceholderText("en-US")
        byok_layout.addRow("Language:", self.byok_language_input)
        self.byok_fallback_check = QCheckBox("Enable")
        self.byok_fallback_check.setChecked(True)
        self.byok_fallback_check.setToolTip("Fall back to local Whisper if Deepgram fails")
        byok_layout.addRow("Fallback to Local:", self.byok_fallback_check)
        self.byok_widget.setLayout(byok_layout)
        mode_layout.addWidget(self.byok_widget)
        mode_group.setLayout(mode_layout)
        content_layout.addWidget(mode_group)
        # Connect radio buttons to show/hide relevant widgets
        self.mode_local_radio.toggled.connect(self._on_mode_changed)
        self.mode_managed_radio.toggled.connect(self._on_mode_changed)
        self.mode_byok_radio.toggled.connect(self._on_mode_changed)
        # Updates Group
        updates_group = QGroupBox("Software Updates")
@@ -794,11 +839,28 @@ class SettingsDialog(QDialog):
        self.server_room_input.setText(self.config.get('server_sync.room', 'default'))
        self.server_passphrase_input.setText(self.config.get('server_sync.passphrase', ''))
-        # Remote processing settings
+        # Transcription mode settings
-        self.remote_enabled_check.setChecked(self.config.get('remote_processing.enabled', False))
+        mode = self.config.get('remote.mode', 'local')
-        self.remote_url_input.setText(self.config.get('remote_processing.server_url', ''))
+        if mode == 'managed':
-        self.remote_api_key_input.setText(self.config.get('remote_processing.api_key', ''))
+            self.mode_managed_radio.setChecked(True)
-        self.remote_fallback_check.setChecked(self.config.get('remote_processing.fallback_to_local', True))
+        elif mode == 'byok':
            self.mode_byok_radio.setChecked(True)
        else:
            self.mode_local_radio.setChecked(True)
        self.managed_server_url.setText(self.config.get('remote.server_url', ''))
        self.managed_fallback_check.setChecked(self.config.get('remote.fallback_to_local', True))
        self.byok_api_key_input.setText(self.config.get('remote.byok_api_key', ''))
        self.byok_model_combo.setCurrentText(self.config.get('remote.deepgram_model', 'nova-2'))
        self.byok_language_input.setText(self.config.get('remote.language', 'en-US'))
        self.byok_fallback_check.setChecked(self.config.get('remote.fallback_to_local', True))
        # Trigger visibility update
        self._on_mode_changed()
        # Update balance if managed mode and has token
        if self.config.get('remote.auth_token'):
            self._update_managed_balance()
        # Update settings
        self.update_auto_check.setChecked(self.config.get('updates.auto_check', True))
@@ -869,11 +931,21 @@ class SettingsDialog(QDialog):
            self.config.set('server_sync.room', self.server_room_input.text())
            self.config.set('server_sync.passphrase', self.server_passphrase_input.text())
-            # Remote processing settings
+            # Transcription mode settings
-            self.config.set('remote_processing.enabled', self.remote_enabled_check.isChecked())
+            if self.mode_managed_radio.isChecked():
-            self.config.set('remote_processing.server_url', self.remote_url_input.text())
+                self.config.set('remote.mode', 'managed')
-            self.config.set('remote_processing.api_key', self.remote_api_key_input.text())
+            elif self.mode_byok_radio.isChecked():
-            self.config.set('remote_processing.fallback_to_local', self.remote_fallback_check.isChecked())
+                self.config.set('remote.mode', 'byok')
            else:
                self.config.set('remote.mode', 'local')
            self.config.set('remote.server_url', self.managed_server_url.text())
            self.config.set('remote.fallback_to_local',
                self.managed_fallback_check.isChecked() if self.mode_managed_radio.isChecked()
                else self.byok_fallback_check.isChecked())
            self.config.set('remote.byok_api_key', self.byok_api_key_input.text())
            self.config.set('remote.deepgram_model', self.byok_model_combo.currentText())
            self.config.set('remote.language', self.byok_language_input.text())
            # Update settings
            self.config.set('updates.auto_check', self.update_auto_check.isChecked())
@@ -892,6 +964,194 @@ class SettingsDialog(QDialog):
        except Exception as e:
            QMessageBox.critical(self, "Error", f"Failed to save settings:\n{e}")
    def _on_mode_changed(self):
        """Show/hide mode-specific widgets based on selected radio button."""
        self.managed_widget.setVisible(self.mode_managed_radio.isChecked())
        self.byok_widget.setVisible(self.mode_byok_radio.isChecked())
    def _managed_login(self):
        """Open a modal login dialog and authenticate against the managed proxy.

        The dialog collects an email and password and POSTs them as JSON to
        ``<server url>/auth/login``.  When the response contains a ``token`` it
        is stored under ``remote.auth_token`` and the balance label is
        refreshed.  Failures are reported in message boxes and leave the dialog
        open so the user can retry.

        The request is made synchronously with a 10 second timeout.
        """
        import json
        import urllib.request
        import urllib.error
        dialog = QDialog(self)
        dialog.setWindowTitle("Login")
        dialog.setMinimumWidth(350)
        layout = QFormLayout()
        email_input = QLineEdit()
        email_input.setPlaceholderText("you@example.com")
        layout.addRow("Email:", email_input)
        password_input = QLineEdit()
        password_input.setEchoMode(QLineEdit.Password)
        layout.addRow("Password:", password_input)
        button_layout = QHBoxLayout()
        cancel_btn = QPushButton("Cancel")
        cancel_btn.clicked.connect(dialog.reject)
        login_btn = QPushButton("Login")
        login_btn.setDefault(True)
        button_layout.addStretch()
        button_layout.addWidget(cancel_btn)
        button_layout.addWidget(login_btn)
        layout.addRow("", button_layout)
        dialog.setLayout(layout)
        def do_login():
            """Submit the credentials and handle the server's answer."""
            server_url = self.managed_server_url.text().strip().rstrip('/')
            if not server_url:
                QMessageBox.warning(dialog, "Error", "Please enter a Server URL first.")
                return
            # The Server URL field is documented with a wss:// example, but
            # urllib can only open http(s) URLs ("unknown url type: wss").
            # Map the WebSocket scheme to the matching HTTP scheme.
            scheme, sep, rest = server_url.partition("://")
            http_scheme = {"ws": "http", "wss": "https"}.get(scheme.lower())
            if sep and http_scheme:
                server_url = f"{http_scheme}://{rest}"
            payload = json.dumps({
                "email": email_input.text(),
                "password": password_input.text()
            }).encode('utf-8')
            req = urllib.request.Request(
                f"{server_url}/auth/login",
                data=payload,
                headers={"Content-Type": "application/json"},
                method="POST"
            )
            try:
                with urllib.request.urlopen(req, timeout=10) as resp:
                    data = json.loads(resp.read().decode('utf-8'))
                token = data.get('token', '')
                if token:
                    self.config.set('remote.auth_token', token)
                    self._update_managed_balance()
                    QMessageBox.information(dialog, "Success", "Logged in successfully.")
                    dialog.accept()
                else:
                    QMessageBox.warning(dialog, "Error", "Login succeeded but no token received.")
            except urllib.error.HTTPError as e:
                # Prefer the server's own error text when the body is JSON.
                try:
                    body = json.loads(e.read().decode('utf-8'))
                    msg = body.get('detail', body.get('message', str(e)))
                except Exception:
                    msg = str(e)
                QMessageBox.warning(dialog, "Login Failed", msg)
            except Exception as e:
                QMessageBox.warning(dialog, "Error", f"Could not connect to server:\n{e}")
        login_btn.clicked.connect(do_login)
        dialog.exec()
    def _managed_register(self):
        """Open a modal registration dialog and create a managed-proxy account.

        The dialog collects an email plus a password entered twice.  After the
        two passwords are confirmed to match, the credentials are POSTed as
        JSON to ``<server url>/auth/register``.  If the response carries a
        ``token`` it is stored under ``remote.auth_token`` and the balance label
        is refreshed; otherwise the user is told to log in.  Failures are shown
        in message boxes and leave the dialog open.

        The request is made synchronously with a 10 second timeout.
        """
        import json
        import urllib.request
        import urllib.error
        dialog = QDialog(self)
        dialog.setWindowTitle("Register")
        dialog.setMinimumWidth(350)
        layout = QFormLayout()
        email_input = QLineEdit()
        email_input.setPlaceholderText("you@example.com")
        layout.addRow("Email:", email_input)
        password_input = QLineEdit()
        password_input.setEchoMode(QLineEdit.Password)
        layout.addRow("Password:", password_input)
        confirm_input = QLineEdit()
        confirm_input.setEchoMode(QLineEdit.Password)
        layout.addRow("Confirm Password:", confirm_input)
        button_layout = QHBoxLayout()
        cancel_btn = QPushButton("Cancel")
        cancel_btn.clicked.connect(dialog.reject)
        register_btn = QPushButton("Register")
        register_btn.setDefault(True)
        button_layout.addStretch()
        button_layout.addWidget(cancel_btn)
        button_layout.addWidget(register_btn)
        layout.addRow("", button_layout)
        dialog.setLayout(layout)
        def do_register():
            """Validate the form, submit it and handle the server's answer."""
            if password_input.text() != confirm_input.text():
                QMessageBox.warning(dialog, "Error", "Passwords do not match.")
                return
            server_url = self.managed_server_url.text().strip().rstrip('/')
            if not server_url:
                QMessageBox.warning(dialog, "Error", "Please enter a Server URL first.")
                return
            # The Server URL field is documented with a wss:// example, but
            # urllib can only open http(s) URLs ("unknown url type: wss").
            # Map the WebSocket scheme to the matching HTTP scheme.
            scheme, sep, rest = server_url.partition("://")
            http_scheme = {"ws": "http", "wss": "https"}.get(scheme.lower())
            if sep and http_scheme:
                server_url = f"{http_scheme}://{rest}"
            payload = json.dumps({
                "email": email_input.text(),
                "password": password_input.text()
            }).encode('utf-8')
            req = urllib.request.Request(
                f"{server_url}/auth/register",
                data=payload,
                headers={"Content-Type": "application/json"},
                method="POST"
            )
            try:
                with urllib.request.urlopen(req, timeout=10) as resp:
                    data = json.loads(resp.read().decode('utf-8'))
                token = data.get('token', '')
                if token:
                    self.config.set('remote.auth_token', token)
                    self._update_managed_balance()
                    QMessageBox.information(dialog, "Success", "Account created and logged in.")
                    dialog.accept()
                else:
                    # Account exists but the server did not log the user in.
                    QMessageBox.information(dialog, "Success",
                        "Account created. Please log in.")
                    dialog.accept()
            except urllib.error.HTTPError as e:
                # Prefer the server's own error text when the body is JSON.
                try:
                    body = json.loads(e.read().decode('utf-8'))
                    msg = body.get('detail', body.get('message', str(e)))
                except Exception:
                    msg = str(e)
                QMessageBox.warning(dialog, "Registration Failed", msg)
            except Exception as e:
                QMessageBox.warning(dialog, "Error", f"Could not connect to server:\n{e}")
        register_btn.clicked.connect(do_register)
        dialog.exec()
    def _update_managed_balance(self):
        """Fetch and display the current account balance from the managed proxy server."""
        import json
        import urllib.request
        import urllib.error
        server_url = self.managed_server_url.text().rstrip('/')
        token = self.config.get('remote.auth_token', '')
        if not server_url or not token:
            self.managed_balance_label.setText("Not logged in")
            return
        req = urllib.request.Request(
            f"{server_url}/billing/balance",
            headers={
                "Authorization": f"Bearer {token}",
                "Content-Type": "application/json"
            },
            method="GET"
        )
        try:
            with urllib.request.urlopen(req, timeout=10) as resp:
                data = json.loads(resp.read().decode('utf-8'))
            balance = data.get('balance', data.get('credits', 'N/A'))
            self.managed_balance_label.setText(str(balance))
        except urllib.error.HTTPError as e:
            if e.code == 401:
                self.managed_balance_label.setText("Session expired - please login again")
                self.config.set('remote.auth_token', '')
            else:
                self.managed_balance_label.setText("Error fetching balance")
        except Exception:
            self.managed_balance_label.setText("Could not connect to server")
    def _check_for_updates_now(self):
        """Manually check for updates."""
        from version import __version__
--- a/index.html
+++ b/index.html
@@ -0,0 +1,13 @@
 <!doctype html>
 <html lang="en">
  <head>
    <meta charset="UTF-8" />
    <link rel="icon" type="image/png" href="/LocalTranscription.png" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>Local Transcription</title>
  </head>
  <body>
    <div id="app"></div>
    <script type="module" src="/src/main.ts"></script>
  </body>
 </html>
--- a/local-transcription-headless.spec
+++ b/local-transcription-headless.spec
@@ -0,0 +1,184 @@
 # -*- mode: python ; coding: utf-8 -*-
 """PyInstaller spec file for headless Local Transcription backend (no PySide6/Qt).
 This builds the Python sidecar for the Tauri frontend.
 Much simpler than local-transcription.spec since all Qt dependencies are removed.
 """
 import sys
 import os
 block_cipher = None
 is_windows = sys.platform == 'win32'
 from PyInstaller.utils.hooks import collect_submodules, collect_data_files
 # Find faster_whisper assets folder
 import faster_whisper
 faster_whisper_path = os.path.dirname(faster_whisper.__file__)
 vad_assets_path = os.path.join(faster_whisper_path, 'assets')
 # pvporcupine is optional (pulled in indirectly by RealtimeSTT). When it is
 # importable, bundle whichever of its 'resources' and 'lib' directories exist;
 # when it is not, bundle nothing.
 pvporcupine_data_files = []
 try:
    import pvporcupine
 except ImportError:
    pass
 else:
    pvporcupine_path = os.path.dirname(pvporcupine.__file__)
    pvporcupine_resources = os.path.join(pvporcupine_path, 'resources')
    pvporcupine_lib = os.path.join(pvporcupine_path, 'lib')
    # (source directory, destination inside the bundle), in the original order.
    for _src_dir, _bundle_dir in (
        (pvporcupine_resources, 'pvporcupine/resources'),
        (pvporcupine_lib, 'pvporcupine/lib'),
    ):
        if os.path.exists(_src_dir):
            pvporcupine_data_files.append((_src_dir, _bundle_dir))
 # Data files
 datas = [
    ('config/default_config.yaml', 'config'),
    (vad_assets_path, 'faster_whisper/assets'),
 ] + pvporcupine_data_files
 # Hidden imports -- NO PySide6/Qt needed for headless backend
 hiddenimports = [
    # Transcription engine
    'faster_whisper',
    'faster_whisper.transcribe',
    'faster_whisper.vad',
    'ctranslate2',
    'sounddevice',
    'scipy',
    'scipy.signal',
    'numpy',
    # RealtimeSTT
    'RealtimeSTT',
    'RealtimeSTT.audio_recorder',
    'webrtcvad',
    'webrtcvad_wheels',
    'silero_vad',
    # PyTorch
    'torch',
    'torch.nn',
    'torch.nn.functional',
    'torchaudio',
    'onnxruntime',
    'onnxruntime.capi',
    'onnxruntime.capi.onnxruntime_pybind11_state',
    'pyaudio',
    'halo',
    'colorama',
    # FastAPI and dependencies
    'fastapi',
    'fastapi.routing',
    'fastapi.responses',
    'starlette',
    'starlette.applications',
    'starlette.routing',
    'starlette.responses',
    'starlette.websockets',
    'starlette.middleware',
    'starlette.middleware.cors',
    'pydantic',
    'pydantic.fields',
    'pydantic.main',
    'anyio',
    'anyio._backends',
    'anyio._backends._asyncio',
    'sniffio',
    # Uvicorn
    'uvicorn',
    'uvicorn.logging',
    'uvicorn.loops',
    'uvicorn.loops.auto',
    'uvicorn.protocols',
    'uvicorn.protocols.http',
    'uvicorn.protocols.http.auto',
    'uvicorn.protocols.http.h11_impl',
    'uvicorn.protocols.websockets',
    'uvicorn.protocols.websockets.auto',
    'uvicorn.protocols.websockets.wsproto_impl',
    'uvicorn.lifespan',
    'uvicorn.lifespan.on',
    'h11',
    'websockets',
    'websockets.legacy',
    'websockets.legacy.server',
    # HTTP client
    'requests',
    'urllib3',
    'certifi',
    'charset_normalizer',
 ]
 # Pull every submodule of the backend's key packages into hiddenimports.
 # A package that cannot be collected is reported and skipped.
 print("Collecting submodules for backend packages...")
 for package in ('fastapi', 'starlette', 'pydantic', 'pydantic_core', 'anyio',
                'uvicorn', 'websockets', 'h11', 'httptools', 'uvloop'):
    try:
        submodules = collect_submodules(package)
    except Exception as e:
        print(f"  - Warning: Could not collect {package}: {e}")
    else:
        hiddenimports.extend(submodules)
        print(f"  + Collected {len(submodules)} submodules from {package}")
 # Collect package data files (non-Python resources) for the listed packages
 # and append them to `datas`. A failure for one package is reported and the
 # build continues, matching the submodule-collection loop above instead of
 # silently discarding the error.
 for package in ['fastapi', 'starlette', 'pydantic', 'uvicorn', 'RealtimeSTT']:
    try:
        data_files = collect_data_files(package)
        if data_files:
            datas += data_files
            print(f"  + Collected {len(data_files)} data files from {package}")
    except Exception as e:
        print(f"  - Warning: Could not collect data files from {package}: {e}")
 # Pydantic critical deps
 hiddenimports += [
    'colorsys', 'decimal', 'json', 'ipaddress', 'pathlib', 'uuid',
    'email.message', 'typing_extensions',
 ]
 # Analyse the headless entry script using the data files and hidden imports
 # assembled above. GUI toolkits are excluded explicitly.
 a = Analysis(
    ['backend/main_headless.py'],
    pathex=[],
    binaries=[],
    datas=datas,
    hiddenimports=hiddenimports,
    # Project-local hook directory.
    hookspath=['hooks'],
    hooksconfig={},
    runtime_hooks=[],
    excludes=['enum34', 'PySide6', 'PyQt5', 'PyQt6', 'tkinter'],
    win_no_prefer_redirects=False,
    win_private_assemblies=False,
    cipher=block_cipher,
    noarchive=False,
 )
 # Archive of the pure-Python modules found by the analysis.
 pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)
 # The executable itself. Binaries are excluded here (exclude_binaries=True)
 # and gathered next to it by COLLECT below.
 exe = EXE(
    pyz,
    a.scripts,
    [],
    exclude_binaries=True,
    name='local-transcription-backend',
    debug=False,
    bootloader_ignore_signals=False,
    strip=False,
    upx=True,
    console=True,  # Headless backend needs console for JSON output
    disable_windowed_traceback=False,
    argv_emulation=False,
    target_arch=None,
    codesign_identity=None,
    entitlements_file=None,
    # The .ico icon is only applied on Windows builds.
    icon='LocalTranscription.ico' if is_windows else None,
 )
 # Gather the executable, binaries, zip files and data files into the
 # 'local-transcription-backend' output.
 coll = COLLECT(
    exe,
    a.binaries,
    a.zipfiles,
    a.datas,
    strip=False,
    upx=True,
    upx_exclude=[],
    name='local-transcription-backend',
 )
--- a/package-lock.json
+++ b/package-lock.json
--- a/package.json
+++ b/package.json
@@ -0,0 +1,30 @@
 {
  "name": "local-transcription",
  "private": true,
  "version": "2.0.1",
  "type": "module",
  "scripts": {
    "dev": "vite dev",
    "build": "vite build",
    "preview": "vite preview",
    "tauri": "tauri"
  },
  "devDependencies": {
    "@sveltejs/vite-plugin-svelte": "^5.0.0",
    "@tauri-apps/cli": "^2.0.0",
    "@testing-library/svelte": "^5.3.1",
    "@tsconfig/svelte": "^5.0.0",
    "jsdom": "^29.0.2",
    "svelte": "^5.0.0",
    "svelte-check": "^4.0.0",
    "typescript": "~5.6.0",
    "vite": "^6.0.0",
    "vitest": "^4.1.3"
  },
  "dependencies": {
    "@tauri-apps/api": "^2.0.0",
    "@tauri-apps/plugin-dialog": "^2.0.0",
    "@tauri-apps/plugin-process": "^2.0.0",
    "@tauri-apps/plugin-shell": "^2.0.0"
  }
 }
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "local-transcription"
-version = "1.0.0"
+version = "1.0.3"
 description = "A standalone desktop application for real-time speech-to-text transcription using Whisper models"
 readme = "README.md"
 requires-python = ">=3.9"
--- a/src-tauri/Cargo.lock
+++ b/src-tauri/Cargo.lock
--- a/src-tauri/Cargo.toml
+++ b/src-tauri/Cargo.toml
@@ -0,0 +1,30 @@
 [package]
 name = "local-transcription"
 version = "2.0.1"
 description = "Real-time speech-to-text transcription for streamers"
 authors = ["Local Transcription Contributors"]
 edition = "2021"
 [lib]
 name = "local_transcription_lib"
 crate-type = ["lib", "cdylib", "staticlib"]
 [build-dependencies]
 tauri-build = { version = "2", features = [] }
 [dependencies]
 tauri = { version = "2", features = [] }
 tauri-plugin-shell = "2"
 tauri-plugin-dialog = "2"
 tauri-plugin-process = "2"
 serde = { version = "1", features = ["derive"] }
 serde_json = "1"
 reqwest = { version = "0.12", features = ["json", "stream"] }
 futures-util = "0.3"
 zip = { version = "2", default-features = false, features = ["deflate"] }
 bytes = "1"
 tokio = { version = "1", features = ["full"] }
 chrono = "0.4"
 [dev-dependencies]
 tempfile = "3"
--- a/src-tauri/build.rs
+++ b/src-tauri/build.rs
@@ -0,0 +1,3 @@
 // Cargo build script entry point: delegates entirely to tauri_build.
 fn main() {
    tauri_build::build()
 }
--- a/src-tauri/capabilities/default.json
+++ b/src-tauri/capabilities/default.json
@@ -0,0 +1,14 @@
 {
  "identifier": "default",
  "description": "Default permissions for the main window",
  "windows": ["main"],
  "permissions": [
    "core:default",
    "core:event:default",
    "core:event:allow-listen",
    "core:event:allow-emit",
    "shell:default",
    "dialog:default",
    "process:default"
  ]
 }
--- a/src-tauri/gen/schemas/acl-manifests.json
+++ b/src-tauri/gen/schemas/acl-manifests.json
--- a/src-tauri/gen/schemas/capabilities.json
+++ b/src-tauri/gen/schemas/capabilities.json
@@ -0,0 +1 @@
 {}
--- a/src-tauri/gen/schemas/desktop-schema.json
+++ b/src-tauri/gen/schemas/desktop-schema.json
--- a/src-tauri/gen/schemas/linux-schema.json
+++ b/src-tauri/gen/schemas/linux-schema.json
--- a/src-tauri/icons/128x128.png
+++ b/src-tauri/icons/128x128.png
--- a/src-tauri/icons/128x128@2x.png
+++ b/src-tauri/icons/128x128@2x.png
--- a/src-tauri/icons/32x32.png
+++ b/src-tauri/icons/32x32.png
--- a/src-tauri/icons/icon.icns
+++ b/src-tauri/icons/icon.icns
--- a/src-tauri/icons/icon.ico
+++ b/src-tauri/icons/icon.ico
--- a/src-tauri/icons/icon.png
+++ b/src-tauri/icons/icon.png
--- a/src-tauri/src/lib.rs
+++ b/src-tauri/src/lib.rs
@@ -0,0 +1,75 @@
 mod sidecar;
 use std::sync::Mutex;
 use tauri::Manager;
 /// App log directory, set during setup.
 static LOG_DIR: std::sync::OnceLock<std::path::PathBuf> = std::sync::OnceLock::new();
 /// Tauri command: append a timestamped `message` to `frontend.log` inside the
 /// directory stored in `LOG_DIR`, and echo it to stderr.
 ///
 /// File logging is best-effort: if `LOG_DIR` has not been set yet, or the
 /// file cannot be opened or written, the message is still printed to stderr
 /// and no error is reported to the caller.
 #[tauri::command]
 fn write_log(message: String) {
    use std::io::Write;

    // Resolve the log file only when the log directory is known and the file
    // can be opened in create+append mode.
    let log_file = LOG_DIR.get().and_then(|dir| {
        std::fs::OpenOptions::new()
            .create(true)
            .append(true)
            .open(dir.join("frontend.log"))
            .ok()
    });

    if let Some(mut file) = log_file {
        let timestamp = chrono::Local::now().format("%H:%M:%S%.3f");
        // Write failures are deliberately ignored.
        let _ = writeln!(file, "[{}] {}", timestamp, message);
    }

    eprintln!("[frontend] {}", message);
 }
 /// Build and run the Tauri application.
 ///
 /// Registers the shell, dialog and process plugins, installs the shared
 /// sidecar manager as managed state, resolves the resource and app-data
 /// directories during setup, and exposes the sidecar commands plus
 /// `write_log` to the frontend.
 ///
 /// # Panics
 /// Panics if the resource or app-data directory cannot be resolved, if the
 /// app-data directory cannot be created, or if the Tauri runtime returns an
 /// error from `run`.
 #[cfg_attr(mobile, tauri::mobile_entry_point)]
 pub fn run() {
    tauri::Builder::default()
        .plugin(tauri_plugin_shell::init())
        .plugin(tauri_plugin_dialog::init())
        .plugin(tauri_plugin_process::init())
        // Sidecar manager wrapped in a Mutex and registered as managed state.
        .manage(sidecar::ManagedSidecar(Mutex::new(
            sidecar::SidecarManager::new(),
        )))
        .setup(|app| {
            let resource_dir = app
                .path()
                .resource_dir()
                .expect("failed to resolve resource dir");
            let data_dir = app
                .path()
                .app_data_dir()
                .expect("failed to resolve app data dir");
            std::fs::create_dir_all(&data_dir).expect("failed to create app data dir");
            // Set up logging
            // `.ok()` discards the error returned if LOG_DIR was already set.
            LOG_DIR.set(data_dir.clone()).ok();
            let log_path = data_dir.join("app.log");
            // Startup banner is best-effort: open and write failures are ignored.
            if let Ok(mut f) = std::fs::OpenOptions::new()
                .create(true)
                .append(true)
                .open(&log_path)
            {
                use std::io::Write;
                let _ = writeln!(f, "\n=== App started at {} ===", chrono::Local::now());
                let _ = writeln!(f, "Resource dir: {}", resource_dir.display());
                let _ = writeln!(f, "Data dir: {}", data_dir.display());
            }
            // Hand both directories to the sidecar module.
            sidecar::init_dirs(resource_dir, data_dir);
            Ok(())
        })
        // Commands callable from the frontend via `invoke`.
        .invoke_handler(tauri::generate_handler![
            sidecar::check_sidecar,
            sidecar::download_sidecar,
            sidecar::check_sidecar_update,
            sidecar::get_sidecar_port,
            sidecar::start_sidecar,
            sidecar::stop_sidecar,
            write_log,
        ])
        .run(tauri::generate_context!())
        .expect("error while running tauri application");
 }
--- a/src-tauri/src/main.rs
+++ b/src-tauri/src/main.rs
@@ -0,0 +1,6 @@
 // Prevents additional console window on Windows in release
 #![cfg_attr(not(debug_assertions), windows_subsystem = "windows")]
 // Binary entry point: all application setup lives in the library crate's `run`.
 fn main() {
    local_transcription_lib::run()
 }
--- a/src-tauri/src/sidecar/mod.rs
+++ b/src-tauri/src/sidecar/mod.rs
--- a/src-tauri/tauri.conf.json
+++ b/src-tauri/tauri.conf.json
@@ -0,0 +1,43 @@
 {
  "productName": "Local Transcription",
  "version": "2.0.1",
  "identifier": "net.anhonesthost.local-transcription",
  "build": {
    "frontendDist": "../dist",
    "devUrl": "http://localhost:1420",
    "beforeDevCommand": "npm run dev",
    "beforeBuildCommand": "npm run build"
  },
  "app": {
    "windows": [
      {
        "title": "Local Transcription",
        "width": 800,
        "height": 600,
        "minWidth": 640,
        "minHeight": 480,
        "resizable": true
      }
    ],
    "security": {
      "csp": null
    }
  },
  "bundle": {
    "active": true,
    "targets": "all",
    "icon": [
      "icons/32x32.png",
      "icons/128x128.png",
      "icons/128x128@2x.png",
      "icons/icon.icns",
      "icons/icon.ico",
      "icons/icon.png"
    ]
  },
  "plugins": {
    "shell": {
      "open": true
    }
  }
 }
--- a/src/App.svelte
+++ b/src/App.svelte
@@ -0,0 +1,312 @@
 <script lang="ts">
  import { onMount } from "svelte";
  import Header from "$lib/components/Header.svelte";
  import StatusBar from "$lib/components/StatusBar.svelte";
  import Controls from "$lib/components/Controls.svelte";
  import TranscriptionDisplay from "$lib/components/TranscriptionDisplay.svelte";
  import Settings from "$lib/components/Settings.svelte";
  import SidecarSetup from "$lib/components/SidecarSetup.svelte";
  import { backendStore } from "$lib/stores/backend";
  import { configStore } from "$lib/stores/config";
  type SidecarState = "checking" | "needs_setup" | "update_available" | "starting" | "connected";
  let showSettings = $state(false);
  let sidecarState = $state<SidecarState>("checking");
  let debugLog = $state("");
  let availableUpdate = $state("");
  let obsDisplayUrl = $derived(backendStore.obsUrl);
  let syncDisplayUrl = $derived(backendStore.syncUrl);
  let isConnected = $derived(backendStore.connectionState === "connected");
  let connectionState = $derived(backendStore.connectionState);
  /** Show the settings view by setting the `showSettings` flag. */
  function openSettings() {
    showSettings = true;
  }
  /** Hide the settings view by clearing the `showSettings` flag. */
  function closeSettings() {
    showSettings = false;
  }
  let tauriInvoke: ((cmd: string, args?: Record<string, unknown>) => Promise<unknown>) | null = null;
  /**
   * Record a diagnostic message.
   *
   * The message is printed to the console, stored in `debugLog` so the
   * loading screens can display it, and, once `tauriInvoke` has been set,
   * forwarded to the `write_log` command.
   *
   * Forwarding is best-effort. The returned promise gets a rejection handler
   * so a failing `write_log` call (for example when the page runs in a plain
   * browser) does not surface as an unhandled promise rejection.
   */
  function log(msg: string) {
    console.log(`[App] ${msg}`);
    debugLog = msg;
    // Also write to file via Tauri if available. The optional chain
    // short-circuits the whole expression while tauriInvoke is still null.
    tauriInvoke?.("write_log", { message: msg }).catch((err: unknown) => {
      // Use console directly: calling log() here would recurse.
      console.warn("[App] write_log failed:", err);
    });
  }
  /**
   * Startup sequence: decide which screen to show and, when possible, launch
   * the sidecar.
   *
   * 1. Load the Tauri `invoke` function and remember it in `tauriInvoke`.
   * 2. Ask `check_sidecar` whether the sidecar is installed; if not, switch
   *    to the "needs_setup" state and stop.
   * 3. Ask `check_sidecar_update` for an available update; if one is
   *    reported, store it and switch to "update_available". A failure of
   *    this check is logged and otherwise ignored.
   * 4. Otherwise call `launchSidecar()`.
   *
   * If loading the Tauri API or the `check_sidecar` call throws, the function
   * assumes it is not running inside Tauri and connects to port 8081.
   */
  async function checkAndLaunchSidecar() {
    try {
      log("Importing Tauri API...");
      const { invoke } = await import("@tauri-apps/api/core");
      // Stored so log() can forward messages to the write_log command.
      tauriInvoke = invoke;
      log("Checking if sidecar is installed...");
      sidecarState = "checking";
      const installed = await invoke<boolean>("check_sidecar");
      log(`Sidecar installed: ${installed}`);
      if (!installed) {
        sidecarState = "needs_setup";
        return;
      }
      // Check for sidecar updates before launching
      try {
        log("Checking for sidecar updates...");
        const update = await invoke<string | null>("check_sidecar_update");
        // A truthy value is shown to the user as the available version.
        if (update) {
          log(`Sidecar update available: ${update}`);
          availableUpdate = update;
          sidecarState = "update_available";
          return;
        }
      } catch (err) {
        log(`Update check failed (non-fatal): ${err}`);
      }
      await launchSidecar();
    } catch (err) {
      // Not running in Tauri (browser dev mode) - skip sidecar check
      // and connect directly to localhost:8081
      // NOTE(review): any error from the import or check_sidecar lands here,
      // not only "Tauri missing"; launchSidecar() handles its own errors.
      log(`Tauri not available (${err}), using dev mode`);
      sidecarState = "starting";
      backendStore.setPort(8081);
      backendStore.connect();
      configStore.fetchConfig();
    }
  }
  /**
   * Start the sidecar process, look up the port it reports, and point the
   * backend and config stores at it.
   *
   * On any failure the error is logged and the stores are still asked to
   * connect, without setting a port first.
   */
  async function launchSidecar() {
    try {
      const tauriCore = await import("@tauri-apps/api/core");

      log("Starting sidecar...");
      sidecarState = "starting";
      await tauriCore.invoke("start_sidecar");

      log("Getting sidecar port...");
      const sidecarPort = await tauriCore.invoke<number>("get_sidecar_port");
      log(`Sidecar ready on port ${sidecarPort}`);

      backendStore.setPort(sidecarPort);
      backendStore.connect();
      configStore.fetchConfig();
    } catch (launchError) {
      // Launch failed: fall back to whatever port the store already has.
      log(`Sidecar launch failed: ${launchError}, trying default port`);
      sidecarState = "starting";
      backendStore.connect();
      configStore.fetchConfig();
    }
  }
  /** Completion callback passed to `SidecarSetup`: launches the sidecar. */
  async function onSidecarReady() {
    await launchSidecar();
  }
  // On mount, kick off the sidecar check (not awaited). The returned cleanup
  // function disconnects from the backend when the component is destroyed.
  onMount(() => {
    checkAndLaunchSidecar();
    return () => {
      backendStore.disconnect();
    };
  });
 </script>
 {#if sidecarState === "checking"}
  <div class="connecting-overlay" style="background:#1e1e1e;color:#e0e0e0;display:flex;align-items:center;justify-content:center;height:100%;width:100%;">
    <div class="connecting-content" style="text-align:center;">
      <div class="connecting-icon">
        <div class="spinner"></div>
      </div>
      <h2 style="font-size:20px;margin:16px 0 8px;">Local Transcription</h2>
      <p style="color:#a0a0a0;font-size:14px;">Checking setup...</p>
      {#if debugLog}
        <p style="color:#707070;font-size:11px;margin-top:12px;">{debugLog}</p>
      {/if}
    </div>
  </div>
 {:else if sidecarState === "needs_setup"}
  <SidecarSetup onComplete={onSidecarReady} />
 {:else if sidecarState === "update_available"}
  <div class="connecting-overlay" style="background:#1e1e1e;color:#e0e0e0;display:flex;align-items:center;justify-content:center;height:100%;width:100%;">
    <div class="connecting-content" style="text-align:center;max-width:400px;">
      <h2 style="font-size:20px;margin:0 0 12px;">Sidecar Update Available</h2>
      <p style="color:#a0a0a0;font-size:14px;margin:0 0 20px;">
        A new version of the transcription engine is available ({availableUpdate}).
      </p>
      <div style="display:flex;gap:10px;justify-content:center;">
        <button
          style="padding:8px 20px;border:1px solid #555;border-radius:6px;background:transparent;color:#e0e0e0;cursor:pointer;"
          onclick={() => launchSidecar()}
        >Skip</button>
        <button
          style="padding:8px 20px;border:none;border-radius:6px;background:#4CAF50;color:white;cursor:pointer;font-weight:500;"
          onclick={() => { sidecarState = "needs_setup"; }}
        >Update Now</button>
      </div>
    </div>
  </div>
 {:else if !isConnected}
  <div class="connecting-overlay" style="background:#1e1e1e;color:#e0e0e0;display:flex;align-items:center;justify-content:center;height:100%;width:100%;">
    <div class="connecting-content" style="text-align:center;">
      <div class="connecting-icon">
        {#if connectionState === "error"}
          <svg width="48" height="48" viewBox="0 0 24 24" fill="none" stroke="#e74c3c" stroke-width="2">
            <circle cx="12" cy="12" r="10"/>
            <line x1="15" y1="9" x2="9" y2="15"/>
            <line x1="9" y1="9" x2="15" y2="15"/>
          </svg>
        {:else}
          <div class="spinner"></div>
        {/if}
      </div>
      <h2 style="font-size:20px;margin:16px 0 8px;">Local Transcription</h2>
      {#if connectionState === "error"}
        <p style="color:#a0a0a0;">Cannot connect to backend</p>
        <p class="hint">Make sure the Python backend is running:<br>
          <code>uv run python -m backend.main_headless</code></p>
      {:else}
        <p style="color:#a0a0a0;">Connecting to backend...</p>
      {/if}
      {#if debugLog}
        <p style="color:#707070;font-size:11px;margin-top:12px;">{debugLog}</p>
      {/if}
    </div>
  </div>
 {:else}
  <div class="app-shell">
    <Header onSettingsClick={openSettings} />
    <StatusBar />
    <div class="display-links">
      <span class="link-label">OBS:</span>
      <a href={obsDisplayUrl} target="_blank" rel="noopener">{obsDisplayUrl}</a>
      {#if syncDisplayUrl}
        <span class="link-separator">|</span>
        <span class="link-label">Sync:</span>
        <a href={syncDisplayUrl} target="_blank" rel="noopener"
          >{syncDisplayUrl}</a
        >
      {/if}
    </div>
    <TranscriptionDisplay />
    <Controls />
    <div class="version-label">v{backendStore.version}</div>
  </div>
  {#if showSettings}
    <Settings onClose={closeSettings} />
  {/if}
 {/if}
 <style>
  .connecting-overlay {
    display: flex;
    align-items: center;
    justify-content: center;
    height: 100%;
    width: 100%;
    background-color: var(--bg-primary);
  }
  .connecting-content {
    text-align: center;
    color: var(--text-primary);
  }
  .connecting-content h2 {
    margin: 16px 0 8px;
    font-size: 20px;
    font-weight: 600;
  }
  .connecting-content p {
    margin: 4px 0;
    color: var(--text-secondary);
    font-size: 14px;
  }
  .connecting-content .hint {
    margin-top: 16px;
    font-size: 12px;
    color: var(--text-muted);
  }
  .connecting-content code {
    display: inline-block;
    margin-top: 4px;
    padding: 4px 8px;
    background: var(--bg-tertiary);
    border-radius: 4px;
    font-size: 12px;
    color: var(--text-primary);
  }
  .connecting-icon {
    display: flex;
    justify-content: center;
    margin-bottom: 8px;
  }
  .spinner {
    width: 40px;
    height: 40px;
    border: 3px solid var(--border-color);
    border-top-color: var(--accent-color, #4CAF50);
    border-radius: 50%;
    animation: spin 0.8s linear infinite;
  }
  @keyframes spin {
    to { transform: rotate(360deg); }
  }
  .app-shell {
    display: flex;
    flex-direction: column;
    height: 100%;
    width: 100%;
    background-color: var(--bg-primary);
  }
  .display-links {
    display: flex;
    align-items: center;
    gap: 6px;
    padding: 6px 20px;
    font-size: 12px;
    background-color: var(--bg-primary);
    border-bottom: 1px solid var(--border-color);
    flex-shrink: 0;
  }
  .link-label {
    color: var(--text-secondary);
    font-weight: 500;
  }
  .link-separator {
    color: var(--text-muted);
    margin: 0 4px;
  }
  .version-label {
    position: fixed;
    bottom: 6px;
    right: 12px;
    font-size: 11px;
    color: var(--text-muted);
    pointer-events: none;
    z-index: 10;
  }
 </style>
--- a/src/app.css
+++ b/src/app.css
@@ -0,0 +1,312 @@
 /* Global dark theme styles for Local Transcription */
 :root {
  --bg-primary: #1e1e1e;
  --bg-secondary: #2d2d2d;
  --bg-tertiary: #3a3a3a;
  --bg-hover: #454545;
  --text-primary: #e0e0e0;
  --text-secondary: #a0a0a0;
  --text-muted: #707070;
  --accent-green: #4caf50;
  --accent-green-hover: #45a049;
  --accent-red: #f44336;
  --accent-red-hover: #d32f2f;
  --accent-blue: #2196f3;
  --accent-blue-hover: #1976d2;
  --accent-orange: #ff9800;
  --border-color: #444;
  --border-color-light: #555;
  --scrollbar-track: #2d2d2d;
  --scrollbar-thumb: #555;
  --scrollbar-thumb-hover: #777;
 }
 *,
 *::before,
 *::after {
  box-sizing: border-box;
  margin: 0;
  padding: 0;
 }
 html,
 body {
  height: 100%;
  width: 100%;
  overflow: hidden;
 }
 body {
  background-color: var(--bg-primary);
  color: var(--text-primary);
  font-family: system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto,
    "Helvetica Neue", Arial, sans-serif;
  font-size: 14px;
  line-height: 1.5;
  -webkit-font-smoothing: antialiased;
  -moz-osx-font-smoothing: grayscale;
 }
 #app {
  height: 100%;
  width: 100%;
  display: flex;
  flex-direction: column;
 }
 /* Buttons */
 button {
  font-family: inherit;
  font-size: 13px;
  font-weight: 500;
  padding: 8px 16px;
  border: 1px solid var(--border-color);
  border-radius: 6px;
  background-color: var(--bg-secondary);
  color: var(--text-primary);
  cursor: pointer;
  transition: background-color 0.15s ease, border-color 0.15s ease,
    transform 0.1s ease;
  user-select: none;
 }
 button:hover {
  background-color: var(--bg-hover);
  border-color: var(--border-color-light);
 }
 button:active {
  transform: scale(0.98);
 }
 button:disabled {
  opacity: 0.5;
  cursor: not-allowed;
  transform: none;
 }
 button.primary {
  background-color: var(--accent-green);
  border-color: var(--accent-green);
  color: white;
 }
 button.primary:hover {
  background-color: var(--accent-green-hover);
 }
 button.danger {
  background-color: var(--accent-red);
  border-color: var(--accent-red);
  color: white;
 }
 button.danger:hover {
  background-color: var(--accent-red-hover);
 }
 /* Inputs and Selects */
 input[type="text"],
 input[type="password"],
 input[type="number"],
 input[type="url"],
 input[type="email"],
 select,
 textarea {
  font-family: inherit;
  font-size: 13px;
  padding: 8px 12px;
  border: 1px solid var(--border-color);
  border-radius: 6px;
  background-color: var(--bg-secondary);
  color: var(--text-primary);
  outline: none;
  transition: border-color 0.15s ease;
  width: 100%;
 }
 input[type="text"]:focus,
 input[type="password"]:focus,
 input[type="number"]:focus,
 input[type="url"]:focus,
 input[type="email"]:focus,
 select:focus,
 textarea:focus {
  border-color: var(--accent-blue);
 }
 input[type="text"]::placeholder,
 input[type="password"]::placeholder,
 input[type="url"]::placeholder {
  color: var(--text-muted);
 }
 select {
  appearance: none;
  background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='12' height='12' viewBox='0 0 12 12'%3E%3Cpath fill='%23a0a0a0' d='M6 8L1 3h10z'/%3E%3C/svg%3E");
  background-repeat: no-repeat;
  background-position: right 10px center;
  padding-right: 30px;
 }
 /* Color input */
 input[type="color"] {
  width: 50px;
  height: 36px;
  border: 1px solid var(--border-color);
  border-radius: 6px;
  background-color: var(--bg-secondary);
  cursor: pointer;
  padding: 2px;
 }
 input[type="color"]::-webkit-color-swatch-wrapper {
  padding: 2px;
 }
 input[type="color"]::-webkit-color-swatch {
  border: none;
  border-radius: 3px;
 }
 /* Range slider */
 input[type="range"] {
  -webkit-appearance: none;
  appearance: none;
  width: 100%;
  height: 6px;
  background: var(--bg-tertiary);
  border-radius: 3px;
  outline: none;
  cursor: pointer;
 }
 input[type="range"]::-webkit-slider-thumb {
  -webkit-appearance: none;
  appearance: none;
  width: 16px;
  height: 16px;
  border-radius: 50%;
  background: var(--accent-blue);
  cursor: pointer;
  border: 2px solid var(--bg-primary);
 }
 input[type="range"]::-moz-range-thumb {
  width: 16px;
  height: 16px;
  border-radius: 50%;
  background: var(--accent-blue);
  cursor: pointer;
  border: 2px solid var(--bg-primary);
 }
 /* Toggle / Checkbox styled as switch */
 input[type="checkbox"] {
  position: relative;
  width: 40px;
  height: 22px;
  -webkit-appearance: none;
  appearance: none;
  background-color: var(--bg-tertiary);
  border-radius: 11px;
  cursor: pointer;
  transition: background-color 0.2s ease;
  flex-shrink: 0;
 }
 input[type="checkbox"]::after {
  content: "";
  position: absolute;
  top: 2px;
  left: 2px;
  width: 18px;
  height: 18px;
  background-color: var(--text-secondary);
  border-radius: 50%;
  transition: transform 0.2s ease, background-color 0.2s ease;
 }
 input[type="checkbox"]:checked {
  background-color: var(--accent-green);
 }
 input[type="checkbox"]:checked::after {
  transform: translateX(18px);
  background-color: white;
 }
 /* Radio buttons */
 input[type="radio"] {
  -webkit-appearance: none;
  appearance: none;
  width: 18px;
  height: 18px;
  border: 2px solid var(--border-color);
  border-radius: 50%;
  background-color: var(--bg-secondary);
  cursor: pointer;
  position: relative;
  flex-shrink: 0;
 }
 input[type="radio"]:checked {
  border-color: var(--accent-blue);
 }
 input[type="radio"]:checked::after {
  content: "";
  position: absolute;
  top: 3px;
  left: 3px;
  width: 8px;
  height: 8px;
  background-color: var(--accent-blue);
  border-radius: 50%;
 }
 /* Scrollbar */
 ::-webkit-scrollbar {
  width: 8px;
  height: 8px;
 }
 ::-webkit-scrollbar-track {
  background: var(--scrollbar-track);
  border-radius: 4px;
 }
 ::-webkit-scrollbar-thumb {
  background: var(--scrollbar-thumb);
  border-radius: 4px;
 }
 ::-webkit-scrollbar-thumb:hover {
  background: var(--scrollbar-thumb-hover);
 }
 /* Firefox scrollbar */
 * {
  scrollbar-width: thin;
  scrollbar-color: var(--scrollbar-thumb) var(--scrollbar-track);
 }
 /* Links */
 a {
  color: var(--accent-blue);
  text-decoration: none;
 }
 a:hover {
  text-decoration: underline;
 }
 /* Label */
 label {
  font-size: 13px;
  color: var(--text-secondary);
  display: flex;
  align-items: center;
  gap: 8px;
 }
--- a/src/lib/components/Controls.svelte
+++ b/src/lib/components/Controls.svelte
@@ -0,0 +1,116 @@
 <script lang="ts">
  import { backendStore } from "$lib/stores/backend";
  import { transcriptionStore } from "$lib/stores/transcriptions";
  let isTranscribing = $derived(backendStore.appState === "transcribing");
  let isReady = $derived(
    backendStore.appState === "ready" || backendStore.appState === "transcribing"
  );
  let isLoading = $state(false);
  /**
   * Start transcription when idle, stop it when running.
   *
   * `isLoading` guards against overlapping requests and is always reset when
   * the request settles. Failures are logged to the console only.
   */
  async function toggleTranscription() {
    if (isLoading) return;
    isLoading = true;
    // Decide the endpoint from the state at the moment of the click.
    const endpoint = isTranscribing ? "/api/stop" : "/api/start";
    try {
      await backendStore.apiPost(endpoint);
    } catch (error) {
      console.error("Failed to toggle transcription:", error);
    } finally {
      isLoading = false;
    }
  }
  /**
   * Clear transcriptions on the backend, then in the local store.
   *
   * The local store is only cleared after the backend call succeeds; any
   * failure is logged to the console.
   */
  async function clearTranscriptions() {
    try {
      const clearRequest = backendStore.apiPost("/api/clear");
      await clearRequest;
      transcriptionStore.clearAll();
    } catch (error) {
      console.error("Failed to clear:", error);
    }
  }
  /**
   * Save the current transcript to a file chosen by the user.
   *
   * The text is taken from `/api/transcriptions`, falling back to the local
   * store when the request fails or returns no text. Nothing is saved when
   * the text is blank.
   *
   * When the native Tauri save dialog is available, the chosen path and the
   * text are sent to `/api/save-file`. When the dialog cannot be used, the
   * text is offered as a browser download instead.
   *
   * A failure of the backend write is logged as an error. It does not trigger
   * the browser-download fallback, which exists only for a missing dialog.
   */
  async function saveTranscriptions() {
    try {
      // Get transcription text from backend or local store
      let text: string;
      try {
        const data = await backendStore.apiGet<{ text: string }>("/api/transcriptions");
        text = data.text || transcriptionStore.getPlainText();
      } catch {
        text = transcriptionStore.getPlainText();
      }
      if (!text.trim()) {
        console.warn("No transcriptions to save");
        return;
      }

      // Try Tauri dialog for native save, fall back to browser download.
      // Only the dialog itself is guarded here, so a failed backend write
      // below is reported rather than replaced by a download.
      let filePath: string | null;
      try {
        const { save } = await import("@tauri-apps/plugin-dialog");
        filePath = await save({
          defaultPath: "transcription.txt",
          filters: [
            { name: "Text Files", extensions: ["txt"] },
            { name: "All Files", extensions: ["*"] },
          ],
        });
      } catch {
        // Fallback: browser-style download
        const blob = new Blob([text], { type: "text/plain" });
        const url = URL.createObjectURL(blob);
        const a = document.createElement("a");
        a.href = url;
        a.download = "transcription.txt";
        a.click();
        URL.revokeObjectURL(url);
        return;
      }

      // A null path means the user cancelled the dialog.
      if (filePath) {
        // Write via backend API
        await backendStore.apiPost("/api/save-file", { path: filePath, text });
      }
    } catch (err) {
      console.error("Failed to save:", err);
    }
  }
 </script>
 <div class="controls">
  <button
    class={isTranscribing ? "danger" : "primary"}
    onclick={toggleTranscription}
    disabled={!isReady || isLoading}
  >
    {#if isLoading}
      ...
    {:else if isTranscribing}
      Stop Transcription
    {:else}
      Start Transcription
    {/if}
  </button>
  <button onclick={clearTranscriptions} disabled={!backendStore.connected}>
    Clear
  </button>
  <button onclick={saveTranscriptions} disabled={!backendStore.connected}>
    Save
  </button>
 </div>
 <style>
  .controls {
    display: flex;
    align-items: center;
    gap: 8px;
    padding: 10px 20px;
    background-color: var(--bg-secondary);
    border-top: 1px solid var(--border-color);
    flex-shrink: 0;
  }
 </style>
--- a/src/lib/components/Header.svelte
+++ b/src/lib/components/Header.svelte
@@ -0,0 +1,82 @@
 <script lang="ts">
   // Props accepted by the header component.
   interface Props {
     // Invoked when the settings button in the header is clicked.
     onSettingsClick: () => void;
   }
   // Read the props passed by the parent via the Svelte 5 `$props()` rune.
   let { onSettingsClick }: Props = $props();
 </script>
 <header class="app-header">
  <h1 class="app-title">Local Transcription</h1>
  <button class="settings-btn" onclick={onSettingsClick} title="Settings">
    <svg
      width="20"
      height="20"
      viewBox="0 0 24 24"
      fill="none"
      stroke="currentColor"
      stroke-width="2"
      stroke-linecap="round"
      stroke-linejoin="round"
    >
      <circle cx="12" cy="12" r="3"></circle>
      <path
        d="M19.4 15a1.65 1.65 0 0 0 .33 1.82l.06.06a2 2 0 0 1
        0 2.83 2 2 0 0 1-2.83 0l-.06-.06a1.65 1.65 0 0
        0-1.82-.33 1.65 1.65 0 0 0-1 1.51V21a2 2 0 0 1-2
        2 2 2 0 0 1-2-2v-.09A1.65 1.65 0 0 0 9 19.4a1.65
        1.65 0 0 0-1.82.33l-.06.06a2 2 0 0 1-2.83 0 2 2
        0 0 1 0-2.83l.06-.06A1.65 1.65 0 0 0 4.68
        15a1.65 1.65 0 0 0-1.51-1H3a2 2 0 0 1-2-2 2 2 0
        0 1 2-2h.09A1.65 1.65 0 0 0 4.6 9a1.65 1.65 0 0
        0-.33-1.82l-.06-.06a2 2 0 0 1 0-2.83 2 2 0 0 1
        2.83 0l.06.06A1.65 1.65 0 0 0 9 4.68a1.65 1.65 0
        0 0 1-1.51V3a2 2 0 0 1 2-2 2 2 0 0 1 2
        2v.09a1.65 1.65 0 0 0 1 1.51 1.65 1.65 0 0 0
        1.82-.33l.06-.06a2 2 0 0 1 2.83 0 2 2 0 0 1 0
        2.83l-.06.06a1.65 1.65 0 0 0-.33 1.82V9a1.65 1.65
        0 0 0 1.51 1H21a2 2 0 0 1 2 2 2 2 0 0
        1-2 2h-.09a1.65 1.65 0 0 0-1.51 1z"
      ></path>
    </svg>
  </button>
 </header>
 <style>
  .app-header {
    display: flex;
    align-items: center;
    justify-content: space-between;
    padding: 12px 20px;
    background-color: var(--bg-secondary);
    border-bottom: 1px solid var(--border-color);
    flex-shrink: 0;
  }
  .app-title {
    font-size: 24px;
    font-weight: 700;
    color: var(--text-primary);
    letter-spacing: -0.5px;
  }
  .settings-btn {
    display: flex;
    align-items: center;
    justify-content: center;
    width: 36px;
    height: 36px;
    padding: 0;
    border: 1px solid var(--border-color);
    border-radius: 8px;
    background-color: transparent;
    color: var(--text-secondary);
    cursor: pointer;
    transition: color 0.15s ease, background-color 0.15s ease;
  }
  .settings-btn:hover {
    color: var(--text-primary);
    background-color: var(--bg-tertiary);
  }
 </style>
--- a/src/lib/components/Settings.svelte
+++ b/src/lib/components/Settings.svelte
@@ -0,0 +1,821 @@
 <script lang="ts">
  import { configStore } from "$lib/stores/config";
  import { backendStore } from "$lib/stores/backend";
   // Props accepted by the settings dialog.
   interface Props {
     // Called to dismiss the dialog: on cancel, Escape, backdrop click,
     // and shortly after a successful save.
     onClose: () => void;
   }
   // Read the props passed by the parent via the Svelte 5 `$props()` rune.
   let { onClose }: Props = $props();
   // Local copies of config values for editing. The form controls bind to these;
   // nothing is written back to the config store until handleSave() runs.
   // --- User / audio ---
   let userName = $state("");
   let audioDevice = $state("default");
   // --- Transcription engine ---
   let model = $state("base.en");
   let language = $state("en");
   let computeDevice = $state("auto");
   let computeType = $state("default");
   // --- Realtime preview ---
   let enableRealtime = $state(false);
   let realtimeModel = $state("tiny.en");
   let realtimeProcessingPause = $state(0.1);
   // --- Voice activity detection and timing ---
   let sileroSensitivity = $state(0.4);
   let webrtcSensitivity = $state(3);
   let postSpeechSilence = $state(0.3);
   let minRecordingLength = $state(0.5);
   let minGapBetween = $state(0);
   let continuousMode = $state(false);
   // --- Display ---
   let showTimestamps = $state(true);
   let fadeSeconds = $state(10);
   let maxLines = $state(100);
   let fontSize = $state(12);
   // Colors are bound to <input type="color"> controls.
   let userColor = $state("#4CAF50");
   let textColor = $state("#FFFFFF");
   let backgroundColor = $state("#000000");
   // --- Server sync ---
   let syncEnabled = $state(false);
   let syncUrl = $state("");
   let syncRoom = $state("default");
   let syncPassphrase = $state("");
   // --- Remote transcription ("local" | "managed" | "byok" per the radio group) ---
   let remoteMode = $state("local");
   let remoteServerUrl = $state("");
   let byokApiKey = $state("");
   // Managed-account credentials: only sent to the login/register endpoints,
   // they are not part of the object handleSave() persists.
   let managedEmail = $state("");
   let managedPassword = $state("");
   // --- Updates ---
   let autoCheckUpdates = $state(true);
   // --- Save progress: `saving` disables the footer buttons, `saveMessage` is shown next to them ---
   let saving = $state(false);
   let saveMessage = $state("");
   // Fetched device lists
   let audioDevices = $state<{ id: string; name: string }[]>([]);
   let computeDevices = $state<{ id: string; name: string }[]>([]);
   // Model options: offered by both the main "Model" select and the
   // "Realtime Model" select.
   const modelOptions = [
     "tiny",
     "tiny.en",
     "base",
     "base.en",
     "small",
     "small.en",
     "medium",
     "medium.en",
     "large-v1",
     "large-v2",
     "large-v3",
   ];
   // Compute-type choices: `value` is what gets stored, `label` is what the user sees.
   const computeTypeOptions = [
     { value: "default", label: "Default" },
     { value: "int8", label: "int8 (Fastest)" },
     { value: "float16", label: "float16 (GPU)" },
     { value: "float32", label: "float32 (Best Quality)" },
   ];
   // WebRTC sensitivity levels; values are numeric so the bound state stays a number.
   const webrtcOptions = [
     { value: 0, label: "0 (Most Sensitive)" },
     { value: 1, label: "1" },
     { value: 2, label: "2" },
     { value: 3, label: "3 (Least Sensitive)" },
   ];
   // Copy every editable field from the config store into the local editing state.
   // NOTE(review): this is an $effect, so it presumably re-runs whenever
   // configStore.config changes (not only on mount), which would overwrite
   // unsaved edits in the form — confirm that is intended.
   $effect(() => {
     const cfg = configStore.config;
     userName = cfg.user.name;
     audioDevice = cfg.audio.input_device;
     model = cfg.transcription.model;
     language = cfg.transcription.language;
     computeDevice = cfg.transcription.device;
     computeType = cfg.transcription.compute_type;
     enableRealtime = cfg.transcription.enable_realtime_transcription;
     realtimeModel = cfg.transcription.realtime_model;
     realtimeProcessingPause = cfg.transcription.realtime_processing_pause;
     sileroSensitivity = cfg.transcription.silero_sensitivity;
     webrtcSensitivity = cfg.transcription.webrtc_sensitivity;
     postSpeechSilence = cfg.transcription.post_speech_silence_duration;
     minRecordingLength = cfg.transcription.min_length_of_recording;
     minGapBetween = cfg.transcription.min_gap_between_recordings;
     continuousMode = cfg.transcription.continuous_mode;
     showTimestamps = cfg.display.show_timestamps;
     fadeSeconds = cfg.display.fade_after_seconds;
     maxLines = cfg.display.max_lines;
     fontSize = cfg.display.font_size;
     userColor = cfg.display.user_color;
     textColor = cfg.display.text_color;
     // Strip alpha from background color for the color picker (only supports 6-char hex)
     const bgHex = cfg.display.background_color.replace("#", "");
     backgroundColor = "#" + bgHex.substring(0, 6);
     syncEnabled = cfg.server_sync.enabled;
     syncUrl = cfg.server_sync.url;
     syncRoom = cfg.server_sync.room;
     syncPassphrase = cfg.server_sync.passphrase;
     remoteMode = cfg.remote.mode;
     remoteServerUrl = cfg.remote.server_url;
     // The stored key may be absent; fall back to an empty string for the input.
     byokApiKey = cfg.remote.byok_api_key ?? "";
     autoCheckUpdates = cfg.updates.auto_check;
   });
   // Fetch audio devices and compute devices on mount.
   // Both requests are started without awaiting; each helper handles its own
   // errors and assigns its result list when it resolves.
   $effect(() => {
     fetchAudioDevices();
     fetchComputeDevices();
   });
  async function fetchAudioDevices() {
    try {
      const data = await backendStore.apiGet<{
        devices: { id: string; name: string }[];
      }>("/api/audio-devices");
      audioDevices = data.devices ?? [];
    } catch {
      audioDevices = [];
    }
  }
  async function fetchComputeDevices() {
    try {
      const data = await backendStore.apiGet<{
        devices: { id: string; name: string }[];
      }>("/api/compute-devices");
      computeDevices = data.devices ?? [];
    } catch {
      computeDevices = [
        { id: "auto", name: "Auto" },
        { id: "cpu", name: "CPU" },
        { id: "cuda", name: "CUDA (GPU)" },
      ];
    }
  }
  async function handleSave() {
    const updates = {
      user: {
        name: userName,
      },
      audio: {
        input_device: audioDevice,
      },
      transcription: {
        model,
        device: computeDevice,
        language,
        compute_type: computeType,
        enable_realtime_transcription: enableRealtime,
        realtime_model: realtimeModel,
        realtime_processing_pause: realtimeProcessingPause,
        silero_sensitivity: sileroSensitivity,
        webrtc_sensitivity: webrtcSensitivity,
        post_speech_silence_duration: postSpeechSilence,
        min_length_of_recording: minRecordingLength,
        min_gap_between_recordings: minGapBetween,
        continuous_mode: continuousMode,
      },
      display: {
        show_timestamps: showTimestamps,
        fade_after_seconds: fadeSeconds,
        max_lines: maxLines,
        font_size: fontSize,
        user_color: userColor,
        text_color: textColor,
        background_color: backgroundColor,
      },
      server_sync: {
        enabled: syncEnabled,
        url: syncUrl,
        room: syncRoom,
        passphrase: syncPassphrase,
      },
      remote: {
        mode: remoteMode,
        server_url: remoteServerUrl,
        byok_api_key: byokApiKey,
      },
      updates: {
        auto_check: autoCheckUpdates,
      },
    };
    saving = true;
    saveMessage = "";
    try {
      await configStore.updateConfig(updates);
      saveMessage = "Settings saved!";
      setTimeout(() => onClose(), 600);
    } catch (err) {
      console.error("Failed to save settings:", err);
      saveMessage = `Error: ${err}`;
      saving = false;
    }
  }
   /** Dismiss the dialog without saving; local edits are simply discarded. */
   function handleCancel() {
     onClose();
   }
  async function handleCheckUpdates() {
    try {
      await backendStore.apiGet("/api/check-update");
    } catch (err) {
      console.error("Failed to check for updates:", err);
    }
  }
  async function handleManagedLogin() {
    try {
      await backendStore.apiPost("/api/login", {
        email: managedEmail,
        password: managedPassword,
      });
    } catch (err) {
      console.error("Login failed:", err);
    }
  }
  async function handleManagedRegister() {
    try {
      await backendStore.apiPost("/api/register", {
        email: managedEmail,
        password: managedPassword,
      });
    } catch (err) {
      console.error("Register failed:", err);
    }
  }
  function handleOverlayClick(e: MouseEvent) {
    if ((e.target as HTMLElement).classList.contains("settings-overlay")) {
      handleCancel();
    }
  }
  function handleKeydown(e: KeyboardEvent) {
    if (e.key === "Escape") {
      handleCancel();
    }
  }
 </script>
 <svelte:window onkeydown={handleKeydown} />
 <!-- svelte-ignore a11y_click_events_have_key_events a11y_no_static_element_interactions -->
 <div class="settings-overlay" role="presentation" onclick={handleOverlayClick}>
  <div class="settings-panel">
    <div class="settings-header">
      <h2>Settings</h2>
      <button class="close-btn" aria-label="Close settings" onclick={handleCancel}>
        <svg
          width="18"
          height="18"
          viewBox="0 0 24 24"
          fill="none"
          stroke="currentColor"
          stroke-width="2"
          stroke-linecap="round"
          stroke-linejoin="round"
        >
          <line x1="18" y1="6" x2="6" y2="18"></line>
          <line x1="6" y1="6" x2="18" y2="18"></line>
        </svg>
      </button>
    </div>
    <div class="settings-content">
      <!-- User Settings -->
      <section class="settings-section">
        <h3>User Settings</h3>
        <div class="field">
          <label for="user-name">Display Name</label>
          <input id="user-name" type="text" bind:value={userName} />
        </div>
      </section>
      <!-- Audio Settings -->
      <section class="settings-section">
        <h3>Audio Settings</h3>
        <div class="field">
          <label for="audio-device">Audio Device</label>
          <select id="audio-device" bind:value={audioDevice}>
            <option value="default">Default</option>
            {#each audioDevices as device}
              <option value={device.id}>{device.name}</option>
            {/each}
          </select>
        </div>
      </section>
      <!-- Transcription Settings -->
      <section class="settings-section">
        <h3>Transcription Settings</h3>
        <div class="field">
          <label for="model">Model</label>
          <select id="model" bind:value={model}>
            {#each modelOptions as opt}
              <option value={opt}>{opt}</option>
            {/each}
          </select>
        </div>
        <div class="field">
          <label for="language">Language</label>
          <input id="language" type="text" bind:value={language} placeholder="en" />
        </div>
        <div class="field">
          <label for="compute-device">Compute Device</label>
          <select id="compute-device" bind:value={computeDevice}>
            {#each computeDevices as dev}
              <option value={dev.id}>{dev.name}</option>
            {/each}
          </select>
        </div>
        <div class="field">
          <label for="compute-type">Compute Type</label>
          <select id="compute-type" bind:value={computeType}>
            {#each computeTypeOptions as opt}
              <option value={opt.value}>{opt.label}</option>
            {/each}
          </select>
        </div>
      </section>
      <!-- Realtime Preview -->
      <section class="settings-section">
        <h3>Realtime Preview</h3>
        <div class="field-row">
          <label for="enable-realtime">Enable Realtime Preview</label>
          <input
            id="enable-realtime"
            type="checkbox"
            bind:checked={enableRealtime}
          />
        </div>
        {#if enableRealtime}
          <div class="field">
            <label for="realtime-model">Realtime Model</label>
            <select id="realtime-model" bind:value={realtimeModel}>
              {#each modelOptions as opt}
                <option value={opt}>{opt}</option>
              {/each}
            </select>
          </div>
          <div class="field">
            <label for="realtime-pause"
              >Processing Pause: {realtimeProcessingPause.toFixed(2)}s</label
            >
            <input
              id="realtime-pause"
              type="range"
              min="0.01"
              max="1.0"
              step="0.01"
              bind:value={realtimeProcessingPause}
            />
          </div>
        {/if}
      </section>
      <!-- VAD Settings -->
      <section class="settings-section">
        <h3>VAD Settings</h3>
        <div class="field">
          <label for="silero-sensitivity"
            >Silero Sensitivity: {sileroSensitivity.toFixed(2)}</label
          >
          <input
            id="silero-sensitivity"
            type="range"
            min="0.0"
            max="1.0"
            step="0.05"
            bind:value={sileroSensitivity}
          />
        </div>
        <div class="field">
          <label for="webrtc-sensitivity">WebRTC Sensitivity</label>
          <select id="webrtc-sensitivity" bind:value={webrtcSensitivity}>
            {#each webrtcOptions as opt}
              <option value={opt.value}>{opt.label}</option>
            {/each}
          </select>
        </div>
      </section>
      <!-- Timing -->
      <section class="settings-section">
        <h3>Timing</h3>
        <div class="field">
          <label for="post-speech-silence"
            >Post-Speech Silence: {postSpeechSilence.toFixed(2)}s</label
          >
          <input
            id="post-speech-silence"
            type="range"
            min="0.1"
            max="3.0"
            step="0.1"
            bind:value={postSpeechSilence}
          />
        </div>
        <div class="field">
          <label for="min-recording"
            >Min Recording Length: {minRecordingLength.toFixed(2)}s</label
          >
          <input
            id="min-recording"
            type="range"
            min="0.1"
            max="5.0"
            step="0.1"
            bind:value={minRecordingLength}
          />
        </div>
        <div class="field">
          <label for="min-gap"
            >Min Gap Between Recordings: {minGapBetween.toFixed(2)}s</label
          >
          <input
            id="min-gap"
            type="range"
            min="0"
            max="3.0"
            step="0.1"
            bind:value={minGapBetween}
          />
        </div>
        <div class="field-row">
          <label for="continuous-mode">Continuous Mode</label>
          <input
            id="continuous-mode"
            type="checkbox"
            bind:checked={continuousMode}
          />
        </div>
      </section>
      <!-- Display Settings -->
      <section class="settings-section">
        <h3>Display Settings</h3>
        <div class="field-row">
          <label for="show-timestamps">Show Timestamps</label>
          <input
            id="show-timestamps"
            type="checkbox"
            bind:checked={showTimestamps}
          />
        </div>
        <div class="field">
          <label for="fade-seconds"
            >Fade After Seconds: {fadeSeconds} (0 = never)</label
          >
          <input
            id="fade-seconds"
            type="range"
            min="0"
            max="60"
            step="1"
            bind:value={fadeSeconds}
          />
        </div>
        <div class="field">
          <label for="max-lines">Max Lines: {maxLines}</label>
          <input
            id="max-lines"
            type="range"
            min="10"
            max="500"
            step="10"
            bind:value={maxLines}
          />
        </div>
        <div class="field">
          <label for="font-size">Font Size: {fontSize}px</label>
          <input
            id="font-size"
            type="range"
            min="8"
            max="32"
            step="1"
            bind:value={fontSize}
          />
        </div>
      </section>
      <!-- Color Settings -->
      <section class="settings-section">
        <h3>Color Settings</h3>
        <div class="field-row">
          <label for="user-color">User Color</label>
          <input id="user-color" type="color" bind:value={userColor} />
        </div>
        <div class="field-row">
          <label for="text-color">Text Color</label>
          <input id="text-color" type="color" bind:value={textColor} />
        </div>
        <div class="field-row">
          <label for="bg-color">Background Color</label>
          <input id="bg-color" type="color" bind:value={backgroundColor} />
        </div>
      </section>
      <!-- Server Sync -->
      <section class="settings-section">
        <h3>Server Sync</h3>
        <div class="field-row">
          <label for="sync-enabled">Enable Server Sync</label>
          <input
            id="sync-enabled"
            type="checkbox"
            bind:checked={syncEnabled}
          />
        </div>
        {#if syncEnabled}
          <div class="field">
            <label for="sync-url">Server URL</label>
            <input
              id="sync-url"
              type="url"
              bind:value={syncUrl}
              placeholder="http://localhost:3000/api/send"
            />
          </div>
          <div class="field">
            <label for="sync-room">Room</label>
            <input id="sync-room" type="text" bind:value={syncRoom} />
          </div>
          <div class="field">
            <label for="sync-passphrase">Passphrase</label>
            <input
              id="sync-passphrase"
              type="password"
              bind:value={syncPassphrase}
            />
          </div>
        {/if}
      </section>
      <!-- Remote Transcription -->
      <section class="settings-section">
        <h3>Remote Transcription</h3>
        <div class="radio-group">
          <label>
            <input
              type="radio"
              name="remote-mode"
              value="local"
              bind:group={remoteMode}
            />
            Local
          </label>
          <label>
            <input
              type="radio"
              name="remote-mode"
              value="managed"
              bind:group={remoteMode}
            />
            Managed
          </label>
          <label>
            <input
              type="radio"
              name="remote-mode"
              value="byok"
              bind:group={remoteMode}
            />
            BYOK (Bring Your Own Key)
          </label>
        </div>
        {#if remoteMode === "managed"}
          <div class="field">
            <label for="remote-url">Server URL</label>
            <input
              id="remote-url"
              type="url"
              bind:value={remoteServerUrl}
              placeholder="wss://your-proxy.com"
            />
          </div>
        {/if}
        {#if remoteMode === "byok"}
          <div class="field">
            <label for="byok-key">Deepgram API Key</label>
            <input
              id="byok-key"
              type="password"
              bind:value={byokApiKey}
              placeholder="Enter your Deepgram API key"
            />
            <p style="font-size: 11px; color: var(--text-muted); margin-top: 4px;">
              Get a key at <a href="https://console.deepgram.com" target="_blank" rel="noopener" style="color: var(--accent-blue);">console.deepgram.com</a>
            </p>
          </div>
        {/if}
        {#if remoteMode === "managed"}
          <div class="managed-auth">
            <div class="field">
              <label for="managed-email">Email</label>
              <input
                id="managed-email"
                type="email"
                bind:value={managedEmail}
                placeholder="email@example.com"
              />
            </div>
            <div class="field">
              <label for="managed-password">Password</label>
              <input
                id="managed-password"
                type="password"
                bind:value={managedPassword}
              />
            </div>
            <div class="auth-buttons">
              <button onclick={handleManagedLogin}>Login</button>
              <button onclick={handleManagedRegister}>Register</button>
            </div>
          </div>
        {/if}
      </section>
      <!-- Updates -->
      <section class="settings-section">
        <h3>Updates</h3>
        <div class="field-row">
          <label for="auto-check-updates">Auto-Check for Updates</label>
          <input
            id="auto-check-updates"
            type="checkbox"
            bind:checked={autoCheckUpdates}
          />
        </div>
        <button onclick={handleCheckUpdates}>Check Now</button>
      </section>
    </div>
    <div class="settings-footer">
      {#if saveMessage}
        <span class="save-message" class:error={saveMessage.startsWith("Error")}>{saveMessage}</span>
      {/if}
      <button onclick={handleCancel} disabled={saving}>Cancel</button>
      <button class="primary" onclick={handleSave} disabled={saving}>
        {saving ? "Saving..." : "Save"}
      </button>
    </div>
  </div>
 </div>
 <style>
  .settings-overlay {
    position: fixed;
    top: 0;
    left: 0;
    right: 0;
    bottom: 0;
    background-color: rgba(0, 0, 0, 0.6);
    display: flex;
    align-items: center;
    justify-content: center;
    z-index: 1000;
  }
  .settings-panel {
    background-color: var(--bg-primary);
    border: 1px solid var(--border-color);
    border-radius: 12px;
    width: 560px;
    max-width: 95vw;
    max-height: 85vh;
    display: flex;
    flex-direction: column;
    box-shadow: 0 8px 32px rgba(0, 0, 0, 0.5);
  }
  .settings-header {
    display: flex;
    align-items: center;
    justify-content: space-between;
    padding: 16px 20px;
    border-bottom: 1px solid var(--border-color);
    flex-shrink: 0;
  }
  .settings-header h2 {
    font-size: 18px;
    font-weight: 600;
    color: var(--text-primary);
  }
  .close-btn {
    display: flex;
    align-items: center;
    justify-content: center;
    width: 32px;
    height: 32px;
    padding: 0;
    border: none;
    border-radius: 6px;
    background-color: transparent;
    color: var(--text-secondary);
    cursor: pointer;
  }
  .close-btn:hover {
    background-color: var(--bg-tertiary);
    color: var(--text-primary);
  }
  .settings-content {
    flex: 1;
    overflow-y: auto;
    padding: 16px 20px;
  }
  .settings-section {
    margin-bottom: 24px;
  }
  .settings-section:last-child {
    margin-bottom: 0;
  }
  .settings-section h3 {
    font-size: 14px;
    font-weight: 600;
    color: var(--accent-blue);
    text-transform: uppercase;
    letter-spacing: 0.5px;
    margin-bottom: 12px;
    padding-bottom: 6px;
    border-bottom: 1px solid var(--border-color);
  }
  .field {
    margin-bottom: 12px;
  }
  .field label {
    display: block;
    margin-bottom: 4px;
    font-size: 12px;
    color: var(--text-secondary);
  }
  .field-row {
    display: flex;
    align-items: center;
    justify-content: space-between;
    margin-bottom: 12px;
  }
  .field-row label {
    font-size: 13px;
    color: var(--text-primary);
  }
  .radio-group {
    display: flex;
    flex-direction: column;
    gap: 8px;
    margin-bottom: 12px;
  }
  .radio-group label {
    display: flex;
    align-items: center;
    gap: 8px;
    font-size: 13px;
    color: var(--text-primary);
    cursor: pointer;
  }
  .managed-auth {
    margin-top: 8px;
    padding: 12px;
    background-color: var(--bg-secondary);
    border-radius: 8px;
  }
  .auth-buttons {
    display: flex;
    gap: 8px;
    margin-top: 8px;
  }
  .settings-footer {
    display: flex;
    align-items: center;
    justify-content: flex-end;
    gap: 8px;
    padding: 16px 20px;
    border-top: 1px solid var(--border-color);
    flex-shrink: 0;
  }
  .save-message {
    margin-right: auto;
    font-size: 13px;
    color: #4CAF50;
  }
  .save-message.error {
    color: #f44336;
  }
 </style>
--- a/src/lib/components/SidecarSetup.svelte
+++ b/src/lib/components/SidecarSetup.svelte
@@ -0,0 +1,384 @@
 <script lang="ts">
  import { invoke } from "@tauri-apps/api/core";
  import { listen } from "@tauri-apps/api/event";
  import { onMount } from "svelte";
   // Props accepted by the first-run setup screen.
   interface Props {
     // Called once the download has finished and the success state has been shown briefly.
     onComplete: () => void;
   }
   let { onComplete }: Props = $props();
   // The four screens this component can show.
   type SetupState = "choose" | "downloading" | "error" | "success";
   let setupState = $state<SetupState>("choose");
   // Which sidecar build to download; passed to the `download_sidecar` command.
   let variant = $state<"cpu" | "cuda">("cpu");
   // Latest values received from "sidecar-download-progress" events.
   // `progress` is rendered directly as a CSS width percentage.
   let progress = $state(0);
   let progressMessage = $state("");
   let errorMessage = $state("");
   // Unsubscribe handle for the progress event listener; null when not subscribed.
   let unlisten: (() => void) | null = null;
  onMount(() => {
    return () => {
      if (unlisten) {
        unlisten();
        unlisten = null;
      }
    };
  });
  async function startDownload() {
    setupState = "downloading";
    progress = 0;
    progressMessage = "Starting download...";
    errorMessage = "";
    try {
      // Listen for progress events from the Tauri backend
      unlisten = await listen<{ progress: number; message: string }>(
        "sidecar-download-progress",
        (event) => {
          progress = event.payload.progress;
          progressMessage = event.payload.message;
        }
      );
      await invoke("download_sidecar", { variant });
      // Download complete
      setupState = "success";
      if (unlisten) {
        unlisten();
        unlisten = null;
      }
      // Brief pause to show success, then proceed
      setTimeout(() => {
        onComplete();
      }, 1500);
    } catch (err) {
      setupState = "error";
      errorMessage = err instanceof Error ? err.message : String(err);
      if (unlisten) {
        unlisten();
        unlisten = null;
      }
    }
  }
  function retry() {
    setupState = "choose";
    progress = 0;
    progressMessage = "";
    errorMessage = "";
  }
 </script>
 <div class="setup-overlay">
  <div class="setup-card">
    <div class="setup-header">
      <h1 class="app-title">Local Transcription</h1>
      <h2 class="setup-heading">First-Time Setup</h2>
    </div>
    {#if setupState === "choose"}
      <p class="setup-description">
        The app needs to download its transcription engine before you can start.
        Choose the version that best fits your hardware.
      </p>
      <div class="variant-options">
        <label class="variant-option" class:selected={variant === "cpu"}>
          <input
            type="radio"
            name="variant"
            value="cpu"
            bind:group={variant}
          />
          <div class="variant-info">
            <span class="variant-name">Standard (CPU)</span>
            <span class="variant-desc">Works on all computers (~500 MB download)</span>
          </div>
        </label>
        <label class="variant-option" class:selected={variant === "cuda"}>
          <input
            type="radio"
            name="variant"
            value="cuda"
            bind:group={variant}
          />
          <div class="variant-info">
            <span class="variant-name">GPU Accelerated (CUDA)</span>
            <span class="variant-desc">Faster transcription with NVIDIA GPU (~2 GB download)</span>
          </div>
        </label>
      </div>
      <button class="download-btn" onclick={startDownload}>
        Download & Install
      </button>
    {:else if setupState === "downloading"}
      <div class="progress-section">
        <p class="progress-message">{progressMessage}</p>
        <div class="progress-bar-track">
          <div
            class="progress-bar-fill"
            style="width: {progress}%"
          ></div>
        </div>
        <p class="progress-percent">{Math.round(progress)}%</p>
      </div>
    {:else if setupState === "error"}
      <div class="error-section">
        <div class="error-icon">
          <svg width="48" height="48" viewBox="0 0 24 24" fill="none" stroke="#f44336" stroke-width="2">
            <circle cx="12" cy="12" r="10"/>
            <line x1="15" y1="9" x2="9" y2="15"/>
            <line x1="9" y1="9" x2="15" y2="15"/>
          </svg>
        </div>
        <p class="error-title">Download Failed</p>
        <p class="error-message">{errorMessage}</p>
        <button class="retry-btn" onclick={retry}>
          Try Again
        </button>
      </div>
    {:else if setupState === "success"}
      <div class="success-section">
        <div class="success-icon">
          <svg width="48" height="48" viewBox="0 0 24 24" fill="none" stroke="#4CAF50" stroke-width="2">
            <circle cx="12" cy="12" r="10"/>
            <polyline points="16 9 10.5 15 8 12.5"/>
          </svg>
        </div>
        <p class="success-title">Setup Complete</p>
        <p class="success-message">The transcription engine is ready to go.</p>
      </div>
    {/if}
  </div>
 </div>
 <style>
  .setup-overlay {
    display: flex;
    align-items: center;
    justify-content: center;
    height: 100%;
    width: 100%;
    background-color: #1e1e1e;
  }
  .setup-card {
    background-color: #2a2a2a;
    border-radius: 12px;
    padding: 40px;
    max-width: 480px;
    width: 100%;
    margin: 20px;
    box-shadow: 0 8px 32px rgba(0, 0, 0, 0.4);
  }
  .setup-header {
    text-align: center;
    margin-bottom: 24px;
  }
  .app-title {
    font-size: 24px;
    font-weight: 700;
    color: #e0e0e0;
    margin-bottom: 4px;
  }
  .setup-heading {
    font-size: 16px;
    font-weight: 500;
    color: #a0a0a0;
  }
  .setup-description {
    font-size: 14px;
    color: #a0a0a0;
    line-height: 1.6;
    text-align: center;
    margin-bottom: 24px;
  }
  .variant-options {
    display: flex;
    flex-direction: column;
    gap: 12px;
    margin-bottom: 24px;
  }
  .variant-option {
    display: flex;
    align-items: center;
    gap: 12px;
    padding: 14px 16px;
    border: 2px solid #444;
    border-radius: 8px;
    cursor: pointer;
    transition: border-color 0.15s ease, background-color 0.15s ease;
  }
  .variant-option:hover {
    background-color: #333;
    border-color: #555;
  }
  .variant-option.selected {
    border-color: #4CAF50;
    background-color: rgba(76, 175, 80, 0.08);
  }
  .variant-option input[type="radio"] {
    width: 18px;
    height: 18px;
    flex-shrink: 0;
  }
  .variant-info {
    display: flex;
    flex-direction: column;
    gap: 2px;
  }
  .variant-name {
    font-size: 14px;
    font-weight: 600;
    color: #e0e0e0;
  }
  .variant-desc {
    font-size: 12px;
    color: #888;
  }
  .download-btn {
    display: block;
    width: 100%;
    padding: 12px 24px;
    font-size: 15px;
    font-weight: 600;
    color: white;
    background-color: #4CAF50;
    border: none;
    border-radius: 8px;
    cursor: pointer;
    transition: background-color 0.15s ease;
  }
  .download-btn:hover {
    background-color: #45a049;
  }
  .download-btn:active {
    transform: scale(0.98);
  }
  /* Progress state */
  .progress-section {
    text-align: center;
    padding: 20px 0;
  }
  .progress-message {
    font-size: 14px;
    color: #a0a0a0;
    margin-bottom: 16px;
  }
  .progress-bar-track {
    width: 100%;
    height: 8px;
    background-color: #3a3a3a;
    border-radius: 4px;
    overflow: hidden;
    margin-bottom: 8px;
  }
  .progress-bar-fill {
    height: 100%;
    background-color: #4CAF50;
    border-radius: 4px;
    transition: width 0.3s ease;
  }
  .progress-percent {
    font-size: 13px;
    color: #707070;
  }
  /* Error state */
  .error-section {
    text-align: center;
    padding: 10px 0;
  }
  .error-icon {
    display: flex;
    justify-content: center;
    margin-bottom: 12px;
  }
  .error-title {
    font-size: 18px;
    font-weight: 600;
    color: #f44336;
    margin-bottom: 8px;
  }
  .error-message {
    font-size: 13px;
    color: #a0a0a0;
    margin-bottom: 20px;
    word-break: break-word;
  }
  .retry-btn {
    display: inline-block;
    padding: 10px 28px;
    font-size: 14px;
    font-weight: 600;
    color: white;
    background-color: #4CAF50;
    border: none;
    border-radius: 8px;
    cursor: pointer;
    transition: background-color 0.15s ease;
  }
  .retry-btn:hover {
    background-color: #45a049;
  }
  /* Success state */
  .success-section {
    text-align: center;
    padding: 20px 0;
  }
  .success-icon {
    display: flex;
    justify-content: center;
    margin-bottom: 12px;
  }
  .success-title {
    font-size: 18px;
    font-weight: 600;
    color: #4CAF50;
    margin-bottom: 4px;
  }
  .success-message {
    font-size: 14px;
    color: #a0a0a0;
  }
 </style>
--- a/src/lib/components/StatusBar.svelte
+++ b/src/lib/components/StatusBar.svelte
@@ -0,0 +1,106 @@
 <script lang="ts">
  import { backendStore } from "$lib/stores/backend";
  import { configStore } from "$lib/stores/config";
  // Colour of the status dot; recomputed whenever backendStore.appState changes.
  // States without their own case (e.g. "reloading", which AppState allows) fall
  // through to the grey default. NOTE(review): confirm grey is intended for "reloading".
  let statusColor = $derived.by(() => {
    switch (backendStore.appState) {
      case "initializing":
        return "#ff9800";
      case "ready":
        return "#4caf50";
      case "transcribing":
        return "#f44336";
      case "error":
        return "#f44336";
      default:
        return "#888";
    }
  });
  // Toggles the "pulsing" CSS class on the dot; true only while transcribing.
  let isPulsing = $derived(backendStore.appState === "transcribing");
  // Name rendered at the right edge of the bar, read from the loaded config.
  let userName = $derived(configStore.config.user.name);
 </script>
 <div class="status-bar">
  <div class="status-left">
    <span
      class="status-indicator"
      class:pulsing={isPulsing}
      style="background-color: {statusColor}"
    ></span>
    <span class="state-message">{backendStore.stateMessage}</span>
  </div>
  <div class="status-right">
    {#if backendStore.deviceInfo}
      <span class="device-info">{backendStore.deviceInfo}</span>
      <span class="separator">|</span>
    {/if}
    <span class="user-name">{userName}</span>
  </div>
 </div>
 <style>
  .status-bar {
    display: flex;
    align-items: center;
    justify-content: space-between;
    padding: 6px 20px;
    background-color: var(--bg-secondary);
    border-bottom: 1px solid var(--border-color);
    font-size: 12px;
    flex-shrink: 0;
  }
  .status-left {
    display: flex;
    align-items: center;
    gap: 8px;
  }
  .status-right {
    display: flex;
    align-items: center;
    gap: 8px;
    color: var(--text-secondary);
  }
  .status-indicator {
    width: 10px;
    height: 10px;
    border-radius: 50%;
    flex-shrink: 0;
  }
  .status-indicator.pulsing {
    animation: pulse 1.5s ease-in-out infinite;
  }
  @keyframes pulse {
    0%,
    100% {
      opacity: 1;
      box-shadow: 0 0 0 0 rgba(244, 67, 54, 0.4);
    }
    50% {
      opacity: 0.7;
      box-shadow: 0 0 0 6px rgba(244, 67, 54, 0);
    }
  }
  .state-message {
    color: var(--text-primary);
  }
  .device-info {
    color: var(--text-secondary);
  }
  .separator {
    color: var(--text-muted);
  }
  .user-name {
    color: var(--accent-green);
    font-weight: 500;
  }
 </style>
--- a/src/lib/components/TranscriptionDisplay.svelte
+++ b/src/lib/components/TranscriptionDisplay.svelte
@@ -0,0 +1,110 @@
 <script lang="ts">
  import { transcriptionStore } from "$lib/stores/transcriptions";
  import { configStore } from "$lib/stores/config";
  // Bound to the scrollable wrapper <div> through bind:this; undefined until then.
  let container: HTMLDivElement | undefined = $state();
  // Whether each row is prefixed with its timestamp (config: display.show_timestamps).
  let showTimestamps = $derived(configStore.config.display.show_timestamps);
  // Rows to render, including the in-progress preview row if one exists.
  let items = $derived(transcriptionStore.items);
  // Auto-scroll: jump to the bottom of the list whenever the number of rows changes.
  $effect(() => {
    // Trigger on items length change to auto-scroll
    // (the value itself is unused; reading it is what subscribes the effect).
    // NOTE(review): a preview replaced in place keeps the same length, so a preview
    // that grows taller presumably does not re-trigger this scroll — confirm.
    const _len = items.length;
    if (container) {
      // Deferred by one frame, presumably so the new row is laid out before
      // scrollHeight is read.
      requestAnimationFrame(() => {
        // Re-check because the binding may have been cleared before the frame ran.
        if (container) {
          container.scrollTop = container.scrollHeight;
        }
      });
    }
  });
 </script>
 <div class="transcription-display" bind:this={container}>
  {#each items as item (item.id)}
    <div class="transcription-item" class:preview={item.isPreview}>
      {#if showTimestamps && item.timestamp}
        <span class="timestamp">[{item.timestamp}]</span>
      {/if}
      {#if item.userName}
        <span class="user-name">{item.userName}:</span>
      {/if}
      {#if item.isPreview}
        <span class="preview-indicator">[...]</span>
      {/if}
      <span class="text">{item.text}</span>
    </div>
  {:else}
    <div class="empty-state">
      Transcriptions will appear here...
    </div>
  {/each}
 </div>
 <style>
  .transcription-display {
    flex: 1;
    overflow-y: auto;
    padding: 12px 20px;
    display: flex;
    flex-direction: column;
    gap: 6px;
  }
  .transcription-item {
    padding: 6px 10px;
    border-radius: 4px;
    background-color: rgba(255, 255, 255, 0.03);
    animation: fadeIn 0.2s ease-out;
    line-height: 1.6;
    word-wrap: break-word;
  }
  .transcription-item.preview {
    font-style: italic;
    opacity: 0.7;
  }
  .timestamp {
    color: #888;
    font-size: 0.85em;
    margin-right: 8px;
    font-family: monospace;
  }
  .user-name {
    color: #4caf50;
    font-weight: 700;
    margin-right: 6px;
  }
  .preview-indicator {
    color: #888;
    font-size: 0.85em;
    margin-right: 4px;
  }
  .text {
    color: #ffffff;
  }
  .empty-state {
    display: flex;
    align-items: center;
    justify-content: center;
    height: 100%;
    color: var(--text-muted);
    font-size: 15px;
    font-style: italic;
  }
  @keyframes fadeIn {
    from {
      opacity: 0;
      transform: translateY(4px);
    }
    to {
      opacity: 1;
      transform: translateY(0);
    }
  }
 </style>
--- a/src/lib/stores/backend.svelte.ts
+++ b/src/lib/stores/backend.svelte.ts
@@ -0,0 +1,310 @@
 /**
 * Backend store - manages WebSocket connection and REST API communication
 * with the Python backend server running on localhost.
 *
 * The backend port defaults to 8081 but can be updated at runtime via
 * `setPort()`. The WebSocket connects to /ws/control for real-time push
 * of transcriptions, previews, and state changes.
 */
 /** Lifecycle of the WebSocket link to the backend, as tracked by this store. */
 export type ConnectionState = "connecting" | "connected" | "disconnected" | "error";
 /** Backend application state, taken from /api/status or a "state_changed" push. */
 export type AppState = "initializing" | "ready" | "transcribing" | "reloading" | "error";
 /** Reactive fields behind the public `backendStore` getters. */
 interface BackendState {
  // Port used for both the REST base URL and the /ws/control socket URL.
  port: number;
  connectionState: ConnectionState;
  appState: AppState;
  // Human-readable status line; set locally on connection problems and from
  // the `message` field of "state_changed" pushes.
  stateMessage: string;
  // Copied from the `engine_device` field of the /api/status response.
  deviceInfo: string;
  // The socket created by _openSocket, or null when none is held.
  wsConnection: WebSocket | null;
  version: string;
  // Latest error text (socket error or an "error" push); reset to "" on open.
  lastError: string;
 }
 let state = $state<BackendState>({
  port: 8081,
  connectionState: "disconnected",
  appState: "initializing",
  stateMessage: "Connecting to backend...",
  deviceInfo: "",
  wsConnection: null,
  // Placeholder until /api/status reports a version.
  // NOTE(review): still reads 1.4.0 although version.py is bumped to 2.0.1 in
  // this change — confirm whether this literal should follow the release.
  version: "1.4.0",
  lastError: "",
 });
 // Handle of the pending reconnect timer; null when no reconnect is scheduled.
 let reconnectTimer: ReturnType<typeof setTimeout> | null = null;
 // Reconnect attempts made since the last successful open; drives the backoff.
 let reconnectAttempts = 0;
 // Backoff bounds: the delay starts at BASE, doubles per attempt, and is capped at MAX.
 const MAX_RECONNECT_DELAY_MS = 30_000;
 const BASE_RECONNECT_DELAY_MS = 1_000;
 // ── URL helpers ──────────────────────────────────────────────────────
 /**
 * Build the absolute REST URL for `path` on the currently configured port.
 * A leading slash is added when `path` does not already start with one.
 */
 function apiUrl(path: string): string {
  const separator = path.startsWith("/") ? "" : "/";
  return `http://localhost:${state.port}${separator}${path}`;
 }
 /**
 * `fetch` wrapper for backend REST calls.
 *
 * Resolves `path` through `apiUrl` and, for any method other than GET,
 * defaults the Content-Type header to JSON unless the caller already set one.
 * All other `options` are forwarded untouched.
 */
 async function apiFetch(path: string, options?: RequestInit): Promise<Response> {
  const endpoint = apiUrl(path);
  const verb = (options?.method ?? "GET").toUpperCase();
  const requestHeaders = new Headers(options?.headers);
  const needsJsonType = verb !== "GET" && !requestHeaders.has("Content-Type");
  if (needsJsonType) {
    requestHeaders.set("Content-Type", "application/json");
  }
  const init: RequestInit = { ...options, headers: requestHeaders };
  return fetch(endpoint, init);
 }
 // ── Status polling ──────────────────────────────────────────────────
 // Handle of the scheduled status re-poll, or null when none is pending.
 let statusPollTimer: ReturnType<typeof setTimeout> | null = null;
 /**
 * Fetch /api/status once and copy `state`, `engine_device` and `version`
 * into the store when present. Network and parse failures are ignored.
 *
 * While the backend still reports "initializing" and the socket is connected,
 * the function re-arms itself every 2 s. At most one re-poll is ever pending:
 * the socket's open handler calls this on every (re)connect, and without the
 * cancellation below a reconnect inside the 2 s window would leave two
 * independent poll chains running side by side.
 */
 async function pollStatus(): Promise<void> {
  // A direct call supersedes whatever re-poll was scheduled earlier.
  if (statusPollTimer) {
    clearTimeout(statusPollTimer);
    statusPollTimer = null;
  }
  try {
    const resp = await fetch(apiUrl("/api/status"));
    if (resp.ok) {
      const data = await resp.json();
      if (data.state) {
        state.appState = data.state as AppState;
      }
      if (data.engine_device) {
        state.deviceInfo = data.engine_device;
      }
      if (data.version) {
        state.version = data.version;
      }
    }
  } catch {
    // API not ready yet, will retry
  }
  // Keep polling every 2s while still initializing
  if (state.appState === "initializing" && state.connectionState === "connected") {
    // Another invocation may have armed a timer while this one was awaiting.
    if (statusPollTimer) {
      clearTimeout(statusPollTimer);
    }
    statusPollTimer = setTimeout(pollStatus, 2000);
  }
 }
 // ── WebSocket management ─────────────────────────────────────────────
 /**
 * Start a fresh connection to the backend.
 * Any existing socket and pending timers are dropped first (via `disconnect`),
 * the backoff counter is reset, and a new socket is opened.
 */
 function connectWebSocket() {
  disconnect();
  reconnectAttempts = 0;
  state.connectionState = "connecting";
  _openSocket();
 }
 /**
 * Create the /ws/control socket for the current port and wire its handlers.
 *
 * - open: marks the store connected, clears error/backoff state and polls
 *   /api/status once.
 * - message: parses the JSON payload and hands it to handleWebSocketMessage;
 *   anything that throws is ignored.
 * - close: unless the store was deliberately disconnected, flags an error and
 *   schedules a reconnect.
 * - error: only records the error text; the close handler does the rest.
 *
 * If the socket cannot even be constructed, a reconnect is scheduled as well.
 */
 function _openSocket() {
  const endpoint = `ws://localhost:${state.port}/ws/control`;
  const handleOpen = () => {
    state.connectionState = "connected";
    state.lastError = "";
    reconnectAttempts = 0;
    if (reconnectTimer) {
      clearTimeout(reconnectTimer);
      reconnectTimer = null;
    }
    // The engine may have become ready before the socket connected, in which
    // case its state push was missed; ask for the current status explicitly.
    pollStatus();
  };
  const handleMessage = (event: MessageEvent) => {
    try {
      handleWebSocketMessage(JSON.parse(event.data));
    } catch {
      // ignore parse errors
    }
  };
  const handleClose = () => {
    state.wsConnection = null;
    if (state.connectionState === "disconnected") {
      return;
    }
    state.connectionState = "error";
    state.stateMessage = "Disconnected from backend";
    _scheduleReconnect();
  };
  const handleError = () => {
    // A close event follows and takes care of reconnecting.
    state.lastError = "WebSocket error";
  };
  try {
    const socket = new WebSocket(endpoint);
    socket.onopen = handleOpen;
    socket.onmessage = handleMessage;
    socket.onclose = handleClose;
    socket.onerror = handleError;
    state.wsConnection = socket;
  } catch {
    state.connectionState = "error";
    state.stateMessage = "Failed to connect";
    _scheduleReconnect();
  }
 }
 /**
 * Arm a single reconnect attempt with exponential backoff.
 * The delay is BASE_RECONNECT_DELAY_MS doubled once per previous attempt and
 * capped at MAX_RECONNECT_DELAY_MS. Does nothing if an attempt is already
 * pending, and the attempt itself is skipped if the store has been
 * disconnected by the time the timer fires.
 */
 function _scheduleReconnect() {
  if (reconnectTimer) {
    return;
  }
  const uncapped = BASE_RECONNECT_DELAY_MS * 2 ** reconnectAttempts;
  const delay = Math.min(uncapped, MAX_RECONNECT_DELAY_MS);
  reconnectAttempts += 1;
  reconnectTimer = setTimeout(() => {
    reconnectTimer = null;
    if (state.connectionState === "disconnected") {
      return;
    }
    state.connectionState = "connecting";
    _openSocket();
  }, delay);
 }
 /**
 * Deliberately tear down the backend connection.
 *
 * Cancels the status re-poll and any pending reconnect, marks the store
 * "disconnected" (which also stops the reconnect logic from re-arming), and
 * closes the current socket if there is one.
 */
 function disconnect() {
  if (statusPollTimer) {
    clearTimeout(statusPollTimer);
    statusPollTimer = null;
  }
  if (reconnectTimer) {
    clearTimeout(reconnectTimer);
    reconnectTimer = null;
  }
  state.connectionState = "disconnected";
  const ws = state.wsConnection;
  if (ws) {
    // Detach every handler, not only close/error: a socket that is being
    // discarded must not be able to flip the store back to "connected",
    // restart status polling, or dispatch messages after this call.
    ws.onopen = null;
    ws.onmessage = null;
    ws.onclose = null;
    ws.onerror = null;
    ws.close();
    state.wsConnection = null;
  }
 }
 // ── WebSocket message handling ───────────────────────────────────────
 /**
 * Route one parsed WebSocket message by its `type` field.
 *
 * "state_changed" and "error" update this store directly. "transcription",
 * "preview" and "credits_low" are re-broadcast on `window` as
 * `backend:<type>` CustomEvents with the whole message as `detail`, so other
 * stores can listen without importing this one. Any other type is ignored.
 */
 function handleWebSocketMessage(data: Record<string, unknown>) {
  switch (data.type) {
    case "state_changed":
      // Only overwrite fields the message actually carries.
      if (data.state) {
        state.appState = data.state as AppState;
      }
      if (data.message) {
        state.stateMessage = data.message as string;
      }
      break;
    case "error":
      state.lastError = (data.message as string) ?? "Unknown error";
      break;
    case "transcription":
      window.dispatchEvent(
        new CustomEvent("backend:transcription", { detail: data })
      );
      break;
    case "preview":
      window.dispatchEvent(
        new CustomEvent("backend:preview", { detail: data })
      );
      break;
    case "credits_low":
      window.dispatchEvent(
        new CustomEvent("backend:credits_low", { detail: data })
      );
      break;
    default:
      break;
  }
 }
 // ── Port management ──────────────────────────────────────────────────
 /**
 * Point the store at a different backend port.
 * A call with the current port is a no-op. Otherwise the port is stored and,
 * unless the store is in the "disconnected" state, the socket is re-established
 * against the new port.
 */
 function setPort(newPort: number) {
  const unchanged = newPort === state.port;
  if (unchanged) {
    return;
  }
  state.port = newPort;
  const wasActive = state.connectionState !== "disconnected";
  if (wasActive) {
    connectWebSocket();
  }
 }
 // ── Typed REST helpers ───────────────────────────────────────────────
 /**
 * GET `path` from the backend and return the parsed JSON body.
 * Throws an Error naming the path and status code on a non-2xx response.
 */
 async function apiGet<T = unknown>(path: string): Promise<T> {
  const response = await apiFetch(path);
  if (response.ok) {
    return response.json();
  }
  throw new Error(`GET ${path} failed: ${response.status}`);
 }
 /**
 * POST to `path` and return the parsed JSON body.
 * `body`, when provided, is sent JSON-encoded; when omitted no body is sent.
 * Throws an Error naming the path and status code on a non-2xx response.
 */
 async function apiPost<T = unknown>(
  path: string,
  body?: unknown
 ): Promise<T> {
  const payload = body === undefined ? undefined : JSON.stringify(body);
  const response = await apiFetch(path, { method: "POST", body: payload });
  if (response.ok) {
    return response.json();
  }
  throw new Error(`POST ${path} failed: ${response.status}`);
 }
 /**
 * PUT to `path` and return the parsed JSON body.
 * `body`, when provided, is sent JSON-encoded; when omitted no body is sent.
 * Throws an Error naming the path and status code on a non-2xx response.
 */
 async function apiPut<T = unknown>(
  path: string,
  body?: unknown
 ): Promise<T> {
  const payload = body === undefined ? undefined : JSON.stringify(body);
  const response = await apiFetch(path, { method: "PUT", body: payload });
  if (response.ok) {
    return response.json();
  }
  throw new Error(`PUT ${path} failed: ${response.status}`);
 }
 // ── Public API ───────────────────────────────────────────────────────
 /**
 * Public facade of the backend store.
 *
 * State is exposed through getters rather than plain properties so that every
 * read goes through the reactive `state` object at access time; consumers
 * (e.g. `$derived(backendStore.appState)`) therefore see later updates.
 * The remaining members are the connection controls and REST helpers defined
 * in this module.
 */
 export const backendStore = {
  get port() {
    return state.port;
  },
  get connectionState() {
    return state.connectionState;
  },
  // Convenience flag: true only in the "connected" state.
  get connected() {
    return state.connectionState === "connected";
  },
  get appState() {
    return state.appState;
  },
  get stateMessage() {
    return state.stateMessage;
  },
  get deviceInfo() {
    return state.deviceInfo;
  },
  get version() {
    return state.version;
  },
  get lastError() {
    return state.lastError;
  },
  // Same origin that apiUrl() prefixes onto request paths.
  get apiBaseUrl() {
    return `http://localhost:${state.port}`;
  },
  // Same URL that _openSocket() connects to.
  get wsUrl() {
    return `ws://localhost:${state.port}/ws/control`;
  },
  get obsUrl() {
    // OBS display runs on the web server port (one below the API port)
    // Falls back to 8080 when the stored port is not positive.
    const obsPort = state.port > 0 ? state.port - 1 : 8080;
    return `http://localhost:${obsPort}`;
  },
  // Always the empty string in this version.
  get syncUrl() {
    return "";
  },
  setPort,
  connect: connectWebSocket,
  disconnect,
  apiUrl,
  apiFetch,
  apiGet,
  apiPost,
  apiPut,
 };
--- a/src/lib/stores/backend.test.ts
+++ b/src/lib/stores/backend.test.ts
@@ -0,0 +1,77 @@
 import { describe, it, expect, vi, beforeEach } from "vitest";
 import { backendStore } from "./backend.svelte.ts";
 // Mock WebSocket globally so the store module can reference it
 // The mock never fires any event, so a store that calls connect() would stay
 // in "connecting"; only construction and close() are observable.
 class MockWebSocket {
  onopen: ((ev: Event) => void) | null = null;
  onclose: ((ev: CloseEvent) => void) | null = null;
  onmessage: ((ev: MessageEvent) => void) | null = null;
  onerror: ((ev: Event) => void) | null = null;
  close = vi.fn();
 }
 vi.stubGlobal("WebSocket", MockWebSocket);
 // Mock fetch to prevent real network calls
 // Every request resolves to an OK response whose JSON body is an empty object.
 vi.stubGlobal(
  "fetch",
  vi.fn(() =>
    Promise.resolve({
      ok: true,
      json: () => Promise.resolve({}),
    })
  )
 );
 // Smoke tests for the public surface of backendStore: member presence and the
 // URLs derived from the port. No socket events or REST calls are exercised.
 describe("backend store", () => {
  beforeEach(() => {
    // Reset to a known baseline before each test.
    backendStore.disconnect();
    backendStore.setPort(8081);
  });
  it("test_exports_expected_properties", () => {
    expect(backendStore).toHaveProperty("port");
    expect(backendStore).toHaveProperty("connectionState");
    expect(backendStore).toHaveProperty("connected");
    expect(backendStore).toHaveProperty("appState");
    expect(backendStore).toHaveProperty("stateMessage");
    expect(backendStore).toHaveProperty("deviceInfo");
    expect(backendStore).toHaveProperty("version");
    expect(backendStore).toHaveProperty("lastError");
    expect(backendStore).toHaveProperty("apiBaseUrl");
    expect(backendStore).toHaveProperty("wsUrl");
    expect(backendStore).toHaveProperty("obsUrl");
    expect(backendStore).toHaveProperty("syncUrl");
  });
  it("test_exports_expected_methods", () => {
    expect(typeof backendStore.setPort).toBe("function");
    expect(typeof backendStore.connect).toBe("function");
    expect(typeof backendStore.disconnect).toBe("function");
    expect(typeof backendStore.apiUrl).toBe("function");
    expect(typeof backendStore.apiFetch).toBe("function");
    expect(typeof backendStore.apiGet).toBe("function");
    expect(typeof backendStore.apiPost).toBe("function");
    expect(typeof backendStore.apiPut).toBe("function");
  });
  // NOTE(review): the tests below always pass 8081, which is also the store's
  // default, so setPort() returns early and a real port change is never covered.
  it("test_obsUrl_derives_from_port", () => {
    backendStore.setPort(8081);
    expect(backendStore.obsUrl).toBe("http://localhost:8080");
  });
  it("test_apiBaseUrl_uses_port", () => {
    backendStore.setPort(8081);
    expect(backendStore.apiBaseUrl).toBe("http://localhost:8081");
  });
  it("test_wsUrl_uses_port", () => {
    backendStore.setPort(8081);
    expect(backendStore.wsUrl).toBe("ws://localhost:8081/ws/control");
  });
  it("test_initial_state", () => {
    // After disconnect() in beforeEach, state should be disconnected
    expect(backendStore.connectionState).toBe("disconnected");
    // appState is untouched because no status poll or socket message ever runs here.
    expect(backendStore.appState).toBe("initializing");
  });
 });
--- a/src/lib/stores/config.svelte.ts
+++ b/src/lib/stores/config.svelte.ts
@@ -0,0 +1,243 @@
 /**
 * Config store - manages application configuration loaded from
 * and saved to the Python backend via the backend store's API helpers.
 *
 * The backend accepts PUT /api/config with `{ settings: { "dot.key": value } }`.
 */
 import { backendStore } from "$lib/stores/backend";
 /**
 * Shape of the configuration tree held by the config store.
 *
 * `getDefaultConfig()` supplies a value for every field; `fetchConfig()` lays
 * the backend's GET /api/config response over those defaults and casts the
 * result to this type (the response itself is not validated).
 */
 export interface AppConfig {
  // `name` is the label shown in the status bar.
  user: {
    name: string;
    id: string;
  };
  audio: {
    input_device: string;
    sample_rate: number;
  };
  // Transcription engine settings; field meanings are defined by the backend.
  transcription: {
    model: string;
    device: string;
    language: string;
    compute_type: string;
    enable_realtime_transcription: boolean;
    realtime_model: string;
    realtime_processing_pause: number;
    silero_sensitivity: number;
    silero_use_onnx: boolean;
    webrtc_sensitivity: number;
    post_speech_silence_duration: number;
    min_length_of_recording: number;
    min_gap_between_recordings: number;
    pre_recording_buffer_duration: number;
    beam_size: number;
    initial_prompt: string;
    no_log_file: boolean;
    continuous_mode: boolean;
  };
  server_sync: {
    enabled: boolean;
    url: string;
    room: string;
    passphrase: string;
  };
  // `show_timestamps` controls the timestamp prefix in the transcription list.
  display: {
    show_timestamps: boolean;
    max_lines: number;
    font_source: string;
    font_family: string;
    websafe_font: string;
    google_font: string;
    custom_font_file: string;
    font_size: number;
    theme: string;
    fade_after_seconds: number;
    user_color: string;
    text_color: string;
    background_color: string;
  };
  web_server: {
    port: number;
    host: string;
  };
  // Remote/cloud transcription settings, including credentials
  // (`auth_token`, `byok_api_key`).
  remote: {
    mode: string;
    server_url: string;
    auth_token: string;
    byok_api_key: string;
    deepgram_model: string;
    language: string;
    fallback_to_local: boolean;
  };
  // Update-check settings; `skipped_versions` is the only array in the tree.
  updates: {
    auto_check: boolean;
    gitea_url: string;
    owner: string;
    repo: string;
    skipped_versions: string[];
    last_check: string;
    check_interval_hours: number;
  };
 }
 /**
 * Build a brand-new copy of the built-in default configuration.
 * Every call returns fresh objects, so callers may mutate or merge into the
 * result without affecting later calls.
 */
 function getDefaultConfig(): AppConfig {
  const user = { name: "User", id: "" };
  const audio = { input_device: "default", sample_rate: 16000 };
  const transcription = {
    model: "base.en",
    device: "auto",
    language: "en",
    compute_type: "default",
    enable_realtime_transcription: false,
    realtime_model: "tiny.en",
    realtime_processing_pause: 0.1,
    silero_sensitivity: 0.4,
    silero_use_onnx: true,
    webrtc_sensitivity: 3,
    post_speech_silence_duration: 0.3,
    min_length_of_recording: 0.5,
    min_gap_between_recordings: 0,
    pre_recording_buffer_duration: 0.2,
    beam_size: 5,
    initial_prompt: "",
    no_log_file: true,
    continuous_mode: false,
  };
  const server_sync = {
    enabled: false,
    url: "http://localhost:3000/api/send",
    room: "default",
    passphrase: "",
  };
  const display = {
    show_timestamps: true,
    max_lines: 100,
    font_source: "System Font",
    font_family: "Courier",
    websafe_font: "Arial",
    google_font: "Roboto",
    custom_font_file: "",
    font_size: 12,
    theme: "dark",
    fade_after_seconds: 10,
    user_color: "#4CAF50",
    text_color: "#FFFFFF",
    background_color: "#000000B3",
  };
  const web_server = { port: 8080, host: "127.0.0.1" };
  const remote = {
    mode: "local",
    server_url: "",
    auth_token: "",
    byok_api_key: "",
    deepgram_model: "nova-2",
    language: "en-US",
    fallback_to_local: true,
  };
  const skippedVersions: string[] = [];
  const updates = {
    auto_check: true,
    gitea_url: "https://repo.anhonesthost.net",
    owner: "streamer-tools",
    repo: "local-transcription",
    skipped_versions: skippedVersions,
    last_check: "",
    check_interval_hours: 24,
  };
  return { user, audio, transcription, server_sync, display, web_server, remote, updates };
 }
 // Current configuration tree; starts as the built-in defaults and is replaced
 // wholesale by fetchConfig() once the backend answers.
 let config = $state<AppConfig>(getDefaultConfig());
 // True while fetchConfig() or updateConfig() is in flight.
 let loading = $state(false);
 // Message of the most recent failed fetch/update; "" when the last call started clean.
 let error = $state("");
 /**
 * Load the configuration tree from the backend (GET /api/config) and store
 * it, filling any key the response lacks from the built-in defaults.
 *
 * Never rejects: on failure the message is written to `error` and logged,
 * and the previously held config is kept. `loading` is true for the duration.
 */
 async function fetchConfig(): Promise<void> {
  loading = true;
  error = "";
  try {
    const remote = await backendStore.apiGet<Record<string, unknown>>("/api/config");
    // Defaults first, backend values on top, so every expected key exists.
    const merged = deepMerge(getDefaultConfig(), remote);
    config = merged as AppConfig;
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    error = message;
    console.error("[config] fetchConfig failed:", message);
  } finally {
    loading = false;
  }
 }
 /**
 * Recursively overlay `source` onto a copy of `target` and return the copy.
 *
 * - When both sides hold a plain (non-array) object under the same key, the
 *   two are merged key by key; otherwise the `source` value wins. Arrays are
 *   replaced, never merged.
 * - A `null`/`undefined` value in `source` does not overwrite a value that
 *   `target` already has, so a missing section in the backend response cannot
 *   wipe out the defaults the UI relies on.
 * - An own `__proto__` key (which JSON.parse can produce) is skipped, because
 *   assigning it would replace the prototype of the merged object instead of
 *   storing a setting.
 *
 * Neither argument is mutated.
 */
 function deepMerge(target: Record<string, unknown>, source: Record<string, unknown>): Record<string, unknown> {
  const isPlainObject = (value: unknown): value is Record<string, unknown> =>
    typeof value === "object" && value !== null && !Array.isArray(value);
  const result: Record<string, unknown> = { ...target };
  for (const key of Object.keys(source)) {
    if (key === "__proto__") {
      continue;
    }
    const incoming = source[key];
    const hasDefault = Object.prototype.hasOwnProperty.call(target, key);
    if (incoming == null && hasDefault) {
      // Keep the existing value rather than replacing it with nothing.
      continue;
    }
    const existing = hasDefault ? target[key] : undefined;
    result[key] =
      isPlainObject(incoming) && isPlainObject(existing)
        ? deepMerge(existing, incoming)
        : incoming;
  }
  return result;
 }
 /**
 * Push a batch of setting changes to the backend, then re-read the config.
 *
 * Sends PUT /api/config with body `{ settings: { "dot.key": value, ... } }`;
 * keys are dot-paths such as `"transcription.model"`. After a successful PUT
 * the local tree is refreshed through `fetchConfig()` so the UI shows what the
 * backend actually stored.
 *
 * Resolves with the backend's response payload. If the PUT fails, the message
 * is written to `error`, logged, and the original error is re-thrown.
 */
 async function updateConfig(
  settings: Record<string, unknown>,
 ): Promise<{ status: string; message: string; engine_reloaded: boolean }> {
  type UpdateResponse = { status: string; message: string; engine_reloaded: boolean };
  loading = true;
  error = "";
  try {
    const response = await backendStore.apiPut<UpdateResponse>("/api/config", { settings });
    await fetchConfig();
    return response;
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    error = message;
    console.error("[config] updateConfig failed:", message);
    throw err;
  } finally {
    loading = false;
  }
 }
 /**
 * Public facade of the config store.
 * `config`, `loading` and `error` are getters so each read returns the current
 * value of the module-level state (all three are reassigned by the functions
 * above), rather than a snapshot taken when this object was created.
 */
 export const configStore = {
  get config() {
    return config;
  },
  get loading() {
    return loading;
  },
  get error() {
    return error;
  },
  fetchConfig,
  updateConfig,
 };
--- a/src/lib/stores/config.test.ts
+++ b/src/lib/stores/config.test.ts
@@ -0,0 +1,48 @@
 import { describe, it, expect, vi } from "vitest";
 // Mock fetch so the backend store module doesn't make real requests
 // Every request resolves to an OK response whose JSON body is an empty object.
 vi.stubGlobal(
  "fetch",
  vi.fn(() =>
    Promise.resolve({
      ok: true,
      json: () => Promise.resolve({}),
    })
  )
 );
 // Mock WebSocket for the backend store dependency
 // The mock never fires events; it only needs to be constructible and closable.
 class MockWebSocket {
  onopen: ((ev: Event) => void) | null = null;
  onclose: ((ev: CloseEvent) => void) | null = null;
  onmessage: ((ev: MessageEvent) => void) | null = null;
  onerror: ((ev: Event) => void) | null = null;
  close = vi.fn();
 }
 vi.stubGlobal("WebSocket", MockWebSocket);
 import { configStore } from "./config.svelte.ts";
 // Smoke tests for configStore: the two methods exist and the default tree
 // has the expected top-level sections. fetchConfig/updateConfig are not invoked.
 describe("config store", () => {
  it("test_has_fetchConfig_method", () => {
    expect(typeof configStore.fetchConfig).toBe("function");
  });
  it("test_has_updateConfig_method", () => {
    expect(typeof configStore.updateConfig).toBe("function");
  });
  it("test_config_defaults_have_expected_keys", () => {
    // NOTE(review): "server_sync" and "web_server" are part of the defaults but
    // are not asserted here.
    const cfg = configStore.config;
    expect(cfg).toHaveProperty("user");
    expect(cfg).toHaveProperty("audio");
    expect(cfg).toHaveProperty("transcription");
    expect(cfg).toHaveProperty("display");
    expect(cfg).toHaveProperty("remote");
    expect(cfg).toHaveProperty("updates");
  });
  it("test_remote_config_has_byok_api_key", () => {
    // The default is the empty string, which still counts as defined.
    expect(configStore.config.remote.byok_api_key).toBeDefined();
  });
 });
--- a/src/lib/stores/file-extension.test.ts
+++ b/src/lib/stores/file-extension.test.ts
@@ -0,0 +1,34 @@
 import { describe, it, expect } from "vitest";
 import * as fs from "node:fs";
 import * as path from "node:path";
 // Guards against rune usage in plain .ts store files: Svelte only compiles
 // runes in .svelte / .svelte.ts modules, so `$state(...)` in a plain .ts file
 // would reach the runtime untransformed.
 describe("store file extensions", () => {
  it("test_store_files_use_svelte_ts_extension", () => {
    const storesDir = path.resolve(__dirname);
    const files = fs.readdirSync(storesDir);
    // Find .ts files that are NOT .svelte.ts and NOT test files
    const plainTsFiles = files.filter(
      (f) =>
        f.endsWith(".ts") &&
        !f.endsWith(".svelte.ts") &&
        !f.endsWith(".test.ts")
    );
    for (const file of plainTsFiles) {
      const content = fs.readFileSync(path.join(storesDir, file), "utf-8");
      // The failure message is the second argument of expect(); toMatch()
      // accepts only the pattern, so a message passed there is never shown.
      // The patterns also cover the dotted rune forms ($state.raw, $derived.by,
      // $effect.pre / $effect.root), which the bare-name check would miss.
      expect(
        content,
        `${file} uses $state() but does not have .svelte.ts extension`
      ).not.toMatch(/\$state(?:\.raw)?\s*[<(]/);
      expect(
        content,
        `${file} uses $derived() but does not have .svelte.ts extension`
      ).not.toMatch(/\$derived(?:\.by)?\s*[<(]/);
      expect(
        content,
        `${file} uses $effect() but does not have .svelte.ts extension`
      ).not.toMatch(/\$effect(?:\.pre|\.root)?\s*[<(]/);
    }
  });
 });
--- a/src/lib/stores/transcriptions.svelte.ts
+++ b/src/lib/stores/transcriptions.svelte.ts
@@ -0,0 +1,109 @@
 /**
 * Transcriptions store - manages the list of transcription items
 * received from the backend via WebSocket.
 */
 /** One row of the transcription list, either final or an in-progress preview. */
 export interface TranscriptionItem {
  // Locally generated key (see generateId); a preview keeps its id across updates.
  id: string;
  text: string;
  // Taken from the message's `user_name`; "" when the message had none.
  userName: string;
  // Taken from the message's `timestamp` as-is; "" when the message had none.
  timestamp: string;
  // True for the single provisional row that the next final transcription replaces.
  isPreview: boolean;
 }
 // Reactive list of rows in arrival order; it is mutated in place
 // (push/splice/index assignment) and never reassigned.
 let items = $state<TranscriptionItem[]>([]);
 // Per-module counter appended to every id so that two ids created within the
 // same millisecond still differ.
 let nextId = 0;
 /** Return a new row id of the form `t-<epoch ms>-<sequence>`. */
 function generateId(): string {
  const sequence = nextId;
  nextId = sequence + 1;
  return `t-${Date.now()}-${sequence}`;
 }
 /**
 * Append a final transcription to the list.
 * The current preview row, if any, is removed first (the final text supersedes
 * it), missing fields default to "", and the list is trimmed from the front so
 * it never holds more than 500 rows.
 */
 function addTranscription(data: {
  text?: string;
  user_name?: string;
  timestamp?: string;
 }) {
  const maxItems = 500;
  const stalePreview = items.findIndex((entry) => entry.isPreview);
  if (stalePreview >= 0) {
    items.splice(stalePreview, 1);
  }
  const entry: TranscriptionItem = {
    id: generateId(),
    text: data.text ?? "",
    userName: data.user_name ?? "",
    timestamp: data.timestamp ?? "",
    isPreview: false,
  };
  items.push(entry);
  // Drop the oldest rows once the cap is exceeded.
  const overflow = items.length - maxItems;
  if (overflow > 0) {
    items.splice(0, overflow);
  }
 }
 /**
 * Show or refresh the single in-progress preview row.
 * If a preview already exists it is replaced in place and keeps its id;
 * otherwise a new preview row is appended. Missing fields default to "".
 */
 function setPreview(data: {
  text?: string;
  user_name?: string;
  timestamp?: string;
 }) {
  const slot = items.findIndex((entry) => entry.isPreview);
  const hasPreview = slot !== -1;
  const preview: TranscriptionItem = {
    id: hasPreview ? items[slot].id : generateId(),
    text: data.text ?? "",
    userName: data.user_name ?? "",
    timestamp: data.timestamp ?? "",
    isPreview: true,
  };
  if (!hasPreview) {
    items.push(preview);
    return;
  }
  items[slot] = preview;
 }
 /** Remove every row, previews included, keeping the same array instance. */
 function clearAll() {
  items.splice(0, items.length);
 }
 /**
 * Render all final rows as plain text, one per line.
 * Each line is `[timestamp] userName: text`, with the timestamp and user
 * parts left out when empty. Preview rows are skipped.
 */
 function getPlainText(): string {
  const lines: string[] = [];
  for (const entry of items) {
    if (entry.isPreview) {
      continue;
    }
    const stamp = entry.timestamp ? `[${entry.timestamp}] ` : "";
    const speaker = entry.userName ? `${entry.userName}: ` : "";
    lines.push(stamp + speaker + entry.text);
  }
  return lines.join("\n");
 }
 // Listen for backend events
 // Subscribe once, at module load, to the CustomEvents the backend store
 // dispatches on `window`; `detail` is the raw WebSocket message. The guard
 // keeps the module importable where no `window` exists.
 // The listeners are never removed.
 if (typeof window !== "undefined") {
  window.addEventListener("backend:transcription", ((e: CustomEvent) => {
    addTranscription(e.detail);
  }) as EventListener);
  window.addEventListener("backend:preview", ((e: CustomEvent) => {
    setPreview(e.detail);
  }) as EventListener);
 }
 /**
 * Public facade of the transcriptions store.
 * The getters read the reactive `items` array at access time; `currentPreview`
 * and `transcriptions` are computed on every read.
 */
 export const transcriptionStore = {
  // The live reactive array (final rows plus the preview row, if any).
  get items() {
    return items;
  },
  // The preview row, or null when nothing is in progress.
  get currentPreview(): TranscriptionItem | null {
    return items.find((item) => item.isPreview) ?? null;
  },
  // A new array containing only the final rows.
  get transcriptions(): TranscriptionItem[] {
    return items.filter((item) => !item.isPreview);
  },
  addTranscription,
  setPreview,
  clearAll,
  getPlainText,
 };
--- a/src/lib/stores/transcriptions.test.ts
+++ b/src/lib/stores/transcriptions.test.ts
@@ -0,0 +1,71 @@
 import { describe, it, expect, vi, beforeEach } from "vitest";
 // WebSocket and fetch stubs, kept as a safety net.
 // NOTE(review): transcriptions.svelte.ts as shown has no imports and does not
 // load the backend store, so these stubs look unnecessary here — confirm.
 class MockWebSocket {
  onopen: ((ev: Event) => void) | null = null;
  onclose: ((ev: CloseEvent) => void) | null = null;
  onmessage: ((ev: MessageEvent) => void) | null = null;
  onerror: ((ev: Event) => void) | null = null;
  close = vi.fn();
 }
 vi.stubGlobal("WebSocket", MockWebSocket);
 // Every request resolves to an OK response whose JSON body is an empty object.
 vi.stubGlobal(
  "fetch",
  vi.fn(() =>
    Promise.resolve({
      ok: true,
      json: () => Promise.resolve({}),
    })
  )
 );
 import { transcriptionStore } from "./transcriptions.svelte.ts";
 // Covers adding final rows, clearing, and plain-text export.
 // setPreview, the preview-replacement path and the 500-row cap are not exercised.
 describe("transcriptions store", () => {
  beforeEach(() => {
    // The store is a module-level singleton, so start each test from an empty list.
    transcriptionStore.clearAll();
  });
  it("test_addTranscription", () => {
    transcriptionStore.addTranscription({
      text: "Hello world",
      user_name: "TestUser",
      timestamp: "12:00:00",
    });
    expect(transcriptionStore.items.length).toBe(1);
    expect(transcriptionStore.items[0].text).toBe("Hello world");
    // snake_case `user_name` from the message maps to camelCase `userName`.
    expect(transcriptionStore.items[0].userName).toBe("TestUser");
    expect(transcriptionStore.items[0].timestamp).toBe("12:00:00");
    expect(transcriptionStore.items[0].isPreview).toBe(false);
  });
  it("test_clearAll", () => {
    transcriptionStore.addTranscription({ text: "One" });
    transcriptionStore.addTranscription({ text: "Two" });
    expect(transcriptionStore.items.length).toBe(2);
    transcriptionStore.clearAll();
    expect(transcriptionStore.items.length).toBe(0);
  });
  it("test_getPlainText", () => {
    transcriptionStore.addTranscription({
      text: "Hello",
      user_name: "Alice",
      timestamp: "10:00",
    });
    transcriptionStore.addTranscription({
      text: "World",
      user_name: "Bob",
      timestamp: "10:01",
    });
    const text = transcriptionStore.getPlainText();
    expect(text).toContain("[10:00] Alice: Hello");
    expect(text).toContain("[10:01] Bob: World");
    // Lines separated by newline
    expect(text.split("\n").length).toBe(2);
  });
 });
--- a/src/main.ts
+++ b/src/main.ts
@@ -0,0 +1,6 @@
 import App from "./App.svelte";
 import { mount } from "svelte";
 import "./app.css";
 // Resolve the mount point explicitly instead of asserting it non-null, so a
 // missing element fails here with a clear message rather than inside mount().
 const target = document.getElementById("app");
 if (!target) {
  throw new Error('Cannot mount App: no element with id "app" was found');
 }
 const app = mount(App, { target });
 export default app;
--- a/svelte.config.js
+++ b/svelte.config.js
@@ -0,0 +1,5 @@
 import { vitePreprocess } from "@sveltejs/vite-plugin-svelte";
 // Svelte compiler configuration: the only setting is the Vite-based
 // preprocessor, which is applied to components before Svelte compiles them.
 export default {
  preprocess: vitePreprocess(),
 };
--- a/tsconfig.json
+++ b/tsconfig.json
@@ -0,0 +1,15 @@
 {
  "extends": "@tsconfig/svelte/tsconfig.json",
  "compilerOptions": {
    "target": "ESNext",
    "useDefineForClassFields": true,
    "module": "ESNext",
    "resolveJsonModule": true,
    "allowJs": true,
    "checkJs": true,
    "isolatedModules": true,
    "moduleDetection": "force",
    "strict": true
  },
  "include": ["src/**/*.ts", "src/**/*.svelte"]
 }
--- a/version.py
+++ b/version.py
@@ -1,7 +1,7 @@
 """Version information for Local Transcription."""
-__version__ = "1.4.0"
+__version__ = "2.0.1"
-__version_info__ = (1, 4, 0)
+__version_info__ = (2, 0, 1)
 # Version history:
 # 1.4.0 - Auto-update feature:
--- a/vite.config.ts
+++ b/vite.config.ts
@@ -0,0 +1,26 @@
 import { defineConfig } from "vite";
 import { svelte } from "@sveltejs/vite-plugin-svelte";
 import path from "path";
 // https://vitejs.dev/config/
 // Shared Vite configuration for the dev server, the build and the test runner.
 export default defineConfig({
  plugins: [svelte()],
  clearScreen: false,
  resolve: {
    alias: {
      // Resolved against the current working directory, so Vite is expected to
      // be started from the project root.
      $lib: path.resolve("./src/lib"),
    },
    // ".svelte.ts" comes first so an extension-less import such as
    // "$lib/stores/backend" resolves to backend.svelte.ts.
    extensions: [".svelte.ts", ".ts", ".svelte", ".js", ".mjs", ".mts"],
  },
  server: {
    // Fixed dev-server port; with strictPort Vite exits instead of picking another.
    // NOTE(review): presumably the port the Tauri shell is configured to load — confirm.
    port: 1420,
    strictPort: true,
    watch: {
      // Directories excluded from file watching.
      ignored: ["**/src-tauri/**", "**/client/**", "**/server/**", "**/backend/**", "**/gui/**"],
    },
  },
  // Test-runner settings: DOM emulation via jsdom, tests located next to sources.
  test: {
    environment: "jsdom",
    include: ["src/**/*.test.ts"],
  },
 });
		`@@ -0,0 +1 @@`
							`"""Backend package for headless transcription service."""`