2026-04-06 20:45:10 +00:00
50 changed files with 18565 additions and 371 deletions
--- a/.claude/settings.local.json
+++ b/.claude/settings.local.json
@@ -0,0 +1,9 @@
 {
  "permissions": {
    "allow": [
      "Bash(python3:*)",
      "Bash(node --check:*)",
      "Bash(ls:*)"
    ]
  }
 }
--- a/.gitea/workflows/build-sidecar.yml
+++ b/.gitea/workflows/build-sidecar.yml
@@ -0,0 +1,414 @@
 # Builds the Python sidecar whenever backend files change on main, or when started manually.
 name: Build Sidecars
 on:
  push:
    branches:
      - main
    paths:
      - "client/**"
      - "server/**"
      - "backend/**"
      - "pyproject.toml"
      - "local-transcription-headless.spec"
  workflow_dispatch:
 jobs:
  # Job: bumps the sidecar patch version, tags it and creates the release the build jobs upload to.
  bump-sidecar-version:
    name: Bump sidecar version and tag
    # Skip runs caused by this job's own bump commit, whose message carries "[skip ci]".
    if: "!contains(github.event.head_commit.message, '[skip ci]')"
    runs-on: ubuntu-latest
    # Read by the build jobs via needs.bump-sidecar-version.outputs.*
    outputs:
      version: ${{ steps.bump.outputs.version }}
      tag: ${{ steps.bump.outputs.tag }}
      has_changes: ${{ steps.check_changes.outputs.has_changes }}
    steps:
      # Two commits are fetched so that HEAD~1 exists for the change check in the next step.
      - uses: actions/checkout@v4
        with:
          fetch-depth: 2
      # Decides whether this run needs a sidecar build; every later step and job is gated on has_changes.
      - name: Check for backend changes
        id: check_changes
        run: |
          # If triggered by workflow_dispatch, always build
          if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
            echo "has_changes=true" >> "$GITHUB_OUTPUT"
            exit 0
          fi
          # Check if relevant files changed in this commit
          if ! CHANGED=$(git diff --name-only HEAD~1 HEAD -- client/ server/ backend/ pyproject.toml local-transcription-headless.spec); then
            # The diff itself failed (for example HEAD~1 is not available). A push only starts
            # this workflow when it matched the path filter, so build instead of silently skipping.
            echo "WARNING: could not diff HEAD~1..HEAD, assuming backend changes"
            echo "has_changes=true" >> "$GITHUB_OUTPUT"
            exit 0
          fi
          if [ -n "$CHANGED" ]; then
            echo "has_changes=true" >> "$GITHUB_OUTPUT"
            echo "Backend changes detected: $CHANGED"
          else
            echo "has_changes=false" >> "$GITHUB_OUTPUT"
            echo "No backend changes detected, skipping sidecar build"
          fi
      # Author identity for the automated version-bump commit made later in this job.
      - name: Configure git
        if: ${{ steps.check_changes.outputs.has_changes == 'true' }}
        run: |
          git config --local user.email "actions@gitea.local"
          git config --local user.name "Gitea Actions"
      # Reads MAJOR.MINOR.PATCH from pyproject.toml, increments PATCH and writes the new version
      # to pyproject.toml and version.py. Publishes "version" and "tag" as step outputs.
      - name: Bump sidecar patch version
        if: steps.check_changes.outputs.has_changes == 'true'
        id: bump
        run: |
          # Read current version from pyproject.toml
          CURRENT=$(grep '^version = ' pyproject.toml | head -1 | sed 's/version = "\(.*\)"/\1/')
          echo "Current sidecar version: ${CURRENT}"
          # Stop early on anything that is not plain MAJOR.MINOR.PATCH; the arithmetic and the
          # sed substitutions below would otherwise produce a wrong version without any error.
          if ! printf '%s\n' "${CURRENT}" | grep -Eq '^[0-9]+\.[0-9]+\.[0-9]+$'; then
            echo "ERROR: unexpected version '${CURRENT}' in pyproject.toml (expected MAJOR.MINOR.PATCH)"
            exit 1
          fi
          # Increment patch number
          MAJOR=$(echo "${CURRENT}" | cut -d. -f1)
          MINOR=$(echo "${CURRENT}" | cut -d. -f2)
          PATCH=$(echo "${CURRENT}" | cut -d. -f3)
          NEW_PATCH=$((PATCH + 1))
          NEW_VERSION="${MAJOR}.${MINOR}.${NEW_PATCH}"
          echo "New sidecar version: ${NEW_VERSION}"
          # Escape the dots so the version is matched literally by sed, not as "any character"
          CURRENT_RE=$(printf '%s' "${CURRENT}" | sed 's/\./\\./g')
          # Update pyproject.toml
          sed -i "s/^version = \"${CURRENT_RE}\"/version = \"${NEW_VERSION}\"/" pyproject.toml
          # Update version.py
          sed -i "s/__version__ = \"${CURRENT_RE}\"/__version__ = \"${NEW_VERSION}\"/" version.py
          sed -i "s/__version_info__ = .*/__version_info__ = (${MAJOR}, ${MINOR}, ${NEW_PATCH})/" version.py
          echo "version=${NEW_VERSION}" >> "$GITHUB_OUTPUT"
          echo "tag=sidecar-v${NEW_VERSION}" >> "$GITHUB_OUTPUT"
      # Commits the bumped files, rebases onto the current main, then tags and pushes.
      - name: Commit and tag
        if: steps.check_changes.outputs.has_changes == 'true'
        env:
          BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
        run: |
          NEW_VERSION="${{ steps.bump.outputs.version }}"
          TAG="${{ steps.bump.outputs.tag }}"
          git add pyproject.toml version.py
          git commit -m "chore: bump sidecar version to ${NEW_VERSION} [skip ci]"
          # Authenticated push/pull URL: the token is injected into the origin URL
          REMOTE_URL=$(git remote get-url origin | sed "s|://|://gitea-actions:${BUILD_TOKEN}@|")
          # Rebase BEFORE tagging. A rebase rewrites the bump commit, so a tag created earlier
          # would keep pointing at the old commit, which never reaches main.
          if ! git pull --rebase "${REMOTE_URL}" main; then
            # Pushing from a half-finished rebase would publish the wrong commit; stop instead.
            echo "ERROR: could not rebase the version bump onto main; not tagging or pushing."
            git rebase --abort 2>/dev/null || true
            exit 1
          fi
          git tag "${TAG}"
          git push "${REMOTE_URL}" HEAD:main
          git push "${REMOTE_URL}" "${TAG}"
      # Creates the release for the new tag through the Gitea REST API; the build jobs attach
      # their archives to this release.
      - name: Create Gitea release
        if: steps.check_changes.outputs.has_changes == 'true'
        env:
          BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
        run: |
          REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
          TAG="${{ steps.bump.outputs.tag }}"
          VERSION="${{ steps.bump.outputs.version }}"
          RELEASE_NAME="Sidecar v${VERSION}"
          # --fail makes curl exit non-zero on an HTTP error status, so a rejected request
          # fails this step instead of being reported as "Created release".
          curl -sS --fail -X POST \
            -H "Authorization: token ${BUILD_TOKEN}" \
            -H "Content-Type: application/json" \
            -d "{\"tag_name\": \"${TAG}\", \"name\": \"${RELEASE_NAME}\", \"body\": \"Automated sidecar build.\", \"draft\": false, \"prerelease\": false}" \
            "${REPO_API}/releases"
          echo "Created release: ${RELEASE_NAME}"
  # ── Linux sidecar (CUDA + CPU) ──
  # Job: builds the Linux sidecar twice (as synced by uv, then with CPU-only PyTorch) and
  # uploads both archives to the release of the tag produced by bump-sidecar-version.
  build-sidecar-linux:
    name: Build Sidecar (Linux)
    needs: bump-sidecar-version
    # Only run when the bump job reported relevant changes.
    if: needs.bump-sidecar-version.outputs.has_changes == 'true'
    runs-on: ubuntu-latest
    env:
      PYTHON_VERSION: "3.11"
    steps:
      # Check out the new tag rather than the triggering commit, so the bumped version files are built.
      - uses: actions/checkout@v4
        with:
          ref: ${{ needs.bump-sidecar-version.outputs.tag }}
      # Installs uv with the upstream installer unless the runner already provides it.
      - name: Install uv
        run: |
          if ! command -v uv > /dev/null 2>&1; then
            curl -LsSf https://astral.sh/uv/install.sh | sh
            # Make the freshly installed binary visible to the following steps
            echo "$HOME/.local/bin" >> $GITHUB_PATH
          else
            echo "uv already installed: $(uv --version)"
          fi
      # Installs the interpreter version pinned in the job-level env through uv.
      - name: Set up Python
        run: uv python install ${{ env.PYTHON_VERSION }}
      # PortAudio development package; presumably required by the audio capture dependency — TODO confirm.
      - name: Install system dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y portaudio19-dev
      # First build: uses the environment exactly as resolved by `uv sync`.
      - name: Build sidecar (CUDA)
        run: |
          uv sync
          uv run pyinstaller local-transcription-headless.spec
      # Archive the contents of the PyInstaller output directory (paths relative to it).
      - name: Package sidecar (CUDA)
        run: |
          cd dist/local-transcription-backend && zip -r ../../sidecar-linux-x86_64-cuda.zip .
      # Second build: same spec, with torch/torchaudio replaced by the CPU-only wheels.
      - name: Build sidecar (CPU)
        run: |
          rm -rf dist/local-transcription-backend build/
          # Install CPU-only PyTorch
          uv pip install torch torchaudio --index-url https://download.pytorch.org/whl/cpu --force-reinstall
          # --no-sync: a plain `uv run` first re-syncs the environment to the lockfile, which
          # would reinstall the locked torch build and undo the CPU-only install above.
          uv run --no-sync pyinstaller local-transcription-headless.spec
      - name: Package sidecar (CPU)
        run: |
          cd dist/local-transcription-backend && zip -r ../../sidecar-linux-x86_64-cpu.zip .
      # Waits for the release of the new tag to exist, then uploads every sidecar-*.zip to it,
      # replacing an asset of the same name. Any failed upload fails the step.
      - name: Upload to sidecar release
        env:
          BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
        run: |
          sudo apt-get install -y jq
          REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
          TAG="${{ needs.bump-sidecar-version.outputs.tag }}"
          echo "Waiting for sidecar release ${TAG} to be available..."
          RELEASE_ID=""
          for i in $(seq 1 30); do
            # "|| true": a transient curl or jq failure must lead to a retry, not abort the script
            RELEASE_JSON=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
              "${REPO_API}/releases/tags/${TAG}" || true)
            RELEASE_ID=$(echo "$RELEASE_JSON" | jq -r '.id // empty' 2>/dev/null || true)
            if [ -n "${RELEASE_ID}" ] && [ "${RELEASE_ID}" != "null" ]; then
              echo "Found sidecar release: ${TAG} (ID: ${RELEASE_ID})"
              break
            fi
            echo "Attempt ${i}/30: Release not ready yet, retrying in 10s..."
            sleep 10
          done
          if [ -z "${RELEASE_ID}" ] || [ "${RELEASE_ID}" = "null" ]; then
            echo "ERROR: Failed to find sidecar release for tag ${TAG} after 30 attempts."
            exit 1
          fi
          for file in sidecar-*.zip; do
            filename=$(basename "$file")
            # Percent-encode the complete name for the query string, not only spaces
            encoded_name=$(jq -rn --arg v "$filename" '$v|@uri')
            echo "Uploading ${filename} ($(du -h "$file" | cut -f1))..."
            # Delete an asset with the same name first so the upload replaces it
            ASSET_ID=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
              "${REPO_API}/releases/${RELEASE_ID}/assets" | jq -r --arg name "$filename" '.[] | select(.name == $name) | .id // empty')
            if [ -n "${ASSET_ID}" ]; then
              curl -s -X DELETE -H "Authorization: token ${BUILD_TOKEN}" \
                "${REPO_API}/releases/${RELEASE_ID}/assets/${ASSET_ID}"
            fi
            # "|| true" keeps the script alive on a transport error so the status is reported below
            HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X POST \
              -H "Authorization: token ${BUILD_TOKEN}" \
              -H "Content-Type: application/octet-stream" \
              -T "$file" \
              "${REPO_API}/releases/${RELEASE_ID}/assets?name=${encoded_name}" || true)
            echo "Upload response: HTTP ${HTTP_CODE}"
            # Anything other than a 2xx status means the asset is missing from the release
            case "${HTTP_CODE}" in
              2??) ;;
              *)
                echo "ERROR: Upload of ${filename} failed (HTTP ${HTTP_CODE})."
                exit 1
                ;;
            esac
          done
  # ── Windows sidecar (CUDA + CPU) ──
  # Job: Windows counterpart of the Linux sidecar build; all steps run in Windows PowerShell.
  build-sidecar-windows:
    name: Build Sidecar (Windows)
    needs: bump-sidecar-version
    # Only run when the bump job reported relevant changes.
    if: needs.bump-sidecar-version.outputs.has_changes == 'true'
    runs-on: windows-latest
    env:
      PYTHON_VERSION: "3.11"
    steps:
      # Check out the new tag rather than the triggering commit, so the bumped version files are built.
      - uses: actions/checkout@v4
        with:
          ref: ${{ needs.bump-sidecar-version.outputs.tag }}
      # Installs uv with the upstream PowerShell installer unless the runner already provides it.
      - name: Install uv
        shell: powershell
        run: |
          if (-not (Get-Command uv -ErrorAction SilentlyContinue)) {
            Invoke-RestMethod https://astral.sh/uv/install.ps1 | Invoke-Expression
            # Make the freshly installed binary visible to the following steps
            Write-Output "$env:USERPROFILE\.local\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
          } else {
            Write-Host "uv already installed: $(uv --version)"
          }
      # Installs the interpreter version pinned in the job-level env through uv.
      - name: Set up Python
        shell: powershell
        run: uv python install ${{ env.PYTHON_VERSION }}
      # 7z is used by the packaging steps below; install it via Chocolatey only when it is missing.
      - name: Install 7-Zip
        shell: powershell
        run: |
          if (-not (Get-Command 7z -ErrorAction SilentlyContinue)) {
            choco install 7zip -y
          }
      # First build: uses the environment exactly as resolved by `uv sync`.
      # PowerShell does not stop when a native command returns a non-zero exit code, so each
      # command is checked explicitly; otherwise a failed step could still produce an archive.
      - name: Build sidecar (CUDA)
        shell: powershell
        run: |
          uv sync
          if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE }
          uv run pyinstaller local-transcription-headless.spec
          if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE }
      - name: Package sidecar (CUDA)
        shell: powershell
        run: |
          7z a -tzip -mx=5 sidecar-windows-x86_64-cuda.zip .\dist\local-transcription-backend\*
          if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE }
      # Second build: same spec, with torch/torchaudio replaced by the CPU-only wheels.
      - name: Build sidecar (CPU)
        shell: powershell
        run: |
          Remove-Item -Recurse -Force dist\local-transcription-backend, build -ErrorAction SilentlyContinue
          uv pip install torch torchaudio --index-url https://download.pytorch.org/whl/cpu --force-reinstall
          if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE }
          # --no-sync: a plain `uv run` first re-syncs the environment to the lockfile, which
          # would reinstall the locked torch build and undo the CPU-only install above.
          uv run --no-sync pyinstaller local-transcription-headless.spec
          if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE }
      - name: Package sidecar (CPU)
        shell: powershell
        run: |
          7z a -tzip -mx=5 sidecar-windows-x86_64-cpu.zip .\dist\local-transcription-backend\*
          if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE }
      # Waits for the release of the new tag to exist, then uploads every sidecar-*.zip to it,
      # replacing an asset of the same name. The step fails if any upload fails.
      - name: Upload to sidecar release
        shell: powershell
        env:
          BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
        run: |
          $REPO_API = "${{ github.server_url }}/api/v1/repos/${{ github.repository }}"
          $Headers = @{ "Authorization" = "token $env:BUILD_TOKEN" }
          $TAG = "${{ needs.bump-sidecar-version.outputs.tag }}"
          Write-Host "Waiting for sidecar release ${TAG} to be available..."
          $RELEASE_ID = $null
          for ($i = 1; $i -le 30; $i++) {
            try {
              $release = Invoke-RestMethod -Uri "${REPO_API}/releases/tags/${TAG}" -Headers $Headers -ErrorAction Stop
              $RELEASE_ID = $release.id
              if ($RELEASE_ID) {
                Write-Host "Found sidecar release: ${TAG} (ID: ${RELEASE_ID})"
                break
              }
            } catch {}  # a failed lookup simply means "not ready yet"; retry below
            Write-Host "Attempt ${i}/30: Release not ready yet, retrying in 10s..."
            Start-Sleep -Seconds 10
          }
          if (-not $RELEASE_ID) {
            Write-Host "ERROR: Failed to find sidecar release for tag ${TAG} after 30 attempts."
            exit 1
          }
          # Names of archives whose upload failed; checked after the loop so every file is attempted
          $failedUploads = @()
          Get-ChildItem -Path . -Filter "sidecar-*.zip" | ForEach-Object {
            # Keep the file object: inside a catch block $_ is the error record instead
            $file = $_
            $filename = $file.Name
            $encodedName = [System.Uri]::EscapeDataString($filename)
            $size = [math]::Round($file.Length / 1MB, 1)
            Write-Host "Uploading ${filename} (${size} MB)..."
            # Delete an asset with the same name first so the upload replaces it
            try {
              $assets = Invoke-RestMethod -Uri "${REPO_API}/releases/${RELEASE_ID}/assets" -Headers $Headers
              $existing = $assets | Where-Object { $_.name -eq $filename }
              if ($existing) {
                Invoke-RestMethod -Uri "${REPO_API}/releases/${RELEASE_ID}/assets/$($existing.id)" -Method Delete -Headers $Headers
              }
            } catch {
              # Best effort: report the problem and still attempt the upload
              Write-Host "WARNING: Could not check for or delete an existing asset named ${filename}: $($_.Exception.Message)"
            }
            $uploadUrl = "${REPO_API}/releases/${RELEASE_ID}/assets?name=${encodedName}"
            $result = curl.exe --fail --silent --show-error `
              -X POST `
              -H "Authorization: token $env:BUILD_TOKEN" `
              -H "Content-Type: application/octet-stream" `
              -T "$($file.FullName)" `
              "$uploadUrl" 2>&1
            if ($LASTEXITCODE -eq 0) {
              Write-Host "Upload successful: ${filename}"
            } else {
              Write-Host "ERROR: Upload failed for ${filename}: ${result}"
              $failedUploads += $filename
            }
          }
          # A release without its archives must not be reported as a successful build
          if ($failedUploads.Count -gt 0) {
            Write-Host "ERROR: $($failedUploads.Count) upload(s) failed: $($failedUploads -join ', ')"
            exit 1
          }
  # ── macOS sidecar (CPU only — no CUDA on macOS) ──
  # Job: builds a single CPU-only sidecar on macOS and uploads it to the release of the
  # tag produced by bump-sidecar-version.
  build-sidecar-macos:
    name: Build Sidecar (macOS)
    needs: bump-sidecar-version
    # Only run when the bump job reported relevant changes.
    if: needs.bump-sidecar-version.outputs.has_changes == 'true'
    runs-on: macos-latest
    env:
      PYTHON_VERSION: "3.11"
    steps:
      # Check out the new tag rather than the triggering commit, so the bumped version files are built.
      - uses: actions/checkout@v4
        with:
          ref: ${{ needs.bump-sidecar-version.outputs.tag }}
      # Installs uv with the upstream installer unless the runner already provides it.
      - name: Install uv
        run: |
          if ! command -v uv > /dev/null 2>&1; then
            curl -LsSf https://astral.sh/uv/install.sh | sh
            # Make the freshly installed binary visible to the following steps
            echo "$HOME/.local/bin" >> $GITHUB_PATH
          else
            echo "uv already installed: $(uv --version)"
          fi
      # Installs the interpreter version pinned in the job-level env through uv.
      - name: Set up Python
        run: uv python install ${{ env.PYTHON_VERSION }}
      # PortAudio from Homebrew; presumably required by the audio capture dependency — TODO confirm.
      - name: Install system dependencies
        run: brew install portaudio
      # Only build on macOS: sync the project, swap in the CPU wheels, then run PyInstaller.
      - name: Build sidecar (CPU)
        run: |
          uv sync
          # Replace torch/torchaudio with the wheels from the PyTorch CPU index (this job ships a CPU-only build)
          uv pip install torch torchaudio --index-url https://download.pytorch.org/whl/cpu --force-reinstall
          # --no-sync: a plain `uv run` first re-syncs the environment to the lockfile, which
          # would reinstall the locked torch build and undo the install above.
          uv run --no-sync pyinstaller local-transcription-headless.spec
      # Archive the contents of the PyInstaller output directory (paths relative to it).
      - name: Package sidecar (CPU)
        run: |
          cd dist/local-transcription-backend && zip -r ../../sidecar-macos-aarch64-cpu.zip .
      # Waits for the release of the new tag to exist, then uploads every sidecar-*.zip to it,
      # replacing an asset of the same name. Any failed upload fails the step.
      - name: Upload to sidecar release
        env:
          BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
        run: |
          which jq || brew install jq
          REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
          TAG="${{ needs.bump-sidecar-version.outputs.tag }}"
          echo "Waiting for sidecar release ${TAG} to be available..."
          RELEASE_ID=""
          for i in $(seq 1 30); do
            # "|| true": a transient curl or jq failure must lead to a retry, not abort the script
            RELEASE_JSON=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
              "${REPO_API}/releases/tags/${TAG}" || true)
            RELEASE_ID=$(echo "$RELEASE_JSON" | jq -r '.id // empty' 2>/dev/null || true)
            if [ -n "${RELEASE_ID}" ] && [ "${RELEASE_ID}" != "null" ]; then
              echo "Found sidecar release: ${TAG} (ID: ${RELEASE_ID})"
              break
            fi
            echo "Attempt ${i}/30: Release not ready yet, retrying in 10s..."
            sleep 10
          done
          if [ -z "${RELEASE_ID}" ] || [ "${RELEASE_ID}" = "null" ]; then
            echo "ERROR: Failed to find sidecar release for tag ${TAG} after 30 attempts."
            exit 1
          fi
          for file in sidecar-*.zip; do
            filename=$(basename "$file")
            # Percent-encode the complete name for the query string, not only spaces
            encoded_name=$(jq -rn --arg v "$filename" '$v|@uri')
            echo "Uploading ${filename} ($(du -h "$file" | cut -f1))..."
            # Delete an asset with the same name first so the upload replaces it
            ASSET_ID=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
              "${REPO_API}/releases/${RELEASE_ID}/assets" | jq -r --arg name "$filename" '.[] | select(.name == $name) | .id // empty')
            if [ -n "${ASSET_ID}" ]; then
              curl -s -X DELETE -H "Authorization: token ${BUILD_TOKEN}" \
                "${REPO_API}/releases/${RELEASE_ID}/assets/${ASSET_ID}"
            fi
            # "|| true" keeps the script alive on a transport error so the status is reported below
            HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X POST \
              -H "Authorization: token ${BUILD_TOKEN}" \
              -H "Content-Type: application/octet-stream" \
              -T "$file" \
              "${REPO_API}/releases/${RELEASE_ID}/assets?name=${encoded_name}" || true)
            echo "Upload response: HTTP ${HTTP_CODE}"
            # Anything other than a 2xx status means the asset is missing from the release
            case "${HTTP_CODE}" in
              2??) ;;
              *)
                echo "ERROR: Upload of ${filename} failed (HTTP ${HTTP_CODE})."
                exit 1
                ;;
            esac
          done
--- a/.gitea/workflows/release.yml
+++ b/.gitea/workflows/release.yml
@@ -0,0 +1,300 @@
 # Runs on every push to main: bumps the app version, then builds and publishes the installers.
 name: Release
 on:
  push:
    branches:
      - main
 jobs:
  # Job: bumps the app patch version, tags it and creates the release the build jobs upload to.
  bump-version:
    name: Bump version and tag
    # Skip runs caused by this job's own bump commit, whose message carries "[skip ci]".
    if: "!contains(github.event.head_commit.message, '[skip ci]')"
    runs-on: ubuntu-latest
    # Read by the build jobs via needs.bump-version.outputs.*
    outputs:
      new_version: ${{ steps.bump.outputs.new_version }}
      tag: ${{ steps.bump.outputs.tag }}
    steps:
      # fetch-depth 0 requests the complete history instead of a shallow clone.
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      # Author identity for the automated version-bump commit made later in this job.
      - name: Configure git
        run: |
          git config user.name "Gitea Actions"
          git config user.email "actions@gitea.local"
      # Reads MAJOR.MINOR.PATCH from package.json, increments PATCH and writes the new version to
      # package.json, tauri.conf.json, Cargo.toml and version.py. Publishes "new_version" and "tag".
      - name: Bump patch version
        id: bump
        run: |
          # Read current version from package.json
          CURRENT=$(grep '"version"' package.json | head -1 | sed 's/.*"version": *"\([^"]*\)".*/\1/')
          echo "Current version: ${CURRENT}"
          # Stop early on anything that is not plain MAJOR.MINOR.PATCH; the arithmetic and the
          # sed substitutions below would otherwise produce a wrong version without any error.
          if ! printf '%s\n' "${CURRENT}" | grep -Eq '^[0-9]+\.[0-9]+\.[0-9]+$'; then
            echo "ERROR: unexpected version '${CURRENT}' in package.json (expected MAJOR.MINOR.PATCH)"
            exit 1
          fi
          # Increment patch number
          MAJOR=$(echo "${CURRENT}" | cut -d. -f1)
          MINOR=$(echo "${CURRENT}" | cut -d. -f2)
          PATCH=$(echo "${CURRENT}" | cut -d. -f3)
          NEW_PATCH=$((PATCH + 1))
          NEW_VERSION="${MAJOR}.${MINOR}.${NEW_PATCH}"
          echo "New version: ${NEW_VERSION}"
          # Escape the dots so the version is matched literally by sed, not as "any character"
          CURRENT_RE=$(printf '%s' "${CURRENT}" | sed 's/\./\\./g')
          # Update package.json
          sed -i "s/\"version\": \"${CURRENT_RE}\"/\"version\": \"${NEW_VERSION}\"/" package.json
          # Update src-tauri/tauri.conf.json
          sed -i "s/\"version\": \"${CURRENT_RE}\"/\"version\": \"${NEW_VERSION}\"/" src-tauri/tauri.conf.json
          # Update src-tauri/Cargo.toml
          sed -i "s/^version = \"${CURRENT_RE}\"/version = \"${NEW_VERSION}\"/" src-tauri/Cargo.toml
          # Update version.py
          sed -i "s/__version__ = \"${CURRENT_RE}\"/__version__ = \"${NEW_VERSION}\"/" version.py
          sed -i "s/__version_info__ = .*/__version_info__ = (${MAJOR}, ${MINOR}, ${NEW_PATCH})/" version.py
          echo "new_version=${NEW_VERSION}" >> "$GITHUB_OUTPUT"
          echo "tag=v${NEW_VERSION}" >> "$GITHUB_OUTPUT"
      # Commits the bumped files, rebases onto the current main, then tags and pushes.
      - name: Commit and tag
        env:
          BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
        run: |
          NEW_VERSION="${{ steps.bump.outputs.new_version }}"
          git add package.json src-tauri/tauri.conf.json src-tauri/Cargo.toml version.py
          git commit -m "chore: bump version to ${NEW_VERSION} [skip ci]"
          # Authenticated push/pull URL: the token is injected into the origin URL
          REMOTE_URL=$(git remote get-url origin | sed "s|://|://gitea-actions:${BUILD_TOKEN}@|")
          # Rebase BEFORE tagging. A rebase rewrites the bump commit, so a tag created earlier
          # would keep pointing at the old commit, which never reaches main.
          if ! git pull --rebase "${REMOTE_URL}" main; then
            # Pushing from a half-finished rebase would publish the wrong commit; stop instead.
            echo "ERROR: could not rebase the version bump onto main; not tagging or pushing."
            git rebase --abort 2>/dev/null || true
            exit 1
          fi
          git tag "v${NEW_VERSION}"
          git push "${REMOTE_URL}" HEAD:main
          git push "${REMOTE_URL}" "v${NEW_VERSION}"
      # Creates the release for the new tag through the Gitea REST API; the build jobs attach
      # their installers to this release.
      - name: Create Gitea release
        env:
          BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
        run: |
          REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
          TAG="${{ steps.bump.outputs.tag }}"
          RELEASE_NAME="Local Transcription ${TAG}"
          # --fail makes curl exit non-zero on an HTTP error status, so a rejected request
          # fails this step instead of being reported as "Created release".
          curl -sS --fail -X POST \
            -H "Authorization: token ${BUILD_TOKEN}" \
            -H "Content-Type: application/json" \
            -d "{\"tag_name\": \"${TAG}\", \"name\": \"${RELEASE_NAME}\", \"body\": \"Automated build.\", \"draft\": false, \"prerelease\": false}" \
            "${REPO_API}/releases"
          echo "Created release: ${RELEASE_NAME}"
  # ── Platform builds (run after version bump) ──
  # Job: builds the Tauri app on Linux from the freshly created tag.
  build-linux:
    name: Build App (Linux)
    needs: bump-version
    runs-on: ubuntu-latest
    env:
      NODE_VERSION: "20"
    steps:
      # Check out the new tag rather than the triggering commit, so the bumped version files are built.
      - uses: actions/checkout@v4
        with:
          ref: ${{ needs.bump-version.outputs.tag }}
      - name: Set up Node.js
        uses: actions/setup-node@v4
        with:
          node-version: ${{ env.NODE_VERSION }}
      # Installs the stable toolchain through rustup and exposes cargo to the following steps.
      - name: Install Rust stable
        run: |
          curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable
          echo "$HOME/.cargo/bin" >> $GITHUB_PATH
      # Native libraries and packaging tools; presumably what the Tauri build and its
      # deb/rpm/AppImage bundlers need — TODO confirm against the Tauri prerequisites.
      - name: Install system dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y libgtk-3-dev libwebkit2gtk-4.1-dev libappindicator3-dev librsvg2-dev patchelf xdg-utils rpm
      - name: Install npm dependencies
        run: npm ci
      - name: Build Tauri app
        run: npm run tauri build
      # Uploads every .deb, .rpm and .AppImage bundle to the release of the new tag, replacing
      # an asset of the same name. Any failed upload fails the step.
      - name: Upload to release
        env:
          BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
        run: |
          sudo apt-get install -y jq
          REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
          TAG="${{ needs.bump-version.outputs.tag }}"
          echo "Release tag: ${TAG}"
          RELEASE_ID=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
            "${REPO_API}/releases/tags/${TAG}" | jq -r '.id // empty')
          if [ -z "${RELEASE_ID}" ] || [ "${RELEASE_ID}" = "null" ]; then
            echo "ERROR: Failed to find release for tag ${TAG}."
            exit 1
          fi
          echo "Release ID: ${RELEASE_ID}"
          # The loop runs in a pipeline subshell; its "exit 1" becomes the exit status of the
          # pipeline, which is the last command of the script and therefore fails the step.
          find src-tauri/target/release/bundle -type f \( -name "*.deb" -o -name "*.rpm" -o -name "*.AppImage" \) | while IFS= read -r file; do
            filename=$(basename "$file")
            # Percent-encode the complete name for the query string, not only spaces
            encoded_name=$(jq -rn --arg v "$filename" '$v|@uri')
            echo "Uploading ${filename} ($(du -h "$file" | cut -f1))..."
            # Delete an asset with the same name first so the upload replaces it
            ASSET_ID=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
              "${REPO_API}/releases/${RELEASE_ID}/assets" | jq -r --arg name "$filename" '.[] | select(.name == $name) | .id // empty')
            if [ -n "${ASSET_ID}" ]; then
              curl -s -X DELETE -H "Authorization: token ${BUILD_TOKEN}" \
                "${REPO_API}/releases/${RELEASE_ID}/assets/${ASSET_ID}"
            fi
            # "|| true" keeps the script alive on a transport error so the status is reported below
            HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X POST \
              -H "Authorization: token ${BUILD_TOKEN}" \
              -H "Content-Type: application/octet-stream" \
              -T "$file" \
              "${REPO_API}/releases/${RELEASE_ID}/assets?name=${encoded_name}" || true)
            echo "Upload response: HTTP ${HTTP_CODE}"
            # Anything other than a 2xx status means the asset is missing from the release
            case "${HTTP_CODE}" in
              2??) ;;
              *)
                echo "ERROR: Upload of ${filename} failed (HTTP ${HTTP_CODE})."
                exit 1
                ;;
            esac
          done
  # Job: builds the Tauri app on Windows from the freshly created tag; steps run in Windows PowerShell.
  build-windows:
    name: Build App (Windows)
    needs: bump-version
    runs-on: windows-latest
    env:
      NODE_VERSION: "20"
    steps:
      # Check out the new tag rather than the triggering commit, so the bumped version files are built.
      - uses: actions/checkout@v4
        with:
          ref: ${{ needs.bump-version.outputs.tag }}
      - name: Set up Node.js
        uses: actions/setup-node@v4
        with:
          node-version: ${{ env.NODE_VERSION }}
      # Reuses an existing rustup when present; otherwise downloads rustup-init and exposes
      # cargo to the following steps.
      - name: Install Rust stable
        shell: powershell
        run: |
          if (Get-Command rustup -ErrorAction SilentlyContinue) {
            rustup default stable
          } else {
            Invoke-WebRequest -Uri https://win.rustup.rs/x86_64 -OutFile rustup-init.exe
            .\rustup-init.exe -y --default-toolchain stable
            echo "$env:USERPROFILE\.cargo\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
          }
      - name: Install npm dependencies
        shell: powershell
        run: npm ci
      - name: Build Tauri app
        shell: powershell
        run: npm run tauri build
      # Uploads every .msi and *-setup.exe bundle to the release of the new tag, replacing an
      # asset of the same name. The step fails if any upload fails.
      - name: Upload to release
        shell: powershell
        env:
          BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
        run: |
          $REPO_API = "${{ github.server_url }}/api/v1/repos/${{ github.repository }}"
          $Headers = @{ "Authorization" = "token $env:BUILD_TOKEN" }
          $TAG = "${{ needs.bump-version.outputs.tag }}"
          Write-Host "Release tag: ${TAG}"
          $release = Invoke-RestMethod -Uri "${REPO_API}/releases/tags/${TAG}" -Headers $Headers -ErrorAction Stop
          $RELEASE_ID = $release.id
          Write-Host "Release ID: ${RELEASE_ID}"
          # Names of installers whose upload failed; checked after the loop so every file is attempted
          $failedUploads = @()
          Get-ChildItem -Path src-tauri\target\release\bundle -Recurse -Include *.msi,*-setup.exe | ForEach-Object {
            # Keep the file object: inside a catch block $_ is the error record instead
            $file = $_
            $filename = $file.Name
            $encodedName = [System.Uri]::EscapeDataString($filename)
            $size = [math]::Round($file.Length / 1MB, 1)
            Write-Host "Uploading ${filename} (${size} MB)..."
            # Delete an asset with the same name first so the upload replaces it
            try {
              $assets = Invoke-RestMethod -Uri "${REPO_API}/releases/${RELEASE_ID}/assets" -Headers $Headers
              $existing = $assets | Where-Object { $_.name -eq $filename }
              if ($existing) {
                Invoke-RestMethod -Uri "${REPO_API}/releases/${RELEASE_ID}/assets/$($existing.id)" -Method Delete -Headers $Headers
              }
            } catch {
              # Best effort: report the problem and still attempt the upload
              Write-Host "WARNING: Could not check for or delete an existing asset named ${filename}: $($_.Exception.Message)"
            }
            $uploadUrl = "${REPO_API}/releases/${RELEASE_ID}/assets?name=${encodedName}"
            $result = curl.exe --fail --silent --show-error `
              -X POST `
              -H "Authorization: token $env:BUILD_TOKEN" `
              -H "Content-Type: application/octet-stream" `
              -T "$($file.FullName)" `
              "$uploadUrl" 2>&1
            if ($LASTEXITCODE -eq 0) {
              Write-Host "Upload successful: ${filename}"
            } else {
              Write-Host "ERROR: Upload failed for ${filename}: ${result}"
              $failedUploads += $filename
            }
          }
          # A release without its installers must not be reported as a successful build
          if ($failedUploads.Count -gt 0) {
            Write-Host "ERROR: $($failedUploads.Count) upload(s) failed: $($failedUploads -join ', ')"
            exit 1
          }
  # Job: builds the Tauri app on macOS from the freshly created tag.
  build-macos:
    name: Build App (macOS)
    needs: bump-version
    runs-on: macos-latest
    env:
      NODE_VERSION: "20"
    steps:
      # Check out the new tag rather than the triggering commit, so the bumped version files are built.
      - uses: actions/checkout@v4
        with:
          ref: ${{ needs.bump-version.outputs.tag }}
      - name: Set up Node.js
        uses: actions/setup-node@v4
        with:
          node-version: ${{ env.NODE_VERSION }}
      # Installs the stable toolchain through rustup and exposes cargo to the following steps.
      - name: Install Rust stable
        run: |
          curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable
          echo "$HOME/.cargo/bin" >> $GITHUB_PATH
      # "|| true": a failing create-dmg install is tolerated and does not stop the job.
      - name: Install system dependencies
        run: brew install --quiet create-dmg || true
      - name: Install npm dependencies
        run: npm ci
      - name: Build Tauri app
        run: npm run tauri build
      # Uploads every .dmg bundle to the release of the new tag, replacing an asset of the
      # same name. Any failed upload fails the step.
      - name: Upload to release
        env:
          BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
        run: |
          which jq || brew install jq
          REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
          TAG="${{ needs.bump-version.outputs.tag }}"
          echo "Release tag: ${TAG}"
          RELEASE_ID=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
            "${REPO_API}/releases/tags/${TAG}" | jq -r '.id // empty')
          if [ -z "${RELEASE_ID}" ] || [ "${RELEASE_ID}" = "null" ]; then
            echo "ERROR: Failed to find release for tag ${TAG}."
            exit 1
          fi
          echo "Release ID: ${RELEASE_ID}"
          # The loop runs in a pipeline subshell; its "exit 1" becomes the exit status of the
          # pipeline, which is the last command of the script and therefore fails the step.
          find src-tauri/target/release/bundle -type f -name "*.dmg" | while IFS= read -r file; do
            filename=$(basename "$file")
            # Percent-encode the complete name for the query string, not only spaces
            encoded_name=$(jq -rn --arg v "$filename" '$v|@uri')
            echo "Uploading ${filename} ($(du -h "$file" | cut -f1))..."
            # Delete an asset with the same name first so the upload replaces it
            ASSET_ID=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
              "${REPO_API}/releases/${RELEASE_ID}/assets" | jq -r --arg name "$filename" '.[] | select(.name == $name) | .id // empty')
            if [ -n "${ASSET_ID}" ]; then
              curl -s -X DELETE -H "Authorization: token ${BUILD_TOKEN}" \
                "${REPO_API}/releases/${RELEASE_ID}/assets/${ASSET_ID}"
            fi
            # "|| true" keeps the script alive on a transport error so the status is reported below
            HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X POST \
              -H "Authorization: token ${BUILD_TOKEN}" \
              -H "Content-Type: application/octet-stream" \
              -T "$file" \
              "${REPO_API}/releases/${RELEASE_ID}/assets?name=${encoded_name}" || true)
            echo "Upload response: HTTP ${HTTP_CODE}"
            # Anything other than a 2xx status means the asset is missing from the release
            case "${HTTP_CODE}" in
              2??) ;;
              *)
                echo "ERROR: Upload of ${filename} failed (HTTP ${HTTP_CODE})."
                exit 1
                ;;
            esac
          done
--- a/.gitignore
+++ b/.gitignore
@@ -10,8 +10,8 @@ dist/
 downloads/
 eggs/
 .eggs/
-lib/
+/lib/
-lib64/
+/lib64/
 parts/
 sdist/
 var/
@@ -54,3 +54,12 @@ models/
 # PyInstaller
 *.spec.lock
 # Node.js
 node_modules/
 # Vite / Svelte build output
 dist/
 # Tauri
 src-tauri/target/
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -4,52 +4,108 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
 ## Project Overview
-Local Transcription is a desktop application for real-time speech-to-text transcription designed for streamers. It uses Whisper models (via faster-whisper) to transcribe audio locally with optional multi-user server synchronization.
+Local Transcription is a cross-platform desktop application for real-time speech-to-text transcription designed for streamers. It supports local Whisper models and cloud-based Deepgram transcription, with OBS browser source integration and optional multi-user sync.
 **Architecture:** Two-process model — a Tauri v2 shell (Svelte 5 frontend) communicates with a headless Python backend (sidecar) via REST API and WebSocket.
 **Key Features:**
- Standalone desktop GUI (PySide6/Qt)
+- Cross-platform desktop app (Windows, macOS, Linux) via Tauri v2 + Svelte 5
- Local transcription with CPU/GPU support
+- Headless Python backend with FastAPI control API
- Built-in web server for OBS browser source integration
+- Dual transcription modes: local Whisper or cloud Deepgram (managed/BYOK)
- Optional Node.js-based multi-user server for syncing transcriptions across users
+- Built-in web server for OBS browser source at `http://localhost:8080`
- Noise suppression and Voice Activity Detection (VAD)
+- Optional multi-user sync via Node.js server
- Cross-platform builds (Linux/Windows) with PyInstaller
+- CUDA, MPS (Apple Silicon), and CPU support
 - Auto-updates, custom fonts, configurable colors
 > **Legacy GUI:** The original PySide6/Qt GUI (`main.py`, `gui/`) still works during the transition. New features should target the Tauri frontend and headless backend.
 ## Project Structure
 ```
 local-transcription/
-├── client/                   # Core transcription logic
+├── src/                             # Svelte 5 frontend (Tauri UI)
-│   ├── audio_capture.py      # Audio input and buffering
+│   ├── App.svelte                   # Main app shell
-│   ├── transcription_engine.py # Whisper model integration
+│   ├── app.css                      # Global dark theme styles
 │   ├── main.ts                      # Svelte mount point
 │   ├── lib/components/              # UI components
 │   │   ├── Header.svelte            # Title bar + settings button
 │   │   ├── StatusBar.svelte         # State indicator, device, user info
 │   │   ├── Controls.svelte          # Start/Stop, Clear, Save buttons
 │   │   ├── TranscriptionDisplay.svelte  # Scrolling transcript view
 │   │   └── Settings.svelte          # Full settings modal (all sections)
 │   └── lib/stores/                  # Svelte 5 reactive stores ($state/$derived)
 │       ├── backend.ts               # WebSocket + REST API client
 │       ├── config.ts                # App configuration fetch/update
 │       └── transcriptions.ts        # Transcript data management
 ├── src-tauri/                       # Tauri v2 Rust shell
 │   ├── src/lib.rs                   # Plugin registration (shell, dialog, process)
 │   ├── src/main.rs                  # Entry point
 │   ├── tauri.conf.json              # Window, bundle, plugin config
 │   └── Cargo.toml                   # Rust dependencies
 ├── backend/                         # Headless Python backend (the sidecar)
 │   ├── app_controller.py            # Core orchestration (engine, sync, config)
 │   ├── api_server.py                # FastAPI REST endpoints + /ws/control
 │   └── main_headless.py             # Headless entry point (prints JSON to stdout)
 ├── client/                          # Core transcription modules (used by backend)
 │   ├── audio_capture.py             # Audio input handling
 │   ├── transcription_engine_realtime.py  # RealtimeSTT / Whisper engine
 │   ├── deepgram_transcription.py    # Deepgram WebSocket cloud transcription
 │   ├── noise_suppression.py         # VAD and noise reduction
-│   ├── device_utils.py       # CPU/GPU device management
+│   ├── device_utils.py              # CPU/GPU/MPS detection
-│   ├── config.py             # Configuration management
+│   ├── config.py                    # YAML config management (~/.local-transcription/)
-│   └── server_sync.py        # Multi-user server sync client
+│   ├── server_sync.py               # Multi-user server sync client
-├── gui/                      # Desktop application UI
+│   ├── instance_lock.py             # Single-instance PID lock
-│   ├── main_window_qt.py     # Main application window (PySide6)
+│   └── update_checker.py            # Gitea release update checker
-│   ├── settings_dialog_qt.py # Settings dialog (PySide6)
+├── gui/                             # Legacy PySide6/Qt GUI (still functional)
 │   ├── main_window_qt.py            # Main window (orchestration lives here in legacy)
 │   ├── settings_dialog_qt.py        # Settings dialog
 │   └── transcription_display_qt.py  # Display widget
-├── server/                   # Web display servers
+├── server/
-│   ├── web_display.py        # FastAPI server for OBS browser source (local)
+│   ├── web_display.py               # FastAPI OBS display server (WebSocket + HTML)
-│   └── nodejs/               # Optional multi-user Node.js server
+│   └── nodejs/                      # Optional multi-user sync server
-│       ├── server.js         # Multi-user sync server with WebSocket
+├── .gitea/workflows/                # CI/CD
-│       ├── package.json      # Node.js dependencies
+│   ├── release.yml                  # Tauri app builds (Linux/Windows/macOS)
-│       └── README.md         # Server deployment documentation
+│   └── build-sidecar.yml            # Python sidecar builds (CUDA + CPU)
-├── config/                   # Example configuration files
+├── config/default_config.yaml       # Default settings template
-│   └── default_config.yaml   # Default settings template
+├── main.py                          # Legacy PySide6 GUI entry point
 ├── main.py                   # GUI application entry point
 ├── main_cli.py                      # CLI version for testing
-└── pyproject.toml           # Dependencies and build config
+├── version.py                       # Version string (__version__)
 ├── local-transcription.spec         # PyInstaller config (legacy, includes PySide6)
 ├── local-transcription-headless.spec # PyInstaller config (headless sidecar, no Qt)
 ├── pyproject.toml                   # Python deps (uv, CUDA PyTorch index)
 ├── package.json                     # Node/Tauri deps
 └── vite.config.ts                   # Vite build config ($lib alias)
 ```
 ## Development Commands
-### Installation and Setup
+### Frontend (Tauri + Svelte)
 ```bash
-# Install dependencies (creates .venv automatically)
+# Install npm dependencies
 npm install
 # Run Tauri in development mode (hot-reload)
 npm run tauri dev
 # Build frontend only (for testing)
 npx vite build
 # Type-check Svelte
 npx svelte-check
 # Check Rust compiles
 cd src-tauri && cargo check
 ```
 ### Backend (Python)
 ```bash
 # Install Python dependencies
 uv sync
-# Run the GUI application
+# Run the headless backend standalone (for development)
 uv run python -m backend.main_headless --port 8080
 # Run the legacy PySide6 GUI
 uv run python main.py
 # Run CLI version (headless, for testing)
@@ -57,257 +113,154 @@ uv run python main_cli.py
 # List available audio devices
 uv run python main_cli.py --list-devices
 # Install with CUDA support (if needed)
 uv pip install torch --index-url https://download.pytorch.org/whl/cu121
 ```
-### Building Executables
+### Building
 ```bash
-# Linux (includes CUDA support - works on both GPU and CPU systems)
+# Build Tauri app (produces platform installer)
-./build.sh
+npm run tauri build
-# Windows (includes CUDA support - works on both GPU and CPU systems)
+# Build headless Python sidecar (no PySide6)
-build.bat
+uv run pyinstaller local-transcription-headless.spec
 # Output: dist/local-transcription-backend/
-# Manual build with PyInstaller
+# Build legacy PySide6 app
 uv sync                          # Install dependencies (includes CUDA PyTorch)
 uv pip uninstall -q enum34       # Remove incompatible enum34 package
 uv run pyinstaller local-transcription.spec
 # Or use: ./build.sh (Linux) / build.bat (Windows)
 ```
 **Important:** All builds include CUDA support via `pyproject.toml` configuration. CUDA builds can be created on systems without NVIDIA GPUs. The PyTorch CUDA runtime is bundled, and the app automatically falls back to CPU if no GPU is available.
 ### Testing
 ```bash
 # Run component tests
 uv run python test_components.py
 # Check CUDA availability
 uv run python check_cuda.py
 # Test web server manually
 uv run python -m uvicorn server.web_display:app --reload
 ```
-## Architecture
+## Architecture Details
-### Audio Processing Pipeline
+### Communication: Tauri <-> Python Backend
-1. **Audio Capture** ([client/audio_capture.py](client/audio_capture.py))
+The Svelte frontend connects to the Python backend via two channels:
   - Captures audio from microphone/system using sounddevice
   - Handles automatic sample rate detection and resampling
   - Uses chunking with overlap for better transcription quality
   - Default: 3-second chunks with 0.5s overlap
-2. **Noise Suppression** ([client/noise_suppression.py](client/noise_suppression.py))
+**REST API** (on port 8081 by default):
-   - Applies noisereduce for background noise reduction
+- `GET /api/status` — app state, device info, version
-   - Voice Activity Detection (VAD) using webrtcvad
+- `POST /api/start` / `POST /api/stop` — transcription control
-   - Skips silent segments to improve performance
+- `GET /api/config` / `PUT /api/config` — read/write settings (dot-notation keys)
 - `GET /api/audio-devices` / `GET /api/compute-devices` — device enumeration
 - `POST /api/reload-engine` — reload with new model/device
 - `GET /api/transcriptions` / `POST /api/clear` — transcript management
 - `POST /api/save-file` — write text to a file path
 - `GET /api/check-update` / `POST /api/skip-version` — update management
 - `POST /api/login` / `POST /api/register` / `GET /api/balance` — managed mode proxy
-3. **Transcription** ([client/transcription_engine.py](client/transcription_engine.py))
+**WebSocket** `/ws/control`:
-   - Uses faster-whisper for efficient inference
+- Pushes real-time events: `state_changed`, `transcription`, `preview`, `error`, `credits_low`
-   - Supports CPU, CUDA, and Apple MPS (Mac)
+- Client sends keepalive pings
   - Models: tiny, base, small, medium, large
   - Thread-safe model loading with locks
-4. **Display** ([gui/main_window_qt.py](gui/main_window_qt.py))
+The OBS display server runs separately on port 8080 (`GET /` for HTML, `WebSocket /ws` for transcriptions).
   - PySide6/Qt-based desktop GUI
   - Real-time transcription display with scrolling
   - Settings panel with live updates (no restart needed)
-### Web Server Architecture
+### Backend Process Lifecycle
-**Local Web Server** ([server/web_display.py](server/web_display.py))
+1. `main_headless.py` starts, acquires instance lock, creates `AppController`
- Always runs when GUI starts (port 8080 by default)
+2. `AppController.initialize()` starts the OBS web server (port 8080) and engine init thread
- FastAPI with WebSocket for real-time updates
+3. `APIServer` wraps the controller with FastAPI routes, runs on port 8081
- Used for OBS browser source integration
+4. Backend prints `{"event": "ready", "port": 8080}` to stdout for Tauri to discover
- Single-user (displays only local transcriptions)
+5. On shutdown: engine stopped, web server stopped, lock released
-**Multi-User Server** (Optional - for syncing across multiple users)
+### Headless Backend vs Legacy GUI
-**Node.js WebSocket Server** ([server/nodejs/](server/nodejs/)) - **RECOMMENDED**
+The `AppController` class (`backend/app_controller.py`) extracts all orchestration logic from `gui/main_window_qt.py` into a Qt-free class. The mapping:
 - Real-time WebSocket support (< 100ms latency)
 - Handles 100+ concurrent users
 - Easy deployment to VPS/cloud hosting (Railway, Heroku, DigitalOcean, or any VPS)
 - Configurable display options via URL parameters:
  - `timestamps=true/false` - Show/hide timestamps
  - `maxlines=50` - Maximum visible lines (prevents scroll bars in OBS)
  - `fontsize=16` - Font size in pixels
  - `fontfamily=Arial` - Font family
  - `fade=10` - Seconds before text fades (0 = never)
-See [server/nodejs/README.md](server/nodejs/README.md) for deployment instructions
+| Legacy (MainWindow) | Headless (AppController) |
 |---------------------|--------------------------|
 | `_initialize_components()` | `_initialize_engine()` |
 | `_start_transcription()` | `start_transcription()` |
 | `_stop_transcription()` | `stop_transcription()` |
 | `_on_settings_saved()` | `apply_settings()` |
 | `_reload_engine()` | `reload_engine()` |
 | `_start_web_server_if_enabled()` | `_start_web_server()` |
 | `_start_server_sync()` | `_start_server_sync()` |
 | Qt signals | Callbacks (`on_state_changed`, `on_transcription`, etc.) |
-### Configuration System
+### Threading Model (Headless)
- Config stored at `~/.local-transcription/config.yaml`
+- Main thread: Uvicorn (FastAPI) event loop
- Managed by [client/config.py](client/config.py)
+- Engine init thread: Downloads models, initializes VAD
- Settings apply immediately without restart (except model changes)
+- Web server thread: Separate asyncio loop for OBS display
- YAML format with nested keys (e.g., `transcription.model`)
+- Audio capture: Runs in engine callback threads
 - All results flow through `AppController` callbacks -> `APIServer` WebSocket broadcast
-### Device Management
+### Svelte Frontend
- [client/device_utils.py](client/device_utils.py) handles CPU/GPU detection
+Uses Svelte 5 runes throughout (`$state`, `$derived`, `$effect`, `$props`). No Svelte 4 patterns.
 - Auto-detects CUDA, MPS (Mac), or falls back to CPU
 - Compute types: float32 (best quality), float16 (GPU), int8 (fastest)
 - Thread-safe device selection
-## Key Implementation Details
+**Stores** (`src/lib/stores/`):
 - `backend.ts` — WebSocket connection + REST helpers (`apiGet`, `apiPost`, `apiPut`), auto-reconnect
 - `config.ts` — fetches/updates config from backend API
 - `transcriptions.ts` — manages transcript list, listens for `CustomEvent`s from backend store
-### PyInstaller Build Configuration
+**Key patterns:**
 - Backend store dispatches `CustomEvent`s on `window` for cross-store communication
 - Settings component collects all changed values into a `Record<string, any>` with dot-notation keys, sends via `PUT /api/config`
 - Controls use Tauri dialog plugin for native file save, falls back to blob download
- [local-transcription.spec](local-transcription.spec) controls build
+## CI/CD
 - UPX compression enabled for smaller executables
 - Hidden imports required for PySide6, faster-whisper, torch
 - Console mode enabled by default (set `console=False` to hide)
-### Threading Model
+Two Gitea Actions workflows in `.gitea/workflows/`:
- Main thread: Qt GUI event loop
+- **`release.yml`**: Triggers on push to `main`. Auto-bumps version, builds Tauri app on Linux/Windows/macOS, uploads `.deb`, `.rpm`, `.msi`, `.dmg` to Gitea release.
- Audio thread: Captures and processes audio chunks
+- **`build-sidecar.yml`**: Triggers on changes to `client/`, `server/`, `backend/`, `pyproject.toml`, or `local-transcription-headless.spec` (and on manual dispatch). Builds headless Python sidecar via PyInstaller. CUDA + CPU for Linux/Windows, CPU-only for macOS.
 - Web server thread: Runs FastAPI server
 - Transcription: Runs in callback thread from audio capture
 - All transcription results communicated via Qt signals
-### Server Sync (Optional Multi-User Feature)
+Both require a `BUILD_TOKEN` secret (Gitea API token with release write access).
 - [client/server_sync.py](client/server_sync.py) handles server communication
 - Toggle in Settings: "Enable Server Sync"
 - Sends transcriptions to Node.js server via HTTP POST
 - Real-time updates via WebSocket to display page
 - Per-speaker font support (Web-Safe, Google Fonts, Custom uploads)
 - Falls back gracefully if server unavailable
 ## Common Patterns
 ### Adding a New Setting
-1. Add to [config/default_config.yaml](config/default_config.yaml)
+1. Add default to [config/default_config.yaml](config/default_config.yaml)
-2. Update [client/config.py](client/config.py) if validation needed
+2. Add UI control in [src/lib/components/Settings.svelte](src/lib/components/Settings.svelte)
-3. Add UI control in [gui/settings_dialog_qt.py](gui/settings_dialog_qt.py)
+3. Ensure the setting is included in the save handler's config update
-4. Apply setting in relevant component (no restart if possible)
+4. Apply in `AppController.apply_settings()` or the relevant component
-5. Emit signal to update display if needed
+5. For legacy GUI: also update [gui/settings_dialog_qt.py](gui/settings_dialog_qt.py)
 ### Adding a New API Endpoint
 1. Add route in [backend/api_server.py](backend/api_server.py) `_setup_routes()`
 2. Add supporting logic in [backend/app_controller.py](backend/app_controller.py) if needed
 3. Call from Svelte via `backendStore.apiGet/apiPost/apiPut`
 ### Modifying Transcription Display
- Local GUI: [gui/transcription_display_qt.py](gui/transcription_display_qt.py)
+- Tauri UI: [src/lib/components/TranscriptionDisplay.svelte](src/lib/components/TranscriptionDisplay.svelte)
- Local web display (OBS): [server/web_display.py](server/web_display.py) (HTML in `_get_html()`)
+- OBS display: [server/web_display.py](server/web_display.py) (HTML in `_get_html()`)
 - Multi-user display: [server/nodejs/server.js](server/nodejs/server.js) (display page in `/display` route)
 ### Adding a New Model Size
 - Update [client/transcription_engine.py](client/transcription_engine.py)
 - Add to model selector in [gui/settings_dialog_qt.py](gui/settings_dialog_qt.py)
 - Update CLI argument choices in [main_cli.py](main_cli.py)
 ## Dependencies
-**Core:**
+**Frontend:** Tauri v2, Svelte 5, Vite, TypeScript
- `faster-whisper`: Optimized Whisper inference
+**Backend:** Python 3.9+, FastAPI, Uvicorn, RealtimeSTT, faster-whisper, PyTorch (CUDA), sounddevice
- `torch`: ML framework (CUDA-enabled via special index)
+**Build:** PyInstaller (sidecar), Tauri CLI (app), uv (Python packages)
- `PySide6`: Qt6 bindings for GUI
+**CI:** Gitea Actions with platform-specific runners
 - `sounddevice`: Cross-platform audio I/O
 - `noisereduce`, `webrtcvad`: Audio preprocessing
 **Web Server:**
 - `fastapi`, `uvicorn`: Web server and ASGI
 - `websockets`: Real-time communication
 **Build:**
 - `pyinstaller`: Create standalone executables
 - `uv`: Fast package manager
 **PyTorch CUDA Index:**
 - Configured in [pyproject.toml](pyproject.toml) under `[[tool.uv.index]]`
 - Uses PyTorch's custom wheel repository for CUDA builds
 - Automatically installed with `uv sync` when using CUDA build scripts
 ## Platform-Specific Notes
 ### Linux
- Uses PulseAudio/ALSA for audio
+- Tauri needs: `libgtk-3-dev`, `libwebkit2gtk-4.1-dev`, `libappindicator3-dev`, `librsvg2-dev`, `patchelf`
- Build scripts use bash (`.sh` files)
+- Audio: PulseAudio/ALSA via sounddevice
 - Executable: `dist/LocalTranscription/LocalTranscription`
 ### Windows
- Uses Windows Audio/WASAPI
+- Tauri needs: WebView2 (usually pre-installed on Windows 10+)
- Build scripts use batch (`.bat` files)
+- Audio: WASAPI via sounddevice
 - Executable: `dist\LocalTranscription\LocalTranscription.exe`
 - Requires Visual C++ Redistributable on target systems
-### Cross-Building
+### macOS
- **Cannot cross-compile** - must build on target platform
+- Tauri needs: Xcode Command Line Tools
- CI/CD should use platform-specific runners
+- Audio: CoreAudio via sounddevice
-
+- GPU: MPS (Apple Silicon) detected by `device_utils.py`
-## Troubleshooting
+- `Info.plist` must include `NSMicrophoneUsageDescription` for mic access
-
+- No CUDA builds — CPU/MPS only
 ### Model Loading Issues
 - Models download to `~/.cache/huggingface/`
 - First run requires internet connection
 - Check disk space (models: 75MB-3GB depending on size)
 ### Audio Device Issues
 - Run `uv run python main_cli.py --list-devices`
 - Check permissions (microphone access)
 - Try different device indices in settings
 ### GPU Not Detected
 - Run `uv run python check_cuda.py`
 - Install CUDA drivers (not CUDA toolkit - bundled in build)
 - Verify PyTorch sees GPU: `python -c "import torch; print(torch.cuda.is_available())"`
 ### Web Server Port Conflicts
 - Default port: 8080
 - Change in [gui/main_window_qt.py](gui/main_window_qt.py) or config
 - Use `lsof -i :8080` (Linux) or `netstat -ano | findstr :8080` (Windows)
 ## OBS Integration
 ### Local Display (Single User)
 1. Start Local Transcription app
 2. In OBS: Add "Browser" source
 3. URL: `http://localhost:8080`
 4. Set dimensions (e.g., 1920x300)
 ### Multi-User Display (Node.js Server)
 1. Deploy Node.js server (see [server/nodejs/README.md](server/nodejs/README.md))
 2. Each user configures Server URL: `http://your-server:3000/api/send`
 3. Enter same room name and passphrase
 4. In OBS: Add "Browser" source
 5. URL: `http://your-server:3000/display?room=ROOM&fade=10&timestamps=true&maxlines=50&fontsize=16`
 6. Customize URL parameters as needed:
   - `timestamps=false` - Hide timestamps
   - `maxlines=30` - Show max 30 lines (prevents scroll bars)
   - `fontsize=18` - Larger font
   - `fontfamily=Courier` - Different font
 ## Performance Optimization
 **For Real-Time Transcription:**
 - Use `tiny` or `base` model (faster)
 - Enable GPU if available (5-10x faster)
 - Increase chunk_duration for better accuracy (higher latency)
 - Decrease chunk_duration for lower latency (less context)
 - Enable VAD to skip silent audio
 **For Build Size Reduction:**
 - Don't bundle models (download on demand)
 - Use CPU-only build if no GPU users
 - Enable UPX compression (already in spec)
 ## Phase Status
 - ✅ **Phase 1**: Standalone desktop application (complete)
 - ✅ **Web Server**: Local OBS integration (complete)
 - ✅ **Builds**: PyInstaller executables (complete)
 - ✅ **Phase 2**: Multi-user Node.js server (complete, optional)
 - ⏸️ **Phase 3+**: Advanced features (see [NEXT_STEPS.md](NEXT_STEPS.md))
 ## Related Documentation
- [README.md](README.md) - User-facing documentation
+- [README.md](README.md) — User-facing documentation
- [BUILD.md](BUILD.md) - Detailed build instructions
+- [BUILD.md](BUILD.md) — Detailed build instructions
- [INSTALL.md](INSTALL.md) - Installation guide
+- [INSTALL.md](INSTALL.md) — Installation guide
- [NEXT_STEPS.md](NEXT_STEPS.md) - Future enhancements
+- [server/nodejs/README.md](server/nodejs/README.md) — Node.js server setup
 - [server/nodejs/README.md](server/nodejs/README.md) - Node.js server setup and deployment
--- a/DEEPGRAM_PROXY_PLAN.md
+++ b/DEEPGRAM_PROXY_PLAN.md
@@ -0,0 +1,574 @@
 # Deepgram Proxy Service — Build Plan
 ## Project Overview
 Build a standalone hosted service that acts as a Deepgram proxy for the Local Transcription
 desktop app. Users can either provide their own Deepgram API key (BYOK) or use the managed
 service with prepaid credits purchased via Stripe.
 This is a **separate repository** from `local-transcription`. The desktop app will be updated
 in a second phase to support both modes.
 ---
 ## Repository Structure
 ```
 transcription-proxy/
 ├── src/
 │   ├── server.js              # Express app entry point
 │   ├── config.js              # Environment config loader
 │   ├── db/
 │   │   ├── index.js           # node-postgres pool setup
 │   │   └── migrations/        # SQL migration files (numbered)
 │   │       ├── 001_users.sql
 │   │       ├── 002_credits.sql
 │   │       ├── 003_sessions.sql
 │   │       └── 004_usage_ledger.sql
 │   ├── middleware/
 │   │   ├── auth.js            # JWT verification middleware
 │   │   └── rateLimit.js       # Per-user rate limiting
 │   ├── routes/
 │   │   ├── auth.js            # POST /auth/register, /auth/login, /auth/refresh
 │   │   ├── billing.js         # POST /billing/checkout, GET /billing/balance
 │   │   └── account.js         # GET /account/me, GET /account/usage
 │   ├── websocket/
 │   │   └── proxy.js           # WebSocket proxy handler (core feature)
 │   └── webhooks/
 │       └── stripe.js          # POST /webhooks/stripe
 ├── web/                       # Simple frontend dashboard
 │   ├── index.html             # Landing / login page
 │   ├── dashboard.html         # Balance, usage history, buy credits
 │   └── assets/
 │       ├── app.js
 │       └── style.css
 ├── .env.example
 ├── package.json
 ├── docker-compose.yml         # Postgres + app for local dev
 └── CLAUDE.md                  # This file (after renaming)
 ```
 ---
 ## Technology Stack
 - **Runtime**: Node.js 20+
 - **Framework**: Express 4
 - **WebSocket**: `ws` library (not socket.io — keep it lean)
 - **Database**: PostgreSQL 15+ via `pg` (node-postgres)
 - **Auth**: JWT via `jsonwebtoken`, passwords hashed with `bcrypt`
 - **Payments**: Stripe Node SDK (`stripe`)
 - **Environment**: `dotenv`
 - **Dev tooling**: `nodemon` for dev, no TypeScript (keep it simple)
 ---
 ## Database Schema
 Run migrations in order. Use a simple `schema_migrations` table to track applied migrations.
 ### 001_users.sql
 ```sql
 CREATE TABLE schema_migrations (
  version INTEGER PRIMARY KEY,
  applied_at TIMESTAMPTZ DEFAULT NOW()
 );
 CREATE TABLE users (
  id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
  email TEXT UNIQUE NOT NULL,
  password_hash TEXT NOT NULL,
  stripe_customer_id TEXT UNIQUE,
  created_at TIMESTAMPTZ DEFAULT NOW(),
  updated_at TIMESTAMPTZ DEFAULT NOW()
 );
 ```
 ### 002_credits.sql
 ```sql
 CREATE TABLE credit_balance (
  user_id UUID PRIMARY KEY REFERENCES users(id) ON DELETE CASCADE,
  seconds_remaining INTEGER NOT NULL DEFAULT 0,
  updated_at TIMESTAMPTZ DEFAULT NOW()
 );
 ```
 ### 003_sessions.sql
 ```sql
 CREATE TABLE transcription_sessions (
  id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
  user_id UUID NOT NULL REFERENCES users(id),
  mode TEXT NOT NULL CHECK (mode IN ('managed', 'byok')),
  started_at TIMESTAMPTZ DEFAULT NOW(),
  ended_at TIMESTAMPTZ,
  seconds_used INTEGER NOT NULL DEFAULT 0,
  deepgram_model TEXT,
  status TEXT NOT NULL DEFAULT 'active' CHECK (status IN ('active', 'completed', 'terminated'))
 );
 CREATE INDEX idx_sessions_user_id ON transcription_sessions(user_id);
 CREATE INDEX idx_sessions_started_at ON transcription_sessions(started_at);
 ```
 ### 004_usage_ledger.sql
 ```sql
 CREATE TABLE usage_ledger (
  id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
  user_id UUID NOT NULL REFERENCES users(id),
  session_id UUID REFERENCES transcription_sessions(id),
  recorded_at TIMESTAMPTZ DEFAULT NOW(),
  seconds INTEGER NOT NULL,
  description TEXT  -- e.g. 'session_usage', 'credit_purchase', 'manual_adjustment'
 );
 CREATE INDEX idx_ledger_user_id ON usage_ledger(user_id);
 ```
 ---
 ## Environment Variables (.env.example)
 ```env
 # Server
 PORT=3000
 NODE_ENV=development
 # Database
 DATABASE_URL=postgresql://user:password@localhost:5432/transcription_proxy
 # Auth
 JWT_SECRET=changeme_use_long_random_string
 JWT_EXPIRY=7d
 # Stripe
 STRIPE_SECRET_KEY=sk_test_...
 STRIPE_WEBHOOK_SECRET=whsec_...
 # Deepgram
 DEEPGRAM_API_KEY=your_deepgram_key_here
 # Pricing (seconds per dollar — adjust for your margin)
 # Default: 1000 seconds per $1 (= $0.06/min charged to the user), which covers
 # Deepgram's ~$0.006/min cost plus margin
 CREDITS_PER_DOLLAR=1000
 ```
 ---
 ## Phase 1 — Core Server & Auth
 ### Goals
 - Working Express app with Postgres connection
 - Migration runner
 - User registration and login
 - JWT middleware
 ### Tasks
 1. **Scaffold project**
   - `npm init`, install dependencies: `express ws pg jsonwebtoken bcrypt stripe dotenv`
   - Dev dependencies: `nodemon`
   - Add `start` and `dev` scripts to package.json
 2. **Database connection** (`src/db/index.js`)
   - Export a `pg.Pool` instance using `DATABASE_URL`
   - Export a `migrate()` function that reads `src/db/migrations/*.sql` in order,
     checks `schema_migrations` table, and applies unapplied ones
   - Call `migrate()` on server startup before listening
 3. **Auth routes** (`src/routes/auth.js`)
   - `POST /auth/register` — validate email/password, hash password with bcrypt (cost 12),
     insert user, insert empty credit_balance row, return JWT
   - `POST /auth/login` — verify credentials, return JWT + refresh token
   - `POST /auth/refresh` — validate refresh token, return new JWT
   - Passwords: minimum 8 characters, validate email format
 4. **JWT middleware** (`src/middleware/auth.js`)
   - Verify `Authorization: Bearer <token>` header
   - Attach `req.user = { id, email }` on success
   - Return 401 on failure
   - Export as `requireAuth` middleware
 5. **Basic health check**
   - `GET /health` returns `{ status: 'ok', db: 'connected' }`
 ---
 ## Phase 2 — Billing & Credits
 ### Goals
 - Stripe Checkout session creation for credit purchases
 - Webhook handler to fulfill purchases
 - Balance endpoint
 ### Payment Methods
 Use **Stripe Dynamic Payment Methods** — do NOT hardcode `payment_method_types` in the
 Checkout Session. Instead, leave it unset and manage everything from the Stripe Dashboard.
 Enable the following in the Stripe Dashboard under Settings → Payment Methods:
 - **Cards** (Visa, Mastercard, Amex, Discover) — on by default
 - **PayPal** — enable manually
 - **Apple Pay** — on by default, shows automatically on Safari/iOS
 - **Google Pay** — enable manually (one toggle)
 - **Cash App Pay** — enable manually (popular with streaming audiences)
 - **Link** — Stripe's saved payment network, on by default
 Stripe will automatically show the most relevant methods to each user based on their
 location and device. No code changes are needed to add or remove methods in future —
 it's all dashboard config.
 ### Credit Packages
 Define these as constants in `src/config.js`:
 ```javascript
 CREDIT_PACKAGES: [
  { id: 'pack_500',  label: '500 minutes',  seconds: 30000,  price_cents: 300  },
  { id: 'pack_1200', label: '1200 minutes', seconds: 72000,  price_cents: 600  },
  { id: 'pack_3000', label: '3000 minutes', seconds: 180000, price_cents: 1200 },
 ]
 ```
 Adjust pricing to cover Deepgram costs ($0.006/min = $0.0001/sec) plus margin and
 Stripe fees (~2.9% + $0.30).
 ### Tasks
 1. **Stripe customer creation**
   - On user registration, create a Stripe customer and store `stripe_customer_id`
   - Do this asynchronously (don't block registration response)
 2. **Billing routes** (`src/routes/billing.js`)
   - `GET /billing/packages` — return credit package list (no auth required)
   - `POST /billing/checkout` — requires auth, accepts `{ package_id }`,
     creates Stripe Checkout Session using dynamic payment methods (do NOT pass
     `payment_method_types` — omitting it enables dynamic methods automatically),
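     include `user_id` and `package_id` in the session-level `metadata` (this is what the
     `checkout.session.completed` webhook receives; `payment_intent_data.metadata` is only
     copied to the PaymentIntent, so mirror the values there as well if they are needed on it),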
     returns `{ checkout_url }`
   - `GET /billing/balance` — requires auth, returns `{ seconds_remaining, minutes_remaining }`
 3. **Stripe webhook** (`src/webhooks/stripe.js`)
   - Mount at `POST /webhooks/stripe` with raw body (use `express.raw()` for this route only)
   - Verify signature with `stripe.webhooks.constructEvent()`
   - Handle `checkout.session.completed`:
     - Extract `user_id` and `package_id` from metadata
     - Add seconds to `credit_balance`
     - Insert row into `usage_ledger` with description `'credit_purchase'`
   - Handle `payment_intent.payment_failed`: log it (no action needed for prepaid)
 4. **Success/cancel pages**
   - Stripe Checkout redirects to `GET /billing/success?session_id=...` and `/billing/cancel`
   - These can be simple HTML responses or redirects to the web dashboard
 ---
 ## Phase 3 — WebSocket Proxy (Core Feature)
 This is the most critical component. The proxy sits between the desktop client and Deepgram,
 forwarding audio while tracking usage in real time.
 ### Connection Flow
 ```
 Client connects → validate JWT → check credit balance → open Deepgram upstream
     ↓
 Audio chunks arrive → forward to Deepgram → record usage every 10 seconds
     ↓
 Transcription arrives from Deepgram → forward to client
     ↓
 Client disconnects (or credits exhausted) → close upstream → finalize session
 ```
 ### WebSocket Protocol
 **Client connects to**: `wss://your-domain/ws/transcribe`
 **Client sends as first message** (JSON):
 ```json
 {
  "type": "auth",
  "token": "<JWT>",
  "config": {
    "model": "nova-2",
    "language": "en-US",
    "interim_results": true,
    "endpointing": 300
  }
 }
 ```
 **After auth success, client sends**: raw audio binary frames (PCM 16kHz mono)
 **Server sends to client**:
 ```json
 { "type": "ready" }
 { "type": "transcript", "text": "...", "is_final": true, "confidence": 0.98 }
 { "type": "error", "code": "insufficient_credits", "message": "..." }
 { "type": "credits_low", "seconds_remaining": 300 }
 { "type": "session_end", "seconds_used": 120 }
 ```
 ### Tasks (`src/websocket/proxy.js`)
 1. **Upgrade handler**
   - Attach to the HTTP server using `ws.Server({ noServer: true })`
   - In `server.on('upgrade', ...)`, route `/ws/transcribe` to this handler
 2. **Auth handshake**
   - First message must be `{ type: 'auth', token: '...' }` — received within 5 seconds
     or connection is terminated
   - Verify JWT, load user's credit balance from DB
   - If balance is 0 or negative, send `insufficient_credits` error and close
 3. **Deepgram upstream connection**
   - Open a WebSocket to Deepgram's streaming API:
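     `wss://api.deepgram.com/v1/listen?model=nova-2&language=en-US&interim_results=true&encoding=linear16&sample_rate=16000&channels=1`
     (raw PCM has no container header, so `encoding` and `sample_rate` must be passed explicitly)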
   - Auth header: `Authorization: Token <DEEPGRAM_API_KEY>`
   - Use query params from client's `config` object (whitelist allowed params)
 4. **Audio forwarding**
   - All binary messages from client → forward directly to Deepgram upstream
   - All messages from Deepgram → parse JSON, reformat, forward to client
 5. **Usage tracking**
   - Create a `transcription_sessions` row on connection
   - Maintain an in-memory `secondsUsed` counter per connection
   - Deepgram sends `{ type: 'Results', duration: X }` in responses — use this for
     accurate second counting
   - Every 10 seconds (or on disconnect), write current `secondsUsed` to DB:
     - Update `transcription_sessions.seconds_used`
     - Decrement `credit_balance.seconds_remaining`
     - Insert into `usage_ledger`
   - If `seconds_remaining` hits 0: send `insufficient_credits`, close connection
 6. **Cleanup on disconnect**
   - Mark session as `completed`, set `ended_at`
   - Do final usage flush to DB
   - Close Deepgram upstream if still open
 7. **Error handling**
   - If Deepgram upstream closes unexpectedly, notify client and close
   - If client sends malformed data, log and continue (don't crash)
 ---
 ## Phase 4 — Account Routes & Rate Limiting
 ### Tasks
 1. **Account routes** (`src/routes/account.js`)
   - `GET /account/me` — returns `{ email, credits: { seconds_remaining, minutes_remaining }, created_at }`
   - `GET /account/usage` — returns last 30 days of `usage_ledger` entries grouped by day,
     plus list of last 10 sessions with duration
 2. **Rate limiting** (`src/middleware/rateLimit.js`)
   - Use in-memory rate limiting (no Redis needed at this scale)
   - Auth endpoints: max 10 requests per minute per IP
   - WebSocket connections: max 2 concurrent connections per user
     (store active connections in a `Map<userId, Set<ws>>`)
 ---
 ## Phase 5 — Web Dashboard
 A simple, functional HTML/CSS/JS dashboard. No framework — vanilla JS is fine.
 This is a developer-friendly streamer tool, not a consumer SaaS, so clean and
 functional beats flashy.
 ### Pages
 **`/` (Landing / Login)**
 - Brief product description (what this is, why it exists)
 - Login form and link to register
 - Link to GitHub/Gitea repo
 **`/dashboard` (Post-login)**
 - Current credit balance (minutes remaining, prominently displayed)
 - "Buy Credits" section showing the three packages with Stripe Checkout buttons
 - Usage chart: last 30 days bar chart (vanilla canvas or a small CDN chart lib)
 - Recent sessions table: date, duration, status
 **`/register`**
 - Registration form
 ### Implementation Notes
 - Store JWT in `localStorage`, attach as `Authorization` header on API calls
 - Redirect to `/` if JWT missing or expired
 - Keep CSS minimal but readable — this is a utility dashboard
 ---
 ## Phase 6 — Desktop App Integration
 Changes needed in the `local-transcription` Python repo.
 ### New file: `client/remote_transcription.py`
 This module replaces `transcription_engine_realtime.py` when remote mode is active.
 ```python
 # Pseudocode / spec for Claude Code to implement
 class RemoteTranscriptionEngine:
    """
    Connects to the transcription proxy WebSocket and streams audio.
    Provides the same callback interface as the local engine so the
    rest of the app doesn't need to change.
    """
    def __init__(self, config, on_transcript_callback):
        # config contains: server_url, auth_token (or byok_api_key), model
        ...
    def start(self):
        # Open WebSocket connection
        # Send auth message
        # Start audio capture thread (reuse existing audio_capture.py)
        ...
    def stop(self):
        # Close WebSocket gracefully
        ...
    def _on_audio_chunk(self, audio_data):
        # Called by audio_capture.py with raw PCM data
        # Send as binary WebSocket frame
        ...
    def _on_server_message(self, message):
        # Parse JSON from server
        # On type='transcript': call on_transcript_callback
        # On type='credits_low': trigger UI warning
        # On type='error': surface to user
        ...
 ```
 ### BYOK Mode
 When user provides their own Deepgram key, connect directly to Deepgram instead of the proxy:
 - Endpoint: `wss://api.deepgram.com/v1/listen?...`
 - Auth: `Authorization: Token <user_key>`
 - No session tracking (Deepgram handles billing directly to the user)
 - Same `RemoteTranscriptionEngine` class, just different URL and auth header
 ### Settings Changes (`gui/settings_dialog_qt.py`)
 Add a new "Transcription Mode" section:
 ```
 Transcription Mode:
  ○ Local (Whisper)          [existing behavior]
  ○ Remote - Managed         [requires login]
  ○ Remote - BYOK            [requires Deepgram API key]
 [If Managed selected]:
  Server URL: [____________]
  [Login / Register]  [View Balance: 420 min remaining]
 [If BYOK selected]:
  Deepgram API Key: [____________]
  Model: [nova-2 ▼]
 ```
 ### Config additions (`config/default_config.yaml`)
 ```yaml
 remote:
  mode: local           # local | managed | byok
  server_url: ""        # proxy server URL for managed mode
  auth_token: ""        # JWT stored after login
  byok_api_key: ""      # Deepgram key for BYOK mode
  deepgram_model: nova-2
  language: en-US
 ```
 ---
 ## Build & Deployment Notes
 ### Docker Compose (local dev)
 ```yaml
 version: '3.8'
 services:
  db:
    image: postgres:15
    environment:
      POSTGRES_DB: transcription_proxy
      POSTGRES_USER: user
      POSTGRES_PASSWORD: password
    ports:
      - "5432:5432"
    volumes:
      - pgdata:/var/lib/postgresql/data
  app:
    build: .
    ports:
      - "3000:3000"
    environment:
      DATABASE_URL: postgresql://user:password@db:5432/transcription_proxy
    depends_on:
      - db
    volumes:
      - .:/app
      - /app/node_modules
 volumes:
  pgdata:
 ```
 ### Production Deployment
 This service is a good fit for deployment on AnHonestHost WHP as a containerized app,
 or on a small DigitalOcean/Linode VPS. Requirements are light:
 - 512MB RAM is sufficient
 - Postgres can be the same instance as other services or managed (e.g., Supabase free tier)
 - Needs a public domain with SSL for WebSocket (`wss://`) to work from desktop clients
 Reverse proxy config (Nginx or HAProxy) should:
 - Proxy HTTP → `localhost:3000`
 - Pass `Upgrade` and `Connection` headers for WebSocket support
 - Set a long idle timeout, e.g. `proxy_read_timeout 3600s` in Nginx or `timeout tunnel 1h` in HAProxy (sessions can be long)
 ---
 ## Implementation Order
 Build and test in this sequence:
 1. Project scaffold + DB connection + migrations
 2. Auth (register/login/JWT) — test with curl
 3. Stripe billing + webhook — test with Stripe CLI (`stripe listen`)
 4. WebSocket proxy — test with a simple browser WebSocket client first
 5. Usage tracking and credit decrement
 6. Account/usage routes
 7. Web dashboard
 8. Desktop app integration (separate PR in local-transcription repo)
 ---
 ## Key Decisions & Rationale
 | Decision | Choice | Reason |
 |---|---|---|
 | Credits model | Prepaid | No surprise charges, simpler billing, better for irregular streamer usage |
 | WebSocket library | `ws` | Lightweight, no abstraction overhead, plays well with raw binary audio |
 | Auth | JWT (stateless) | Desktop app holds token locally; no session store needed |
 | DB driver | `node-postgres` (pg) | No ORM overhead; schema is simple enough for raw SQL |
 | Migrations | Raw SQL files | No dependency on Knex/Prisma; easy to inspect and reason about |
 | Rate limiting | In-memory | Redis is overkill for this scale; single-process Node is fine initially |
 | Frontend | Vanilla JS | Dashboard is simple utility UI; no framework justified |
 ---
 ## What This Plan Does NOT Cover (Future Work)
 - OAuth / social login
 - Admin panel for managing users
 - Refund / credit adjustment tooling
 - Email verification
 - Password reset flow
 - Multi-language support beyond Deepgram's defaults
 - Analytics / aggregated usage reporting
 - Self-hosted Whisper inference as a third backend option
--- a/DEEPGRAM_PROXY_PLAN.md:Zone.Identifier
+++ b/DEEPGRAM_PROXY_PLAN.md:Zone.Identifier
--- a/README.md
+++ b/README.md
@@ -1,13 +1,14 @@
 # Local Transcription
-A real-time speech-to-text desktop application for streamers. Run locally on your machine with GPU or CPU, display transcriptions via OBS browser source, and optionally sync with other users through a multi-user server.
+A real-time speech-to-text desktop application for streamers. Runs locally on your machine with GPU or CPU, displays transcriptions via OBS browser source, and optionally syncs with other users through a multi-user server.
 **Version 1.4.0**
 ## Features
 - **Real-Time Transcription**: Live speech-to-text using Whisper models with minimal latency
- **Standalone Desktop App**: PySide6/Qt GUI that works without any server
+- **Cross-Platform**: Native desktop app for Windows, macOS, and Linux via [Tauri](https://tauri.app/)
 - **Dual Transcription Modes**: Local (Whisper) or cloud (Deepgram) with managed billing or BYOK
 - **CPU & GPU Support**: Automatic detection of CUDA (NVIDIA), MPS (Apple Silicon), or CPU fallback
 - **Advanced Voice Detection**: Dual-layer VAD (WebRTC + Silero) for accurate speech detection
 - **OBS Integration**: Built-in web server for browser source capture at `http://localhost:8080`
@@ -16,36 +17,70 @@ A real-time speech-to-text desktop application for streamers. Run locally on you
 - **Customizable Colors**: User-configurable colors for name, text, and background
 - **Noise Suppression**: Built-in audio preprocessing to reduce background noise
 - **Auto-Updates**: Automatic update checking with release notes display
- **Cross-Platform**: Builds available for Windows and Linux
+
 ## Architecture
 The application uses a two-process architecture:
 1. **Tauri Shell** (Svelte 5 frontend) — lightweight native window (~50MB) rendering the UI
 2. **Python Backend** (sidecar) — headless process running transcription, audio capture, and the OBS web server
 The Tauri frontend communicates with the Python backend via REST API and WebSocket, following the same pattern as [voice-to-notes](https://repo.anhonesthost.net/MacroPad/voice-to-notes).
 ```
 Tauri App (user launches this)
  └─ Spawns Python backend as sidecar
       ├─ FastAPI REST API (control endpoints)
       ├─ WebSocket /ws/control (real-time state + transcriptions)
       ├─ OBS web display at http://localhost:8080
       └─ Transcription engine (Whisper or Deepgram)
 ```
 > **Legacy GUI**: The original PySide6/Qt desktop GUI (`main.py`) still works alongside the new Tauri frontend during the transition period.
 ## Quick Start
 ### Running from Source
 ```bash
-# Install dependencies
+# Install Python dependencies
 uv sync
-# Run the application
+# Run the Tauri app (frontend + backend)
 npm install
 npm run tauri dev
 # Or run just the headless backend (for development)
 uv run python -m backend.main_headless
 # Or run the legacy PySide6 GUI
 uv run python main.py
 ```
 ### Using Pre-Built Executables
-Download the latest release from the [releases page](https://repo.anhonesthost.net/streamer-tools/local-transcription/releases) and run the executable for your platform.
+Download the latest release from the [releases page](https://repo.anhonesthost.net/streamer-tools/local-transcription/releases):
 - **App installer** (Tauri shell): `.msi` (Windows), `.dmg` (macOS), `.deb`/`.rpm`/`.AppImage` (Linux)
 - **Sidecar** (Python backend): Download the matching `sidecar-*` zip for your platform (CUDA or CPU)
 ### Building from Source
 **Linux:**
 ```bash
-./build.sh
+# Build the Tauri app
-# Output: dist/LocalTranscription/LocalTranscription
+npm install
-```
+npm run tauri build
 # Output: src-tauri/target/release/bundle/
-**Windows:**
+# Build the Python sidecar (headless, no Qt)
-```cmd
+uv sync
 uv run pyinstaller local-transcription-headless.spec
 # Output: dist/local-transcription-backend/
 # Build the legacy PySide6 app (Linux)
 ./build.sh
 # Build the legacy PySide6 app (Windows)
 build.bat
 # Output: dist\LocalTranscription\LocalTranscription.exe
 ```
 For detailed build instructions, see [BUILD.md](BUILD.md).
@@ -57,14 +92,23 @@ For detailed build instructions, see [BUILD.md](BUILD.md).
 1. Launch the application
 2. Select your microphone from the audio device dropdown
 3. Choose a Whisper model (smaller = faster, larger = more accurate):
-   - `tiny.en` / `tiny` - Fastest, good for quick captions
+   - `tiny.en` / `tiny` — Fastest, good for quick captions
-   - `base.en` / `base` - Balanced speed and accuracy
+   - `base.en` / `base` — Balanced speed and accuracy
-   - `small.en` / `small` - Better accuracy
+   - `small.en` / `small` — Better accuracy
-   - `medium.en` / `medium` - High accuracy
+   - `medium.en` / `medium` — High accuracy
-   - `large-v3` - Best accuracy (requires more resources)
+   - `large-v3` — Best accuracy (requires more resources)
 4. Click **Start** to begin transcription
 5. Transcriptions appear in the main window and at `http://localhost:8080`
 ### Remote Transcription (Deepgram)
 Instead of local Whisper models, you can use cloud-based transcription:
 - **Managed mode**: Sign up via the transcription proxy for metered billing
 - **BYOK mode**: Bring your own Deepgram API key for direct access
 Configure in Settings > Remote Transcription.
 ### OBS Browser Source Setup
 1. Start the Local Transcription app
@@ -88,7 +132,7 @@ For syncing transcriptions across multiple users (e.g., multi-host streams or tr
 ## Configuration
-Settings are stored at `~/.local-transcription/config.yaml` and can be modified through the GUI settings panel.
+Settings are stored at `~/.local-transcription/config.yaml` and can be modified through the GUI settings panel or the REST API.
 ### Key Settings
@@ -100,6 +144,7 @@ Settings are stored at `~/.local-transcription/config.yaml` and can be modified
 | `transcription.silero_sensitivity` | VAD sensitivity (0-1, lower = more sensitive) | `0.4` |
 | `transcription.post_speech_silence_duration` | Silence before finalizing (seconds) | `0.3` |
 | `transcription.continuous_mode` | Fast speaker mode for quick talkers | `false` |
 | `remote.mode` | Transcription mode (local/managed/byok) | `local` |
 | `display.show_timestamps` | Show timestamps with transcriptions | `true` |
 | `display.fade_after_seconds` | Fade out time (0 = never) | `10` |
 | `display.font_source` | Font type (System Font/Web-Safe/Google Font/Custom File) | `System Font` |
@@ -111,67 +156,114 @@ See [config/default_config.yaml](config/default_config.yaml) for all available o
 ```
 local-transcription/
 ├── src/                             # Svelte 5 frontend (Tauri UI)
 │   ├── App.svelte                   # Main app shell
 │   ├── lib/components/              # UI components
 │   │   ├── Header.svelte
 │   │   ├── StatusBar.svelte
 │   │   ├── Controls.svelte
 │   │   ├── TranscriptionDisplay.svelte
 │   │   └── Settings.svelte
 │   └── lib/stores/                  # Reactive state management
 │       ├── backend.ts               # WebSocket + REST API client
 │       ├── config.ts                # App configuration
 │       └── transcriptions.ts        # Transcription data
 ├── src-tauri/                       # Tauri v2 Rust shell
 │   ├── src/main.rs
 │   └── tauri.conf.json
 ├── backend/                         # Headless Python backend (sidecar)
 │   ├── app_controller.py            # Orchestration logic (engine, sync, config)
 │   ├── api_server.py                # FastAPI REST + WebSocket control API
 │   └── main_headless.py             # Headless entry point
 ├── client/                          # Core transcription modules
 │   ├── audio_capture.py             # Audio input handling
-│   ├── transcription_engine_realtime.py  # RealtimeSTT integration
+│   ├── transcription_engine_realtime.py  # RealtimeSTT / Whisper
 │   ├── deepgram_transcription.py    # Deepgram cloud transcription
 │   ├── noise_suppression.py         # VAD and noise reduction
-│   ├── device_utils.py          # CPU/GPU detection
+│   ├── device_utils.py              # CPU/GPU/MPS detection
 │   ├── config.py                    # Configuration management
 │   ├── server_sync.py               # Multi-user server client
 │   └── update_checker.py            # Auto-update functionality
-├── gui/                         # Desktop application UI
+├── gui/                             # Legacy PySide6/Qt GUI
-│   ├── main_window_qt.py        # Main application window
+│   ├── main_window_qt.py
-│   ├── settings_dialog_qt.py    # Settings dialog
+│   ├── settings_dialog_qt.py
-│   └── transcription_display_qt.py  # Display widget
+│   └── transcription_display_qt.py
 ├── server/                          # Web servers
 │   ├── web_display.py               # Local FastAPI server for OBS
 │   └── nodejs/                      # Multi-user sync server
-│       ├── server.js            # Express + WebSocket server
+├── .gitea/workflows/                # CI/CD
-│       └── README.md            # Deployment instructions
+│   ├── release.yml                  # Tauri app builds (all platforms)
 │   └── build-sidecar.yml            # Python sidecar builds (CUDA + CPU)
 ├── config/
 │   └── default_config.yaml          # Default settings template
-├── main.py                      # GUI entry point
+├── main.py                          # Legacy GUI entry point
 ├── main_cli.py                      # CLI version (for testing)
-├── build.sh                     # Linux build script
+├── local-transcription.spec         # PyInstaller config (legacy, with PySide6)
-├── build.bat                    # Windows build script
+├── local-transcription-headless.spec # PyInstaller config (headless sidecar)
-└── local-transcription.spec     # PyInstaller configuration
+├── pyproject.toml                   # Python dependencies
 └── package.json                     # Node.js / Tauri dependencies
 ```
 ## Technology Stack
-### Desktop Application
+### Frontend (Tauri)
 - **Tauri v2** — Native cross-platform shell (Rust)
 - **Svelte 5** — Reactive UI framework (TypeScript)
 - **Vite** — Frontend build tool
 ### Backend (Python Sidecar)
 - **Python 3.9+**
- **PySide6** - Qt6 GUI framework
+- **FastAPI + Uvicorn** — REST API and WebSocket server
- **RealtimeSTT** - Real-time speech-to-text with advanced VAD
+- **RealtimeSTT** — Real-time speech-to-text with advanced VAD
- **faster-whisper** - Optimized Whisper model inference
+- **faster-whisper** — Optimized Whisper model inference (CTranslate2)
- **PyTorch** - ML framework (CUDA-enabled)
+- **PyTorch** — ML framework (CUDA-enabled builds available)
- **sounddevice** - Cross-platform audio capture
+- **sounddevice** — Cross-platform audio capture
- **webrtcvad + silero_vad** - Voice activity detection
+- **webrtcvad + silero_vad** — Voice activity detection
 - **noisereduce** - Noise suppression
-### Web Servers
+### Multi-User Server (Optional)
- **FastAPI + Uvicorn** - Local web display server
+- **Node.js + Express + WebSocket** — Real-time sync server
 - **Node.js + Express + WebSocket** - Multi-user sync server
-### Build Tools
+### Build & CI/CD
- **PyInstaller** - Executable packaging
+- **PyInstaller** — Python sidecar packaging
- **uv** - Fast Python package manager
+- **Tauri CLI** — App bundling (.msi, .dmg, .deb, .rpm, .AppImage)
 - **Gitea Actions** — Automated cross-platform builds
 - **uv** — Fast Python package manager
 ## CI/CD
 Two Gitea Actions workflows in `.gitea/workflows/`:
 | Workflow | Trigger | Produces |
 |----------|---------|----------|
 | `release.yml` | Push to `main` | Tauri app installers for all platforms |
 | `build-sidecar.yml` | Changes to `client/`, `server/`, `backend/`, `pyproject.toml`, or `local-transcription-headless.spec` (or manual dispatch) | Python sidecar zips (CUDA + CPU) |
 Both workflows require a `BUILD_TOKEN` secret in the repo settings (Gitea API token with release write access).
 ### Release Artifacts
 | Platform | App Installer | Sidecar (CUDA) | Sidecar (CPU) |
 |----------|--------------|----------------|---------------|
 | Linux x86_64 | `.deb`, `.rpm`, `.AppImage` | `sidecar-linux-x86_64-cuda.zip` | `sidecar-linux-x86_64-cpu.zip` |
 | Windows x86_64 | `.msi`, `-setup.exe` | `sidecar-windows-x86_64-cuda.zip` | `sidecar-windows-x86_64-cpu.zip` |
 | macOS ARM64 | `.dmg` | — | `sidecar-macos-aarch64-cpu.zip` |
 ## System Requirements
 ### Minimum
 - Python 3.9+
 - 4GB RAM
 - Any modern CPU
-### Recommended (for real-time performance)
+### Recommended (for local real-time transcription)
 - 8GB+ RAM
 - NVIDIA GPU with CUDA support (for GPU acceleration)
 - FFmpeg (installed automatically with dependencies)
 ### For Building
- **Linux**: gcc, Python dev headers
+- **Tauri app**: Node.js 20+, Rust stable, platform SDK (see [Tauri prerequisites](https://tauri.app/start/prerequisites/))
- **Windows**: Visual Studio Build Tools, Python dev headers
+- **Python sidecar**: Python 3.9+, uv, PyInstaller
 - **Linux**: `libgtk-3-dev`, `libwebkit2gtk-4.1-dev`, `libappindicator3-dev`, `librsvg2-dev`, `patchelf`
 - **Windows**: Visual Studio Build Tools, WebView2
 - **macOS**: Xcode Command Line Tools
 ## Troubleshooting
@@ -185,7 +277,7 @@ local-transcription/
 # List available audio devices
 uv run python main_cli.py --list-devices
 ```
- Ensure microphone permissions are granted
+- Ensure microphone permissions are granted (especially on macOS)
 - Try different device indices in settings
 ### GPU Not Detected
@@ -193,13 +285,13 @@ uv run python main_cli.py --list-devices
 # Check CUDA availability
 uv run python -c "import torch; print(torch.cuda.is_available())"
 ```
- Install NVIDIA drivers (CUDA toolkit is bundled)
+- Install NVIDIA drivers (CUDA toolkit is bundled in CUDA sidecar builds)
 - The app automatically falls back to CPU if no GPU is available
 ### Web Server Port Conflicts
- Default port is 8080
+- Default port is 8080; the app tries ports 8080-8084 automatically
 - Change in settings or edit config file
- Check for conflicts: `lsof -i :8080` (Linux) or `netstat -ano | findstr :8080` (Windows)
+- Check for conflicts: `lsof -i :8080` (Linux/macOS) or `netstat -ano | findstr :8080` (Windows)
 ## Use Cases
@@ -222,3 +314,5 @@ MIT License
 - [OpenAI Whisper](https://github.com/openai/whisper) for the speech recognition model
 - [RealtimeSTT](https://github.com/KoljaB/RealtimeSTT) for real-time transcription capabilities
 - [faster-whisper](https://github.com/guillaumekln/faster-whisper) for optimized inference
 - [Tauri](https://tauri.app/) for the cross-platform desktop framework
 - [Deepgram](https://deepgram.com/) for cloud transcription API
--- a/backend/__init__.py
+++ b/backend/__init__.py
@@ -0,0 +1 @@
 """Backend package for headless transcription service."""
--- a/backend/api_server.py
+++ b/backend/api_server.py
@@ -0,0 +1,323 @@
 """FastAPI control API server for the headless transcription backend.
 Extends the existing OBS display server with REST endpoints and a
 control WebSocket channel so that a Tauri (or any other) frontend
 can drive the application.
 """
 import asyncio
 import json
 from datetime import datetime
 from typing import List, Optional
 from fastapi import FastAPI, WebSocket, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
 from backend.app_controller import AppController
 # ── Request / Response Models ──────────────────────────────────────
 class ConfigUpdate(BaseModel):
    """Batch config update payload. Keys use dot-notation."""
    # Request body of PUT /api/config; the mapping is handed unchanged to
    # AppController.apply_settings().
    settings: dict  # e.g. {"user.name": "Alice", "transcription.model": "small.en"}
 class LoginRequest(BaseModel):
    # Request body of POST /api/login.
    # email/password are forwarded as JSON to "<server_url>/api/auth/login".
    email: str
    password: str
    # Base URL of the transcription proxy; on success it is persisted as
    # the 'remote.server_url' config value.
    server_url: str
 class RegisterRequest(BaseModel):
    # Request body of POST /api/register.
    # email/password are forwarded as JSON to "<server_url>/api/auth/register".
    email: str
    password: str
    # Base URL of the transcription proxy to register against.
    server_url: str
 class SkipVersionRequest(BaseModel):
    # Request body of POST /api/skip-version; the value is passed to
    # AppController.skip_version().
    version: str
 class SaveFileRequest(BaseModel):
    # Request body of POST /api/save-file.
    # Destination path, used as-is by the handler (no validation is applied).
    # NOTE(review): presumably chosen through a native save dialog in the
    # frontend -- confirm, since the server itself does not restrict it.
    path: str
    # Text content written to `path` as UTF-8.
    text: str
 # ── API Server ─────────────────────────────────────────────────────
 class APIServer:
    """Wraps AppController with a FastAPI application exposing control endpoints.
    Controller callbacks may fire on worker threads, so outgoing /ws/control
    messages are always scheduled onto the event loop that accepted the
    WebSocket connections (captured in the ``/ws/control`` handler) rather
    than onto whatever loop the calling thread happens to have.
    Attributes:
        controller: The AppController being exposed.
        control_connections: Currently connected /ws/control sockets.
        app: The FastAPI application holding every route.
    """
    def __init__(self, controller: AppController):
        """Build the FastAPI app, register routes and hook controller callbacks.
        Args:
            controller: Controller whose state and actions are exposed.
        """
        self.controller = controller
        self.control_connections: List[WebSocket] = []
        # Loop serving the WebSocket connections; set on first /ws/control
        # connect. Until then there is nobody to broadcast to anyway.
        self._loop: Optional[asyncio.AbstractEventLoop] = None
        self.app = FastAPI(title="Local Transcription API", version="1.0.0")
        # Allow Tauri webview origin
        # NOTE(review): a wildcard origin lets any web page the user visits
        # call this local API (including /api/save-file). Consider limiting
        # this to the Tauri origins named below.
        self.app.add_middleware(
            CORSMiddleware,
            allow_origins=["*"],  # Tauri uses tauri://localhost or https://tauri.localhost
            allow_credentials=True,
            allow_methods=["*"],
            allow_headers=["*"],
        )
        self._setup_routes()
        self._wire_controller_callbacks()
    def _wire_controller_callbacks(self):
        """Wire AppController callbacks to broadcast over /ws/control.
        An already-installed ``on_state_changed`` callback is preserved and
        called first; the other callbacks are replaced outright.
        """
        original_state_cb = self.controller.on_state_changed
        def on_state_changed(state: str, message: str):
            if original_state_cb:
                original_state_cb(state, message)
            self._broadcast_control({"type": "state_changed", "state": state, "message": message})
        self.controller.on_state_changed = on_state_changed
        def on_transcription(data: dict):
            self._broadcast_control({"type": "transcription", **data})
        self.controller.on_transcription = on_transcription
        def on_preview(data: dict):
            self._broadcast_control({"type": "preview", **data})
        self.controller.on_preview = on_preview
        def on_error(msg: str):
            self._broadcast_control({"type": "error", "message": msg})
        self.controller.on_error = on_error
        def on_credits_low(seconds: int):
            self._broadcast_control({"type": "credits_low", "seconds_remaining": seconds})
        self.controller.on_credits_low = on_credits_low
    def _discard_connection(self, ws: WebSocket):
        """Forget a control connection; a no-op if it was already removed."""
        try:
            self.control_connections.remove(ws)
        except ValueError:
            pass
    def _drop_failed_send(self, ws: WebSocket, future):
        """Done-callback for a scheduled send: prune the socket if it failed."""
        if future.cancelled() or future.exception() is not None:
            self._discard_connection(ws)
    def _broadcast_control(self, data: dict):
        """Send a message to all connected /ws/control clients.
        May be called from any thread. Sends are scheduled on the loop that
        owns the sockets; a socket is dropped only when its send actually
        fails, not merely because the caller's thread has no event loop.
        Args:
            data: JSON-serialisable payload to push to every client.
        """
        loop = self._loop
        if not self.control_connections or loop is None or loop.is_closed():
            return
        message = json.dumps(data)
        # Iterate over a snapshot: connections come and go on the loop thread
        # while this may be running on a worker thread.
        for ws in list(self.control_connections):
            coro = ws.send_text(message)
            try:
                future = asyncio.run_coroutine_threadsafe(coro, loop)
            except RuntimeError:
                # Loop was closed between the check above and scheduling.
                coro.close()
                return
            future.add_done_callback(lambda fut, ws=ws: self._drop_failed_send(ws, fut))
    def _setup_routes(self):
        """Register all API routes."""
        app = self.app
        ctrl = self.controller
        # ── Status ─────────────────────────────────────────────
        @app.get("/api/status")
        async def get_status():
            return ctrl.get_status()
        @app.get("/api/version")
        async def get_version():
            from version import __version__
            return {"version": __version__}
        # ── Transcription Control ──────────────────────────────
        @app.post("/api/start")
        async def start_transcription():
            success, message = ctrl.start_transcription()
            if not success:
                raise HTTPException(status_code=400, detail=message)
            return {"status": "ok", "message": message}
        @app.post("/api/stop")
        async def stop_transcription():
            success, message = ctrl.stop_transcription()
            if not success:
                raise HTTPException(status_code=400, detail=message)
            return {"status": "ok", "message": message}
        @app.post("/api/clear")
        async def clear_transcriptions():
            count = ctrl.clear_transcriptions()
            return {"status": "ok", "cleared": count}
        @app.get("/api/transcriptions")
        async def get_transcriptions():
            show_timestamps = ctrl.config.get('display.show_timestamps', True)
            return {
                "count": len(ctrl.transcriptions),
                "text": ctrl.get_transcriptions_text(include_timestamps=show_timestamps),
                "items": [
                    {
                        "text": r.text,
                        "user_name": r.user_name,
                        "timestamp": r.timestamp.strftime("%H:%M:%S") if r.timestamp else None,
                    }
                    for r in ctrl.transcriptions
                ],
            }
        @app.post("/api/save-file")
        async def save_file(req: SaveFileRequest):
            """Save text to a file (used by Tauri frontend after dialog)."""
            from pathlib import Path
            # NOTE(review): req.path is written without any restriction, so
            # any caller that can reach this endpoint can overwrite any file
            # the backend user may write. Consider validating the target.
            try:
                Path(req.path).write_text(req.text, encoding="utf-8")
                return {"status": "ok", "path": req.path}
            except Exception as e:
                raise HTTPException(status_code=500, detail=str(e))
        # ── Configuration ──────────────────────────────────────
        @app.get("/api/config")
        async def get_config():
            return ctrl.config.config
        @app.put("/api/config")
        async def update_config(update: ConfigUpdate):
            engine_reloaded, message = ctrl.apply_settings(update.settings)
            return {
                "status": "ok",
                "message": message,
                "engine_reloaded": engine_reloaded,
            }
        # ── Devices ────────────────────────────────────────────
        @app.get("/api/audio-devices")
        async def get_audio_devices():
            return {"devices": ctrl.get_audio_devices()}
        @app.get("/api/compute-devices")
        async def get_compute_devices():
            return {"devices": ctrl.get_compute_devices()}
        # ── Engine ─────────────────────────────────────────────
        @app.post("/api/reload-engine")
        async def reload_engine():
            success, message = ctrl.reload_engine()
            if not success:
                raise HTTPException(status_code=500, detail=message)
            return {"status": "ok", "message": message}
        # ── Updates ────────────────────────────────────────────
        @app.get("/api/check-update")
        async def check_update():
            return ctrl.check_for_updates()
        @app.post("/api/skip-version")
        async def skip_version(req: SkipVersionRequest):
            ctrl.skip_version(req.version)
            return {"status": "ok"}
        # ── Managed Mode Auth Proxy ────────────────────────────
        # The upstream calls use the blocking `requests` library, so they are
        # run in a worker thread to keep the event loop (and with it the
        # control WebSocket) responsive while waiting on the remote server.
        @app.post("/api/login")
        async def login(req: LoginRequest):
            """Proxy login to the transcription proxy server."""
            import requests as http_requests
            try:
                resp = await asyncio.to_thread(
                    http_requests.post,
                    f"{req.server_url}/api/auth/login",
                    json={"email": req.email, "password": req.password},
                    timeout=10,
                )
                if resp.status_code == 200:
                    data = resp.json()
                    ctrl.config.set('remote.auth_token', data.get('token', ''))
                    ctrl.config.set('remote.server_url', req.server_url)
                    return {"status": "ok", "token": data.get('token', '')}
                else:
                    raise HTTPException(status_code=resp.status_code, detail=resp.text)
            except http_requests.RequestException as e:
                raise HTTPException(status_code=502, detail=str(e))
        @app.post("/api/register")
        async def register(req: RegisterRequest):
            """Proxy registration to the transcription proxy server."""
            import requests as http_requests
            try:
                resp = await asyncio.to_thread(
                    http_requests.post,
                    f"{req.server_url}/api/auth/register",
                    json={"email": req.email, "password": req.password},
                    timeout=10,
                )
                if resp.status_code in (200, 201):
                    return {"status": "ok", "data": resp.json()}
                else:
                    raise HTTPException(status_code=resp.status_code, detail=resp.text)
            except http_requests.RequestException as e:
                raise HTTPException(status_code=502, detail=str(e))
        @app.get("/api/balance")
        async def get_balance():
            """Proxy balance check to the transcription proxy server."""
            import requests as http_requests
            server_url = ctrl.config.get('remote.server_url', '')
            token = ctrl.config.get('remote.auth_token', '')
            if not server_url or not token:
                raise HTTPException(status_code=400, detail="Not logged in to managed service")
            try:
                resp = await asyncio.to_thread(
                    http_requests.get,
                    f"{server_url}/api/billing/balance",
                    headers={"Authorization": f"Bearer {token}"},
                    timeout=10,
                )
                if resp.status_code == 200:
                    return resp.json()
                else:
                    raise HTTPException(status_code=resp.status_code, detail=resp.text)
            except http_requests.RequestException as e:
                raise HTTPException(status_code=502, detail=str(e))
        # ── Control WebSocket ──────────────────────────────────
        @app.websocket("/ws/control")
        async def websocket_control(websocket: WebSocket):
            """WebSocket channel for real-time state and transcription push."""
            await websocket.accept()
            # Remember the serving loop before registering the socket so that
            # _broadcast_control() can schedule sends from other threads.
            self._loop = asyncio.get_running_loop()
            self.control_connections.append(websocket)
            # Send current status on connect
            try:
                await websocket.send_json({
                    "type": "state_changed",
                    "state": ctrl.state,
                    "message": "Connected",
                })
            except Exception:
                # Best effort: a dead socket is detected by the receive loop below.
                pass
            try:
                while True:
                    # Keep alive -- client sends pings
                    await websocket.receive_text()
            except Exception:
                # Disconnects surface here as exceptions; nothing else to do.
                pass
            finally:
                # Also runs on task cancellation, so the socket never lingers
                # in the broadcast list.
                self._discard_connection(websocket)
        # ── Mount the existing OBS display routes ──────────────
        # The OBS display (GET / and /ws) is handled by the
        # TranscriptionWebServer which shares the same Uvicorn
        # instance. We mount it as a sub-application so the
        # existing OBS URLs continue to work.
        if ctrl.web_server:
            app.mount("/obs", ctrl.web_server.app)
--- a/backend/app_controller.py
+++ b/backend/app_controller.py
@@ -0,0 +1,692 @@
 """Headless application controller for transcription backend.
 Extracts orchestration logic from gui/main_window_qt.py into a
 Qt-free class that manages engine lifecycle, web server, server sync,
 and configuration -- all accessible via callbacks instead of Qt signals.
 """
 import asyncio
 import time
 from datetime import datetime
 from pathlib import Path
 from threading import Thread, Lock
 from typing import Callable, List, Optional
 import sys
 # Add project root to path
 sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
 from client.config import Config
 from client.device_utils import DeviceManager
 from client.transcription_engine_realtime import RealtimeTranscriptionEngine, TranscriptionResult
 from client.deepgram_transcription import DeepgramTranscriptionEngine
 from client.server_sync import ServerSyncClient
 from server.web_display import TranscriptionWebServer
 from version import __version__
 class AppState:
    """Enum-like class for application states.

    These are plain string constants rather than ``enum.Enum`` members.
    ``AppController`` stores one of them as its current state and passes it
    to the ``on_state_changed`` callback.
    """
    # Initial state; also set by AppController.initialize() during startup.
    INITIALIZING = "initializing"
    # Engine initialized successfully, or transcription was stopped.
    READY = "ready"
    # Set by AppController.start_transcription() once recording has started.
    TRANSCRIBING = "transcribing"
    # Set by AppController.reload_engine() while the engine is replaced.
    RELOADING = "reloading"
    # Engine initialization or reload failed.
    ERROR = "error"
 class WebServerThread(Thread):
    """Daemon thread that runs the web server on its own asyncio event loop.

    Attributes:
        web_server: Object whose ``start()`` coroutine is driven by ``run()``.
        loop: Event loop created by ``run()``; ``None`` until the thread runs.
        error: Exception raised while running the server, otherwise ``None``.
    """

    def __init__(self, web_server: TranscriptionWebServer):
        Thread.__init__(self, daemon=True)
        self.error: Optional[Exception] = None
        self.loop: Optional[asyncio.AbstractEventLoop] = None
        self.web_server = web_server

    def run(self):
        """Create a private event loop and run ``web_server.start()`` on it.

        Any exception is stored in ``self.error`` and reported on stdout
        instead of propagating out of the thread.
        """
        try:
            event_loop = asyncio.new_event_loop()
            self.loop = event_loop
            asyncio.set_event_loop(event_loop)
            serve = self.web_server.start()
            event_loop.run_until_complete(serve)
        except Exception as exc:
            self.error = exc
            print(f"ERROR: Web server failed to start: {exc}")
 class EngineInitThread(Thread):
    """Thread for initializing the transcription engine without blocking.

    Runs ``engine.initialize()`` off the caller's thread and reports the
    outcome exactly once through ``on_complete(success, message)``.
    """

    def __init__(self, engine, on_complete: Callable[[bool, str], None]):
        """
        Args:
            engine: Object exposing ``initialize()``; a truthy return value
                is treated as success.
            on_complete: Callback invoked from this thread with
                ``(success, message)`` when initialization has finished.
        """
        super().__init__(daemon=True)
        self.engine = engine
        self.on_complete = on_complete

    def run(self):
        """Initialize the engine, then notify ``on_complete`` a single time."""
        try:
            if self.engine.initialize():
                outcome = (True, "Engine initialized successfully")
            else:
                outcome = (False, "Failed to initialize engine")
        except Exception as e:
            outcome = (False, f"Error initializing engine: {e}")
        # The callback is invoked outside the try block on purpose: an
        # exception raised by the callback itself must not be mistaken for
        # an engine failure and trigger a second, contradictory notification.
        self.on_complete(*outcome)
 class AppController:
    """Headless controller managing the transcription application lifecycle.
    This replaces the orchestration logic that previously lived in MainWindow.
    It manages:
    - Transcription engine lifecycle (init, start, stop, reload)
    - Web server for OBS display
    - Server sync for multi-user mode
    - Configuration
    - Update checking
    All state changes are communicated via callbacks, making it UI-agnostic.
    """
    def __init__(self, config: Optional[Config] = None):
        """Set up controller state; no web server or engine is created here.

        Args:
            config: Configuration to use. A fresh ``Config()`` is created
                when this is omitted (or falsy).
        """
        self.config = config or Config()
        self.device_manager = DeviceManager()
        # State
        # _state holds an AppState value and is read/written under _state_lock
        # (see the ``state`` property and _set_state()).
        self._state = AppState.INITIALIZING
        self._state_lock = Lock()
        # Set True by start_transcription() and False by stop_transcription().
        self.is_transcribing = False
        # Engine
        # Populated by _initialize_engine().
        self.transcription_engine = None
        self._engine_init_thread: Optional[EngineInitThread] = None
        # Model / device settings the current engine was created with;
        # apply_settings() compares against these to decide on a reload.
        self.current_model_size: Optional[str] = None
        self.current_device_config: Optional[str] = None
        # Web server
        # Populated by _start_web_server(); actual_web_port is the port the
        # server ended up on, which may differ from the configured one.
        self.web_server: Optional[TranscriptionWebServer] = None
        self.web_server_thread: Optional[WebServerThread] = None
        self.actual_web_port: Optional[int] = None
        # Server sync
        # Created by _start_server_sync(), cleared by stop_transcription().
        self.server_sync_client: Optional[ServerSyncClient] = None
        # Transcription storage
        # Final results only, appended by _on_final_transcription().
        self.transcriptions: List[TranscriptionResult] = []
        # Callbacks for state notifications (set by the frontend / API server)
        self.on_state_changed: Optional[Callable[[str, str], None]] = None  # (state, message)
        self.on_transcription: Optional[Callable[[dict], None]] = None  # final transcription
        self.on_preview: Optional[Callable[[dict], None]] = None  # realtime preview
        self.on_error: Optional[Callable[[str], None]] = None
        self.on_credits_low: Optional[Callable[[int], None]] = None
    @property
    def state(self) -> str:
        with self._state_lock:
            return self._state
    def _set_state(self, state: str, message: str = ""):
        with self._state_lock:
            self._state = state
        if self.on_state_changed:
            self.on_state_changed(state, message)
    # ── Lifecycle ──────────────────────────────────────────────────
    def initialize(self):
        """Start the web server, then begin loading the transcription engine.

        Meant to be called once at startup. Each step is announced through
        an ``INITIALIZING`` state change first. Engine initialization runs
        on a background thread, so this returns before the engine is ready.
        """
        startup_steps = (
            ("Starting web server...", self._start_web_server),
            ("Loading transcription engine...", self._initialize_engine),
        )
        for status_message, step in startup_steps:
            self._set_state(AppState.INITIALIZING, status_message)
            step()
    def shutdown(self):
        """Gracefully shut down all components."""
        # Stop transcription
        if self.is_transcribing:
            self.stop_transcription()
        # Stop web server
        if self.web_server_thread and self.web_server_thread.is_alive():
            try:
                if self.web_server_thread.loop:
                    self.web_server_thread.loop.call_soon_threadsafe(
                        self.web_server_thread.loop.stop
                    )
            except Exception as e:
                print(f"Warning: Error stopping web server: {e}")
        # Stop transcription engine
        if self.transcription_engine:
            try:
                self.transcription_engine.stop()
            except Exception as e:
                print(f"Warning: Error stopping engine: {e}")
        # Wait for engine init thread
        if self._engine_init_thread and self._engine_init_thread.is_alive():
            self._engine_init_thread.join(timeout=5)
    # ── Web Server ─────────────────────────────────────────────────
    def _start_web_server(self):
        """Start the FastAPI web server for OBS display.

        Tries the configured port and then the next four ports, moving on to
        the next one only when the failure indicates the port is already in
        use. On success ``self.web_server``, ``self.web_server_thread`` and
        ``self.actual_web_port`` describe the running server. On failure the
        first two are reset to ``None`` and a warning is printed; no
        exception is raised.
        """
        try:
            host = self.config.get('web_server.host', '127.0.0.1')
            port = self.config.get('web_server.port', 8080)
            # Gather display settings
            ws_kwargs = self._get_web_server_kwargs(host, port)
            # Try up to 5 ports
            for try_port in range(port, port + 5):
                print(f"Attempting to start web server at http://{host}:{try_port}")
                ws_kwargs['port'] = try_port
                self.web_server = TranscriptionWebServer(**ws_kwargs)
                self.web_server_thread = WebServerThread(self.web_server)
                self.web_server_thread.start()
                # Give the server thread a moment to report a startup error.
                time.sleep(0.5)
                error = self.web_server_thread.error
                if not error:
                    self.actual_web_port = try_port
                    print(f"Web server started at http://{host}:{try_port}")
                    return
                self.web_server = None
                self.web_server_thread = None
                if self._is_port_in_use_error(error):
                    print(f"Port {try_port} is in use, trying next port...")
                    continue
                # Any other failure will not be cured by another port.
                print(f"Web server failed to start: {error}")
                break
            print("WARNING: Could not start web server on any port")
        except Exception as e:
            print(f"ERROR: Failed to initialize web server: {e}")
            self.web_server = None
            self.web_server_thread = None

    @staticmethod
    def _is_port_in_use_error(error: BaseException) -> bool:
        """Return True when ``error`` reports that the port is already bound.

        The numeric errno is checked first because both the code and the
        message text differ between platforms; the text match on the
        original phrases is kept as a fallback for wrapped errors.
        """
        import errno  # local import: only this helper needs it

        in_use_codes = {
            errno.EADDRINUSE,
            getattr(errno, 'WSAEADDRINUSE', errno.EADDRINUSE),
        }
        if isinstance(error, OSError) and error.errno in in_use_codes:
            return True
        text = str(error).lower()
        return "address already in use" in text or "errno 98" in text
    def _get_web_server_kwargs(self, host: str, port: int) -> dict:
        """Build kwargs dict for TranscriptionWebServer from config."""
        return dict(
            host=host,
            port=port,
            show_timestamps=self.config.get('display.show_timestamps', True),
            fade_after_seconds=self.config.get('display.fade_after_seconds', 10),
            max_lines=self.config.get('display.max_lines', 50),
            font_family=self.config.get('display.font_family', 'Arial'),
            font_size=self.config.get('display.font_size', 16),
            fonts_dir=self.config.fonts_dir,
            font_source=self.config.get('display.font_source', 'System Font'),
            websafe_font=self.config.get('display.websafe_font', 'Arial'),
            google_font=self.config.get('display.google_font', 'Roboto'),
            user_color=self.config.get('display.user_color', '#4CAF50'),
            text_color=self.config.get('display.text_color', '#FFFFFF'),
            background_color=self.config.get('display.background_color', '#000000B3'),
        )
    # ── Transcription Engine ───────────────────────────────────────
    def _initialize_engine(self):
        """Initialize the transcription engine in a background thread.

        Builds either a ``DeepgramTranscriptionEngine`` (``remote.mode`` is
        ``'managed'`` or ``'byok'``) or a ``RealtimeTranscriptionEngine``
        (any other mode) from the current config, wires the transcription
        callbacks, and starts an ``EngineInitThread`` that reports back via
        ``_on_engine_ready``. Returns as soon as that thread is started.
        """
        device_config = self.config.get('transcription.device', 'auto')
        self.device_manager.set_device(device_config)
        audio_device_str = self.config.get('audio.input_device', 'default')
        if audio_device_str in ('default', None, ''):
            audio_device = None
        else:
            # A malformed value should not abort engine start-up; fall back
            # to the default input device and say so.
            try:
                audio_device = int(audio_device_str)
            except (TypeError, ValueError):
                print(f"Warning: Invalid audio input device {audio_device_str!r}; using default device")
                audio_device = None
        model = self.config.get('transcription.model', 'base.en')
        language = self.config.get('transcription.language', 'en')
        device = self.device_manager.get_device_for_whisper()
        compute_type = self.config.get('transcription.compute_type', 'default')
        # Remembered so apply_settings() can tell when a reload is needed.
        self.current_model_size = model
        self.current_device_config = device_config
        user_name = self.config.get('user.name', 'User')
        continuous_mode = self.config.get('transcription.continuous_mode', False)
        if continuous_mode:
            # Continuous mode uses fixed timing values instead of the
            # configured ones.
            post_speech_silence = 0.15
            min_gap = 0.0
            min_recording = 0.3
        else:
            post_speech_silence = self.config.get('transcription.post_speech_silence_duration', 0.3)
            min_gap = self.config.get('transcription.min_gap_between_recordings', 0.0)
            min_recording = self.config.get('transcription.min_length_of_recording', 0.5)
        remote_mode = self.config.get('remote.mode', 'local')
        if remote_mode in ('managed', 'byok'):
            self.transcription_engine = DeepgramTranscriptionEngine(
                config=self.config,
                user_name=user_name,
                input_device_index=audio_device,
            )
            self.transcription_engine.set_callbacks(
                realtime_callback=self._on_realtime_transcription,
                final_callback=self._on_final_transcription,
            )
            self.transcription_engine.set_error_callback(self._on_remote_error)
            self.transcription_engine.set_credits_low_callback(self._on_credits_low)
        else:
            self.transcription_engine = RealtimeTranscriptionEngine(
                model=model,
                device=device,
                language=language,
                compute_type=compute_type,
                enable_realtime_transcription=self.config.get('transcription.enable_realtime_transcription', False),
                realtime_model=self.config.get('transcription.realtime_model', 'tiny.en'),
                realtime_processing_pause=self.config.get('transcription.realtime_processing_pause', 0.1),
                silero_sensitivity=self.config.get('transcription.silero_sensitivity', 0.4),
                silero_use_onnx=self.config.get('transcription.silero_use_onnx', True),
                webrtc_sensitivity=self.config.get('transcription.webrtc_sensitivity', 3),
                post_speech_silence_duration=post_speech_silence,
                min_length_of_recording=min_recording,
                min_gap_between_recordings=min_gap,
                pre_recording_buffer_duration=self.config.get('transcription.pre_recording_buffer_duration', 0.2),
                beam_size=self.config.get('transcription.beam_size', 5),
                initial_prompt=self.config.get('transcription.initial_prompt', ''),
                no_log_file=self.config.get('transcription.no_log_file', True),
                input_device_index=audio_device,
                user_name=user_name,
            )
            self.transcription_engine.set_callbacks(
                realtime_callback=self._on_realtime_transcription,
                final_callback=self._on_final_transcription,
            )
        # Start init in background thread
        self._engine_init_thread = EngineInitThread(
            self.transcription_engine,
            self._on_engine_ready,
        )
        self._engine_init_thread.start()
    def _on_engine_ready(self, success: bool, message: str):
        """Completion callback for ``EngineInitThread``.

        On failure the state becomes ``ERROR`` with ``message``. On success
        the state becomes ``READY`` with a label describing where
        transcription runs (Deepgram mode, or local device and compute type).
        """
        if not success:
            self._set_state(AppState.ERROR, message)
            return

        remote_mode = self.config.get('remote.mode', 'local')
        engine = self.transcription_engine
        if remote_mode in ('managed', 'byok'):
            mode_label = 'BYOK' if remote_mode != 'managed' else 'Managed'
            device_display = f"Deepgram ({mode_label})"
        elif engine:
            device_display = f"{engine.device.upper()} ({engine.compute_type})"
        else:
            device_display = "Unknown"
        self._set_state(AppState.READY, f"Ready | Device: {device_display}")
    # ── Transcription Control ──────────────────────────────────────
    def start_transcription(self) -> tuple[bool, str]:
        """Start transcription. Returns (success, message)."""
        if self.is_transcribing:
            return False, "Already transcribing"
        if not self.transcription_engine or not self.transcription_engine.is_ready():
            return False, "Transcription engine not ready"
        try:
            success = self.transcription_engine.start_recording()
            if not success:
                return False, "Failed to start recording"
            # Start server sync if enabled
            if self.config.get('server_sync.enabled', False):
                self._start_server_sync()
            self.is_transcribing = True
            self._set_state(AppState.TRANSCRIBING, "Transcribing...")
            return True, "Transcription started"
        except Exception as e:
            return False, f"Failed to start transcription: {e}"
    def stop_transcription(self) -> tuple[bool, str]:
        """Stop transcription. Returns (success, message)."""
        if not self.is_transcribing:
            return False, "Not transcribing"
        try:
            if self.transcription_engine:
                self.transcription_engine.stop_recording()
            if self.server_sync_client:
                self.server_sync_client.stop()
                self.server_sync_client = None
            self.is_transcribing = False
            self._set_state(AppState.READY, "Ready")
            return True, "Transcription stopped"
        except Exception as e:
            return False, f"Failed to stop transcription: {e}"
    def clear_transcriptions(self) -> int:
        """Clear stored transcriptions. Returns count of cleared items."""
        count = len(self.transcriptions)
        self.transcriptions.clear()
        return count
    def get_transcriptions_text(self, include_timestamps: bool = True) -> str:
        """Get all transcriptions as formatted text."""
        lines = []
        for result in self.transcriptions:
            parts = []
            if include_timestamps:
                parts.append(f"[{result.timestamp.strftime('%H:%M:%S')}]")
            if result.user_name and result.user_name.strip():
                parts.append(f"{result.user_name}:")
            parts.append(result.text)
            lines.append(" ".join(parts))
        return "\n".join(lines)
    def reload_engine(self) -> tuple[bool, str]:
        """Replace the transcription engine using the current config.

        Stops an active session, waits up to 10 seconds for a pending init
        thread, stops the old engine (errors are only printed) and starts a
        new background initialization.

        Returns:
            ``(True, "Engine reload initiated")`` once the new init has been
            started, or ``(False, error_text)`` after switching to ``ERROR``.
        """
        try:
            if self.is_transcribing:
                self.stop_transcription()
            self._set_state(AppState.RELOADING, "Reloading engine...")

            # Let an in-flight initialization finish first
            pending_init = self._engine_init_thread
            if pending_init and pending_init.is_alive():
                pending_init.join(timeout=10)

            # Tear down the engine being replaced
            old_engine = self.transcription_engine
            if old_engine:
                try:
                    old_engine.stop()
                except Exception as stop_error:
                    print(f"Warning: Error stopping engine: {stop_error}")

            # Build and initialize the replacement
            self._initialize_engine()
        except Exception as exc:
            self._set_state(AppState.ERROR, f"Engine reload failed: {exc}")
            return False, str(exc)
        return True, "Engine reload initiated"
    # ── Transcription Callbacks ────────────────────────────────────
    def _on_realtime_transcription(self, result: TranscriptionResult):
        """Handle realtime (preview) transcription."""
        if not self.is_transcribing:
            return
        try:
            # Broadcast to web server
            if self.web_server and self.web_server_thread and self.web_server_thread.loop:
                asyncio.run_coroutine_threadsafe(
                    self.web_server.broadcast_preview(
                        result.text, result.user_name, result.timestamp
                    ),
                    self.web_server_thread.loop,
                )
            # Send to server sync
            if self.server_sync_client:
                self.server_sync_client.send_preview(result.text, result.timestamp)
            # Notify frontend
            if self.on_preview:
                self.on_preview({
                    "text": result.text,
                    "user_name": result.user_name,
                    "timestamp": result.timestamp.strftime("%H:%M:%S") if result.timestamp else None,
                    "is_preview": True,
                })
        except Exception as e:
            print(f"Error handling realtime transcription: {e}")
    def _on_final_transcription(self, result: TranscriptionResult):
        """Handle final transcription."""
        if not self.is_transcribing:
            return
        try:
            self.transcriptions.append(result)
            # Broadcast to web server
            if self.web_server and self.web_server_thread and self.web_server_thread.loop:
                asyncio.run_coroutine_threadsafe(
                    self.web_server.broadcast_transcription(
                        result.text, result.user_name, result.timestamp
                    ),
                    self.web_server_thread.loop,
                )
            # Send to server sync
            if self.server_sync_client:
                self.server_sync_client.send_transcription(
                    result.text, result.timestamp
                )
            # Notify frontend
            if self.on_transcription:
                self.on_transcription({
                    "text": result.text,
                    "user_name": result.user_name,
                    "timestamp": result.timestamp.strftime("%H:%M:%S") if result.timestamp else None,
                    "is_preview": False,
                })
        except Exception as e:
            print(f"Error handling final transcription: {e}")
    def _on_remote_error(self, error_msg: str):
        """Handle error from remote transcription service."""
        print(f"Remote transcription error: {error_msg}")
        if self.on_error:
            self.on_error(error_msg)
    def _on_credits_low(self, seconds_remaining: int):
        """Handle low credits warning from proxy."""
        if self.on_credits_low:
            self.on_credits_low(seconds_remaining)
    # ── Server Sync ────────────────────────────────────────────────
    def _start_server_sync(self):
        """Start server sync client."""
        try:
            url = self.config.get('server_sync.url', '')
            if not url:
                print("Server sync enabled but no URL configured")
                return
            room = self.config.get('server_sync.room', 'default')
            passphrase = self.config.get('server_sync.passphrase', '')
            user_name = self.config.get('user.name', 'User')
            fonts_dir = self.config.fonts_dir
            font_source = self.config.get('display.font_source', 'System Font')
            if font_source == "System Font":
                font_source = "None"
            self.server_sync_client = ServerSyncClient(
                url=url,
                room=room,
                passphrase=passphrase,
                user_name=user_name,
                fonts_dir=fonts_dir,
                font_source=font_source,
                websafe_font=self.config.get('display.websafe_font', '') or None,
                google_font=self.config.get('display.google_font', '') or None,
                custom_font_file=self.config.get('display.custom_font_file', '') or None,
                user_color=self.config.get('display.user_color', '#4CAF50'),
                text_color=self.config.get('display.text_color', '#FFFFFF'),
                background_color=self.config.get('display.background_color', '#000000B3'),
            )
            self.server_sync_client.start()
        except Exception as e:
            print(f"Error starting server sync: {e}")
    # ── Configuration ──────────────────────────────────────────────
    def apply_settings(self, new_config: Optional[dict] = None) -> tuple[bool, str]:
        """Apply settings changes. If new_config is provided, merge it first.
        Returns (engine_reload_needed, message).
        """
        if new_config:
            for key, value in new_config.items():
                self.config.set(key, value)
        # Update web server display settings
        if self.web_server:
            self.web_server.show_timestamps = self.config.get('display.show_timestamps', True)
            self.web_server.fade_after_seconds = self.config.get('display.fade_after_seconds', 10)
            self.web_server.max_lines = self.config.get('display.max_lines', 50)
            self.web_server.font_family = self.config.get('display.font_family', 'Arial')
            self.web_server.font_size = self.config.get('display.font_size', 16)
            self.web_server.font_source = self.config.get('display.font_source', 'System Font')
            self.web_server.websafe_font = self.config.get('display.websafe_font', 'Arial')
            self.web_server.google_font = self.config.get('display.google_font', 'Roboto')
            self.web_server.user_color = self.config.get('display.user_color', '#4CAF50')
            self.web_server.text_color = self.config.get('display.text_color', '#FFFFFF')
            self.web_server.background_color = self.config.get('display.background_color', '#000000B3')
        # Restart server sync if running
        if self.is_transcribing and self.server_sync_client:
            self.server_sync_client.stop()
            self.server_sync_client = None
            if self.config.get('server_sync.enabled', False):
                self._start_server_sync()
        # Check if model/device changed
        new_model = self.config.get('transcription.model', 'base.en')
        new_device = self.config.get('transcription.device', 'auto')
        engine_reload_needed = (
            self.current_model_size != new_model
            or self.current_device_config != new_device
        )
        if engine_reload_needed:
            self.reload_engine()
            return True, "Settings applied. Engine reloading with new model/device."
        else:
            return False, "Settings applied successfully."
    def get_status(self) -> dict:
        """Get current application status as a dict.

        Returns:
            A dict with ``state``, ``is_transcribing``, ``version``,
            ``engine_device`` (human-readable label), ``web_server``
            (``host``, ``port``, ``url``, ``running``),
            ``transcription_count``, ``remote_mode`` and
            ``server_sync_enabled``.
        """
        host = self.config.get('web_server.host', '127.0.0.1')
        # Prefer the port the web server actually bound to.
        port = self.actual_web_port or self.config.get('web_server.port', 8080)
        remote_mode = self.config.get('remote.mode', 'local')
        engine = self.transcription_engine
        if remote_mode in ('managed', 'byok') and engine:
            mode_label = 'Managed' if remote_mode == 'managed' else 'BYOK'
            engine_device = f"Deepgram ({mode_label})"
        elif engine and hasattr(engine, 'device'):
            engine_device = f"{engine.device.upper()} ({engine.compute_type})"
        else:
            engine_device = "Not initialized"
        server_thread = self.web_server_thread
        return {
            "state": self.state,
            "is_transcribing": self.is_transcribing,
            "version": __version__,
            "engine_device": engine_device,
            "web_server": {
                "host": host,
                "port": port,
                "url": f"http://{host}:{port}",
                "running": server_thread is not None and server_thread.is_alive(),
            },
            "transcription_count": len(self.transcriptions),
            "remote_mode": remote_mode,
            "server_sync_enabled": self.config.get('server_sync.enabled', False),
        }
    def get_audio_devices(self) -> list[dict]:
        """List available audio input devices."""
        import sounddevice as sd
        devices = []
        try:
            device_list = sd.query_devices()
            for i, device in enumerate(device_list):
                if device['max_input_channels'] > 0:
                    devices.append({"index": i, "name": device['name']})
        except Exception:
            pass
        if not devices:
            devices = [{"index": 0, "name": "Default"}]
        return devices
    def get_compute_devices(self) -> list[dict]:
        """List available compute devices."""
        device_info = self.device_manager.get_device_info()
        devices = [{"id": "auto", "name": "Auto-detect"}]
        for dev_id, dev_name in device_info:
            devices.append({"id": dev_id, "name": dev_name})
        return devices
    # ── Update Checking ────────────────────────────────────────────
    def check_for_updates(self) -> dict:
        """Check the configured Gitea repository for a newer release (blocking).

        Returns:
            A dict that always contains ``"available"``:

            - update found: ``available=True`` plus ``version``,
              ``download_url``, ``release_notes`` and ``skipped`` (whether
              the version is in ``updates.skipped_versions``);
            - no update: ``available=False`` and ``current_version``;
            - not configured or check failed: ``available=False`` and ``error``.
        """
        from client.update_checker import UpdateChecker

        cfg = self.config
        gitea_url = cfg.get('updates.gitea_url', 'https://repo.anhonesthost.net')
        owner = cfg.get('updates.owner', 'streamer-tools')
        repo = cfg.get('updates.repo', 'local-transcription')
        if not (gitea_url and owner and repo):
            return {"available": False, "error": "Update checking not configured"}

        checker = UpdateChecker(
            current_version=__version__,
            gitea_url=gitea_url,
            owner=owner,
            repo=repo,
        )
        try:
            release_info = checker.check_for_update()
            # Record when the check ran, whatever its outcome
            cfg.set('updates.last_check', datetime.now().isoformat())
            if not release_info:
                return {"available": False, "current_version": __version__}
            skipped = cfg.get('updates.skipped_versions', [])
            return {
                "available": True,
                "version": release_info.version,
                "download_url": release_info.download_url,
                "release_notes": release_info.release_notes,
                "skipped": release_info.version in skipped,
            }
        except Exception as exc:
            return {"available": False, "error": str(exc)}
    def skip_version(self, version: str):
        """Mark a version as skipped for update notifications."""
        skipped = self.config.get('updates.skipped_versions', [])
        if version not in skipped:
            skipped.append(version)
            self.config.set('updates.skipped_versions', skipped)
--- a/backend/main_headless.py
+++ b/backend/main_headless.py
@@ -0,0 +1,126 @@
 #!/usr/bin/env python3
 """Headless entry point for the Local Transcription backend.
 Runs the transcription engine + API server without any GUI (no PySide6).
 Designed to be launched as a Tauri sidecar or run standalone for development.
 Usage:
    python -m backend.main_headless [--port PORT] [--host HOST]
 The backend prints the actual port to stdout as JSON on startup:
    {"event": "ready", "port": 8080}
 This allows the Tauri shell to discover which port the backend bound to.
 """
 import argparse
 import json
 import multiprocessing
 import os
 import signal
 import sys
 from pathlib import Path
 # Must be called before anything else for PyInstaller compatibility
 multiprocessing.freeze_support()
 # When run as a script, force the 'spawn' start method. A RuntimeError
 # from set_start_method is deliberately ignored.
 if __name__ == "__main__":
    try:
        multiprocessing.set_start_method('spawn', force=True)
    except RuntimeError:
        pass
 # Add project root to path
 # The root is two directory levels above this file. It is put first on
 # sys.path so the project imports below resolve, and it also becomes the
 # process working directory.
 project_root = Path(__file__).resolve().parent.parent
 sys.path.insert(0, str(project_root))
 os.chdir(project_root)
 from client.instance_lock import InstanceLock
 def main():
    """Run the headless backend until it is stopped.

    Parses ``--host``/``--port``, takes the single-instance lock, starts the
    ``AppController`` (OBS web server + transcription engine) and then
    blocks in Uvicorn serving the API. Progress is reported as one JSON
    object per line on stdout (``state``, ``ready``, ``shutdown`` and
    ``error`` events). Exits with status 1 when another instance holds the
    lock or when an unexpected error occurs.
    """
    parser = argparse.ArgumentParser(description="Local Transcription headless backend")
    parser.add_argument("--host", default="127.0.0.1", help="API server host (default: 127.0.0.1)")
    parser.add_argument("--port", type=int, default=8080, help="API server port (default: 8080)")
    args = parser.parse_args()
    instance_lock = InstanceLock()
    if not instance_lock.acquire():
        print(json.dumps({"event": "error", "message": "Another instance is already running"}),
              flush=True)
        sys.exit(1)

    controller = None

    def handle_shutdown(signum, frame):
        """Announce the shutdown and unwind via SystemExit.

        Cleanup is left to the ``finally`` block below so the controller is
        shut down and the instance lock released exactly once, rather than
        once here and a second time while SystemExit propagates.
        """
        print(json.dumps({"event": "shutdown"}), flush=True)
        sys.exit(0)

    try:
        # Installed inside the try block so a signal delivered at any later
        # point still reaches the cleanup in ``finally``.
        signal.signal(signal.SIGTERM, handle_shutdown)
        signal.signal(signal.SIGINT, handle_shutdown)
        from backend.app_controller import AppController
        from backend.api_server import APIServer
        # Override web server port from CLI arg
        from client.config import Config
        config = Config()
        config.set('web_server.host', args.host)
        config.set('web_server.port', args.port)
        # Create controller and initialize
        controller = AppController(config=config)
        # Wire a state callback that prints each state change as a JSON event
        def on_state_changed(state, message):
            event = {"event": "state", "state": state, "message": message}
            print(json.dumps(event), flush=True)
        controller.on_state_changed = on_state_changed
        # Initialize engine + web server
        controller.initialize()
        # Create API server wrapping the controller
        api_server = APIServer(controller)
        # Determine actual port (web server may have shifted if port was in use)
        actual_port = controller.actual_web_port or args.port
        # Print ready event so Tauri can discover the port
        # NOTE(review): this reports the OBS display port, while the API
        # below binds to that port + 1 -- confirm the Tauri shell expects it.
        print(json.dumps({"event": "ready", "port": actual_port}), flush=True)
        # Run the API server (blocks)
        import uvicorn
        import logging
        logging.getLogger("uvicorn").setLevel(logging.ERROR)
        logging.getLogger("uvicorn.access").setLevel(logging.ERROR)
        uvicorn.run(
            api_server.app,
            host=args.host,
            port=actual_port + 1,  # API on port+1, OBS display on the main port
            log_level="error",
            access_log=False,
        )
    except KeyboardInterrupt:
        print(json.dumps({"event": "shutdown", "reason": "keyboard_interrupt"}), flush=True)
    except Exception as e:
        print(json.dumps({"event": "error", "message": str(e)}), flush=True)
        import traceback
        traceback.print_exc()
        sys.exit(1)
    finally:
        # Single cleanup path for normal return, errors and signal exits.
        if controller:
            controller.shutdown()
        instance_lock.release()
 if __name__ == "__main__":
    main()
--- a/client/config.py
+++ b/client/config.py
@@ -48,6 +48,25 @@ class Config:
            # Save the default configuration
            self.save()
        # Migrate remote_processing -> remote
        self._migrate_remote_config()
    def _migrate_remote_config(self):
        """Migrate old remote_processing config to new remote config."""
        if 'remote_processing' in self.config and 'remote' not in self.config:
            old = self.config['remote_processing']
            self.config['remote'] = {
                'mode': 'managed' if old.get('enabled', False) else 'local',
                'server_url': old.get('server_url', ''),
                'auth_token': '',
                'byok_api_key': old.get('api_key', ''),
                'deepgram_model': 'nova-2',
                'language': 'en-US',
                'fallback_to_local': old.get('fallback_to_local', True),
            }
            del self.config['remote_processing']
            self.save()
    def save(self) -> None:
        """Save current configuration to file."""
        with open(self.config_path, 'w') as f:
--- a/client/deepgram_transcription.py
+++ b/client/deepgram_transcription.py
@@ -0,0 +1,528 @@
 """Deepgram-based transcription engine using WebSocket streaming.
 Supports two modes:
  - Managed mode: connects to a proxy server that handles Deepgram credentials
  - BYOK mode: connects directly to the Deepgram API with a user-provided key
 Implements the same duck-type interface as RealtimeTranscriptionEngine so
 MainWindow can use it as a drop-in replacement.
 """
 import asyncio
 import json
 import logging
 import numpy as np
 import threading
 from datetime import datetime
 from queue import Queue, Empty
 from typing import Optional, Callable
 from client.transcription_engine_realtime import TranscriptionResult
 logger = logging.getLogger(__name__)
 class DeepgramTranscriptionEngine:
    """
    Transcription engine that streams audio to Deepgram via WebSocket.
    In managed mode the connection goes through a proxy at
    ``wss://<server>/ws/transcribe`` which handles authentication and
    Deepgram credentials.  In BYOK (bring-your-own-key) mode the
    connection goes directly to the Deepgram API.
    """
    # ------------------------------------------------------------------ #
    #  Construction / configuration
    # ------------------------------------------------------------------ #
    def __init__(self, config, user_name: str = "User", input_device_index: Optional[int] = None):
        """
        Initialise the engine from a :class:`client.config.Config` object.
        Args:
            config: Application ``Config`` instance.
            user_name: Display name attached to transcriptions.
            input_device_index: Index of the audio input device to use
                (``None`` for the system default).
        """
        self.config = config
        self.user_name = user_name
        self.input_device_index = input_device_index
        # Mode: 'managed' (proxy) or 'byok' (direct Deepgram)
        self.mode: str = config.get("remote.mode", "managed")
        # Managed-mode settings
        self.server_url: str = config.get("remote.server_url", "")
        self.auth_token: str = config.get("remote.auth_token", "")
        # BYOK-mode settings
        self.byok_api_key: str = config.get("remote.byok_api_key", "")
        # Deepgram model / language (used in both modes)
        self.deepgram_model: str = config.get("remote.deepgram_model", "nova-2")
        self.language: str = config.get("remote.language", "en-US")
        # Audio parameters
        self.sample_rate: int = 16000
        self.channels: int = 1
        self.blocksize: int = 4096
        # Callbacks
        self.realtime_callback: Optional[Callable[[TranscriptionResult], None]] = None
        self.final_callback: Optional[Callable[[TranscriptionResult], None]] = None
        self._on_error: Optional[Callable[[str], None]] = None
        self._on_credits_low: Optional[Callable[[int], None]] = None
        # Internal state
        self._is_initialized: bool = False
        self._is_recording: bool = False
        self._stop_event: threading.Event = threading.Event()
        self._audio_queue: Queue = Queue()
        # Asyncio event loop running in a daemon thread
        self._loop: Optional[asyncio.AbstractEventLoop] = None
        self._thread: Optional[threading.Thread] = None
        # WebSocket handle (set inside the async context)
        self._ws = None
        # sounddevice InputStream
        self._stream = None
    # ------------------------------------------------------------------ #
    #  Callback setters
    # ------------------------------------------------------------------ #
    def set_callbacks(
        self,
        realtime_callback: Optional[Callable[[TranscriptionResult], None]] = None,
        final_callback: Optional[Callable[[TranscriptionResult], None]] = None,
    ):
        """Set transcription result callbacks (matches RealtimeTranscriptionEngine API)."""
        self.realtime_callback = realtime_callback
        self.final_callback = final_callback
    def set_error_callback(self, fn: Optional[Callable[[str], None]]):
        """Set the callback invoked on errors.

        Args:
            fn: Callable receiving a single string message, or ``None`` to
                disable error notifications.
        """
        self._on_error = fn
    def set_credits_low_callback(self, fn: Optional[Callable[[int], None]]):
        """Set the callback for low-credit warnings.

        Args:
            fn: Callable receiving the number of seconds remaining (int), or
                ``None`` to disable the notification.
        """
        self._on_credits_low = fn
    # ------------------------------------------------------------------ #
    #  Public interface (duck-typed with RealtimeTranscriptionEngine)
    # ------------------------------------------------------------------ #
    def initialize(self) -> bool:
        """Validate configuration and mark the engine as ready.
        Returns ``True`` when the engine is ready to start recording.
        """
        if self._is_initialized:
            return True
        if self.mode == "managed":
            if not self.server_url:
                logger.error("Managed mode requires a server URL (remote.server_url)")
                return False
            if not self.auth_token:
                logger.error("Managed mode requires an auth token (remote.auth_token)")
                return False
        elif self.mode == "byok":
            if not self.byok_api_key:
                logger.error("BYOK mode requires an API key (remote.byok_api_key)")
                return False
        else:
            logger.error("Unknown remote mode: %s (expected 'managed' or 'byok')", self.mode)
            return False
        self._is_initialized = True
        logger.info("DeepgramTranscriptionEngine initialised in %s mode", self.mode)
        return True
    def start_recording(self) -> bool:
        """Open the audio stream and connect the WebSocket.
        Returns ``True`` on success.
        """
        if not self._is_initialized:
            logger.error("Engine not initialised -- call initialize() first")
            return False
        if self._is_recording:
            return True
        self._stop_event.clear()
        self._is_recording = True
        # Start the asyncio event-loop thread (handles WS send/receive)
        self._thread = threading.Thread(target=self._run_event_loop, daemon=True)
        self._thread.start()
        # Start the audio capture stream
        try:
            self._start_audio_stream()
        except Exception as exc:
            logger.error("Failed to open audio stream: %s", exc)
            self._is_recording = False
            self._stop_event.set()
            return False
        logger.info("Recording started")
        return True
    def stop_recording(self):
        """Stop audio capture and close the WebSocket."""
        if not self._is_recording:
            return
        self._is_recording = False
        self._stop_event.set()
        # Stop audio stream
        self._stop_audio_stream()
        # Close WebSocket from outside the event-loop thread
        if self._ws is not None and self._loop is not None and not self._loop.is_closed():
            asyncio.run_coroutine_threadsafe(self._close_ws(), self._loop)
        # Wait for the thread to finish
        if self._thread is not None:
            self._thread.join(timeout=5)
            self._thread = None
        logger.info("Recording stopped")
    def stop(self):
        """Full shutdown -- stop recording and mark the engine uninitialised.

        After this call :meth:`is_ready` returns ``False`` and
        :meth:`initialize` must be called again before recording.
        """
        self.stop_recording()
        self._is_initialized = False
        logger.info("DeepgramTranscriptionEngine shut down")
    def is_ready(self) -> bool:
        """Return ``True`` if :meth:`initialize` succeeded and :meth:`stop` has not been called since."""
        return self._is_initialized
    # ------------------------------------------------------------------ #
    #  Audio capture (sounddevice)
    # ------------------------------------------------------------------ #
    def _start_audio_stream(self):
        """Open a ``sounddevice.InputStream`` that feeds the audio queue.

        The stream delivers float32 blocks; each block is converted to
        16-bit PCM bytes and queued for the send loop.  Blocks that arrive
        while ``_is_recording`` is false are dropped.

        Raises:
            Exception: whatever sounddevice raises when the device cannot be
                opened or started; the caller handles it.
        """
        import sounddevice as sd

        def _audio_callback(indata, frames, time_info, status):  # noqa: ARG001
            if status:
                logger.warning("Audio stream status: %s", status)
            if self._is_recording:
                # float32 -> int16 PCM bytes.  Clip first: samples outside
                # [-1.0, 1.0] would otherwise overflow int16 and wrap around,
                # turning loud peaks into full-scale noise.
                pcm = (np.clip(indata, -1.0, 1.0) * 32767).astype(np.int16).tobytes()
                self._audio_queue.put(pcm)

        self._stream = sd.InputStream(
            samplerate=self.sample_rate,
            blocksize=self.blocksize,
            channels=self.channels,
            dtype="float32",
            device=self.input_device_index,
            callback=_audio_callback,
        )
        self._stream.start()
    def _stop_audio_stream(self):
        """Close the audio input stream."""
        if self._stream is not None:
            try:
                self._stream.stop()
                self._stream.close()
            except Exception as exc:
                logger.debug("Error closing audio stream: %s", exc)
            finally:
                self._stream = None
    # ------------------------------------------------------------------ #
    #  Asyncio event-loop (runs in daemon thread)
    # ------------------------------------------------------------------ #
    def _run_event_loop(self):
        """Entry point for the daemon thread -- runs the async event loop."""
        self._loop = asyncio.new_event_loop()
        asyncio.set_event_loop(self._loop)
        try:
            self._loop.run_until_complete(self._ws_lifecycle())
        except Exception as exc:
            logger.error("Event-loop error: %s", exc)
        finally:
            try:
                self._loop.run_until_complete(self._loop.shutdown_asyncgens())
            except Exception:
                pass
            self._loop.close()
            self._loop = None
    async def _ws_lifecycle(self):
        """Connect, authenticate (if managed), then run send/receive loops.

        Runs on the worker thread's event loop.  Any exception other than
        cancellation is logged and forwarded to the error callback as
        ``"WebSocket error: ..."``; the WebSocket is closed on every exit
        path.  A failed managed handshake returns early without running the
        send/receive loops.
        """
        # Imported lazily so the module can be imported without websockets.
        import websockets
        try:
            ws_url, extra_headers = self._build_ws_url_and_headers()
            logger.info("Connecting to %s", ws_url)
            # NOTE(review): ``additional_headers`` is the keyword of the newer
            # websockets asyncio client; older releases used ``extra_headers``
            # -- confirm against the pinned websockets version.
            self._ws = await websockets.connect(
                ws_url,
                additional_headers=extra_headers,
                ping_interval=20,
                ping_timeout=10,
            )
            # Managed mode: send auth message and wait for ready
            if self.mode == "managed":
                if not await self._managed_handshake():
                    return
            # Run send and receive concurrently
            await asyncio.gather(
                self._send_loop(),
                self._receive_loop(),
            )
        except asyncio.CancelledError:
            # Cancellation is treated as a normal shutdown, not an error.
            pass
        except Exception as exc:
            msg = f"WebSocket error: {exc}"
            logger.error(msg)
            if self._on_error:
                self._on_error(msg)
        finally:
            # Also runs after the early ``return`` above.
            await self._close_ws()
    def _build_ws_url_and_headers(self):
        """Return ``(url, headers)`` depending on the current mode."""
        if self.mode == "managed":
            # Ensure the server URL uses wss:// and append the path
            url = self.server_url.rstrip("/")
            if not url.startswith("ws://") and not url.startswith("wss://"):
                url = f"wss://{url}"
            url = f"{url}/ws/transcribe"
            return url, {}
        # BYOK -- connect directly to Deepgram
        params = (
            f"model={self.deepgram_model}"
            f"&language={self.language}"
            "&interim_results=true"
            "&encoding=linear16"
            f"&sample_rate={self.sample_rate}"
            f"&channels={self.channels}"
        )
        url = f"wss://api.deepgram.com/v1/listen?{params}"
        headers = {"Authorization": f"Token {self.byok_api_key}"}
        return url, headers
    # -- managed-mode handshake ---------------------------------------- #
    async def _managed_handshake(self) -> bool:
        """Send auth message and wait for ``ready`` (managed mode).
        Returns ``True`` on success.
        """
        auth_msg = {
            "type": "auth",
            "token": self.auth_token,
            "config": {
                "model": self.deepgram_model,
                "language": self.language,
                "sample_rate": self.sample_rate,
                "channels": self.channels,
                "encoding": "linear16",
                "interim_results": True,
            },
        }
        await self._ws.send(json.dumps(auth_msg))
        try:
            raw = await asyncio.wait_for(self._ws.recv(), timeout=15)
            data = json.loads(raw)
            if data.get("type") == "ready":
                logger.info("Managed proxy is ready")
                return True
            if data.get("type") == "error":
                err = data.get("message", "unknown error")
                logger.error("Auth error from proxy: %s", err)
                if self._on_error:
                    self._on_error(f"Proxy auth error: {err}")
                return False
            logger.warning("Unexpected handshake message: %s", data)
            return False
        except asyncio.TimeoutError:
            logger.error("Timed out waiting for proxy ready message")
            if self._on_error:
                self._on_error("Timed out waiting for proxy ready message")
            return False
    # -- send loop ----------------------------------------------------- #
    async def _send_loop(self):
        """Drain the audio queue and push raw PCM bytes over the WebSocket."""
        while not self._stop_event.is_set():
            try:
                pcm_bytes = self._audio_queue.get(timeout=0.1)
            except Empty:
                continue
            try:
                await self._ws.send(pcm_bytes)
            except Exception as exc:
                if not self._stop_event.is_set():
                    logger.error("Send error: %s", exc)
                break
    # -- receive loop -------------------------------------------------- #
    async def _receive_loop(self):
        """Listen for messages from the WebSocket and dispatch them."""
        while not self._stop_event.is_set():
            try:
                raw = await asyncio.wait_for(self._ws.recv(), timeout=1.0)
            except asyncio.TimeoutError:
                continue
            except Exception as exc:
                if not self._stop_event.is_set():
                    logger.error("Receive error: %s", exc)
                break
            try:
                data = json.loads(raw)
            except (json.JSONDecodeError, TypeError):
                logger.debug("Non-JSON message received, ignoring")
                continue
            if self.mode == "managed":
                self._handle_managed_message(data)
            else:
                self._handle_byok_message(data)
    # ------------------------------------------------------------------ #
    #  Message handlers
    # ------------------------------------------------------------------ #
    def _handle_managed_message(self, data: dict):
        """Process a message from the managed proxy."""
        msg_type = data.get("type", "")
        if msg_type == "transcript":
            text = data.get("text", "")
            is_final = data.get("is_final", False)
            if text.strip():
                result = TranscriptionResult(
                    text=text,
                    is_final=is_final,
                    timestamp=datetime.now(),
                    user_name=self.user_name,
                )
                if is_final:
                    if self.final_callback:
                        self.final_callback(result)
                else:
                    if self.realtime_callback:
                        self.realtime_callback(result)
        elif msg_type == "credits_low":
            seconds_remaining = data.get("seconds_remaining", 0)
            logger.warning("Credits low -- %d seconds remaining", seconds_remaining)
            if self._on_credits_low:
                self._on_credits_low(int(seconds_remaining))
        elif msg_type == "error":
            code = data.get("code", "")
            message = data.get("message", "Unknown error")
            logger.error("Proxy error [%s]: %s", code, message)
            if self._on_error:
                self._on_error(f"[{code}] {message}" if code else message)
        elif msg_type == "session_end":
            seconds_used = data.get("seconds_used", 0)
            logger.info("Session ended -- %d seconds used", seconds_used)
        elif msg_type == "ready":
            # May arrive again after reconnects; safe to ignore.
            logger.debug("Received ready message (already connected)")
        else:
            logger.debug("Unhandled managed message type: %s", msg_type)
    def _handle_byok_message(self, data: dict):
        """Process a message received directly from the Deepgram API."""
        msg_type = data.get("type", "")
        if msg_type == "Results":
            channel = data.get("channel", {})
            alternatives = channel.get("alternatives", [])
            if not alternatives:
                return
            transcript = alternatives[0].get("transcript", "")
            is_final = data.get("is_final", False)
            if transcript.strip():
                result = TranscriptionResult(
                    text=transcript,
                    is_final=is_final,
                    timestamp=datetime.now(),
                    user_name=self.user_name,
                )
                if is_final:
                    if self.final_callback:
                        self.final_callback(result)
                else:
                    if self.realtime_callback:
                        self.realtime_callback(result)
        elif msg_type == "Metadata":
            logger.debug("Deepgram metadata: %s", data)
        elif msg_type == "UtteranceEnd":
            logger.debug("Deepgram utterance end")
        else:
            logger.debug("Unhandled Deepgram message type: %s", msg_type)
    # ------------------------------------------------------------------ #
    #  Helpers
    # ------------------------------------------------------------------ #
    async def _close_ws(self):
        """Close the WebSocket connection if open."""
        if self._ws is not None:
            try:
                await self._ws.close()
            except Exception:
                pass
            self._ws = None
    def set_user_name(self, user_name: str):
        """Update the user name attached to future transcriptions.

        Results that were already delivered keep the name they were created
        with.
        """
        self.user_name = user_name
    def is_recording_active(self) -> bool:
        """Return ``True`` between a successful :meth:`start_recording` and the next :meth:`stop_recording`.

        This reflects the engine's recording flag only; it does not report
        whether the WebSocket connection is up.
        """
        return self._is_recording
    def __repr__(self) -> str:
        return (
            f"DeepgramTranscriptionEngine(mode={self.mode}, "
            f"recording={self._is_recording})"
        )
    def __del__(self):
        """Best-effort cleanup when the engine is garbage-collected.

        Calls :meth:`stop`.  Every exception is swallowed because a
        finaliser may run on a partially constructed object or during
        interpreter shutdown, when failures can neither be handled nor
        usefully reported.
        """
        try:
            self.stop()
        except Exception:
            pass
--- a/config/default_config.yaml
+++ b/config/default_config.yaml
@@ -68,11 +68,14 @@ web_server:
  port: 8080
  host: "127.0.0.1"
-remote_processing:
+remote:
-  enabled: false  # Enable remote transcription offloading
+  mode: local  # local | managed | byok
-  server_url: ""  # WebSocket URL of remote transcription service (e.g., ws://your-server:8765/ws/transcribe)
+  server_url: ""  # Proxy server URL for managed mode (e.g., wss://your-proxy.com)
-  api_key: ""  # API key for authentication
+  auth_token: ""  # JWT stored after login (managed mode)
-  fallback_to_local: true  # Fall back to local processing if remote fails
+  byok_api_key: ""  # Deepgram API key for BYOK mode
  deepgram_model: nova-2  # Deepgram model to use
  language: en-US  # Language code
  fallback_to_local: true  # Fall back to local Whisper if remote fails
 updates:
  auto_check: true  # Check for updates on startup
--- a/gui/main_window_qt.py
+++ b/gui/main_window_qt.py
@@ -18,6 +18,7 @@ sys.path.append(str(Path(__file__).resolve().parent.parent))
 from client.config import Config
 from client.device_utils import DeviceManager
 from client.transcription_engine_realtime import RealtimeTranscriptionEngine, TranscriptionResult
 from client.deepgram_transcription import DeepgramTranscriptionEngine
 from client.server_sync import ServerSyncClient
 from gui.settings_dialog_qt import SettingsDialog
 from server.web_display import TranscriptionWebServer
@@ -394,6 +395,23 @@ class MainWindow(QMainWindow):
            min_gap = self.config.get('transcription.min_gap_between_recordings', 0.0)
            min_recording = self.config.get('transcription.min_length_of_recording', 0.5)
        remote_mode = self.config.get('remote.mode', 'local')
        if remote_mode in ('managed', 'byok'):
            # Use Deepgram-based remote transcription
            self.transcription_engine = DeepgramTranscriptionEngine(
                config=self.config,
                user_name=user_name,
                input_device_index=audio_device
            )
            self.transcription_engine.set_callbacks(
                realtime_callback=self._on_realtime_transcription,
                final_callback=self._on_final_transcription
            )
            self.transcription_engine.set_error_callback(self._on_remote_error)
            self.transcription_engine.set_credits_low_callback(self._on_credits_low)
        else:
            # Use local Whisper transcription
            self.transcription_engine = RealtimeTranscriptionEngine(
                model=model,
                device=device,
@@ -430,8 +448,11 @@ class MainWindow(QMainWindow):
    def _on_engine_ready(self, success: bool, message: str):
        """Handle engine initialization completion."""
        if success:
-            # Update device label with actual device used
+            remote_mode = self.config.get('remote.mode', 'local')
-            if self.transcription_engine:
+            if remote_mode in ('managed', 'byok'):
                mode_label = 'Managed' if remote_mode == 'managed' else 'BYOK'
                self.device_label.setText(f"Device: Deepgram ({mode_label})")
            elif self.transcription_engine:
                actual_device = self.transcription_engine.device
                compute_type = self.transcription_engine.compute_type
                device_display = f"{actual_device.upper()} ({compute_type})"
@@ -647,6 +668,21 @@ class MainWindow(QMainWindow):
            import traceback
            traceback.print_exc()
    def _on_remote_error(self, error_msg: str):
        """Handle error from remote transcription service."""
        print(f"Remote transcription error: {error_msg}")
        self.status_label.setText(f"⚠ Remote error: {error_msg}")
        # Fallback to local if enabled
        if self.config.get('remote.fallback_to_local', True) and self.is_transcribing:
            print("Falling back to local transcription...")
            self.status_label.setText("⚠ Remote failed — falling back to local")
    def _on_credits_low(self, seconds_remaining: int):
        """Handle low credits warning from proxy."""
        minutes = seconds_remaining // 60
        self.status_label.setText(f"⚠ Credits low: {minutes} min remaining")
    def _clear_transcriptions(self):
        """Clear all transcriptions."""
        if not self.transcriptions:
--- a/gui/settings_dialog_qt.py
+++ b/gui/settings_dialog_qt.py
@@ -4,7 +4,7 @@ from PySide6.QtWidgets import (
    QDialog, QVBoxLayout, QHBoxLayout, QFormLayout,
    QLabel, QLineEdit, QComboBox, QCheckBox, QSlider,
    QPushButton, QMessageBox, QGroupBox, QScrollArea, QWidget,
-    QFileDialog, QColorDialog
+    QFileDialog, QColorDialog, QRadioButton
 )
 from PySide6.QtCore import Qt
 from PySide6.QtGui import QScreen, QFontDatabase, QColor
@@ -487,46 +487,91 @@ class SettingsDialog(QDialog):
        server_group.setLayout(server_layout)
        content_layout.addWidget(server_group)
-        # Remote Processing Group
+        # Transcription Mode Group
-        remote_group = QGroupBox("Remote Processing (GPU Offload)")
+        mode_group = QGroupBox("Transcription Mode")
-        remote_layout = QFormLayout()
+        mode_layout = QVBoxLayout()
-        remote_layout.setSpacing(10)
+        mode_layout.setSpacing(10)
-        self.remote_enabled_check = QCheckBox()
+        # Radio buttons for mode selection
-        self.remote_enabled_check.setToolTip(
+        self.mode_local_radio = QRadioButton("Local (Whisper)")
-            "Enable remote transcription processing:\n"
+        self.mode_local_radio.setToolTip("Transcribe locally using Whisper models")
-            "• Offload transcription to a GPU-equipped server\n"
+        self.mode_managed_radio = QRadioButton("Remote - Managed")
-            "• Reduces local CPU/GPU usage\n"
+        self.mode_managed_radio.setToolTip("Use the transcription proxy service with prepaid credits")
-            "• Requires running the remote transcription service"
+        self.mode_byok_radio = QRadioButton("Remote - BYOK (Bring Your Own Key)")
-        )
+        self.mode_byok_radio.setToolTip("Connect directly to Deepgram with your own API key")
        remote_layout.addRow("Enable Remote Processing:", self.remote_enabled_check)
-        self.remote_url_input = QLineEdit()
+        mode_layout.addWidget(self.mode_local_radio)
-        self.remote_url_input.setPlaceholderText("ws://your-server:8765/ws/transcribe")
+        mode_layout.addWidget(self.mode_managed_radio)
-        self.remote_url_input.setToolTip(
+        mode_layout.addWidget(self.mode_byok_radio)
            "WebSocket URL of the remote transcription service:\n"
            "• Format: ws://host:port/ws/transcribe\n"
            "• Use wss:// for secure connections"
        )
        remote_layout.addRow("Server URL:", self.remote_url_input)
-        self.remote_api_key_input = QLineEdit()
+        # Managed mode fields (shown when managed radio selected)
-        self.remote_api_key_input.setEchoMode(QLineEdit.Password)
+        self.managed_widget = QWidget()
-        self.remote_api_key_input.setPlaceholderText("your-api-key")
+        managed_layout = QFormLayout()
-        self.remote_api_key_input.setToolTip(
+        managed_layout.setSpacing(8)
            "API key for authentication with the remote service"
        )
        remote_layout.addRow("API Key:", self.remote_api_key_input)
-        self.remote_fallback_check = QCheckBox("Enable")
+        self.managed_server_url = QLineEdit()
-        self.remote_fallback_check.setChecked(True)
+        self.managed_server_url.setPlaceholderText("wss://your-proxy-server.com")
-        self.remote_fallback_check.setToolTip(
+        managed_layout.addRow("Server URL:", self.managed_server_url)
            "Fall back to local transcription if remote service is unavailable"
        )
        remote_layout.addRow("Fallback to Local:", self.remote_fallback_check)
-        remote_group.setLayout(remote_layout)
+        # Login/Register buttons in a row
-        content_layout.addWidget(remote_group)
+        auth_widget = QWidget()
        auth_layout = QHBoxLayout()
        auth_layout.setContentsMargins(0, 0, 0, 0)
        self.managed_login_btn = QPushButton("Login")
        self.managed_login_btn.clicked.connect(self._managed_login)
        self.managed_register_btn = QPushButton("Register")
        self.managed_register_btn.clicked.connect(self._managed_register)
        auth_layout.addWidget(self.managed_login_btn)
        auth_layout.addWidget(self.managed_register_btn)
        auth_layout.addStretch()
        auth_widget.setLayout(auth_layout)
        managed_layout.addRow("Account:", auth_widget)
        self.managed_balance_label = QLabel("Not logged in")
        managed_layout.addRow("Balance:", self.managed_balance_label)
        self.managed_fallback_check = QCheckBox("Enable")
        self.managed_fallback_check.setChecked(True)
        self.managed_fallback_check.setToolTip("Fall back to local Whisper if remote fails")
        managed_layout.addRow("Fallback to Local:", self.managed_fallback_check)
        self.managed_widget.setLayout(managed_layout)
        mode_layout.addWidget(self.managed_widget)
        # BYOK mode fields (shown when BYOK radio selected)
        self.byok_widget = QWidget()
        byok_layout = QFormLayout()
        byok_layout.setSpacing(8)
        self.byok_api_key_input = QLineEdit()
        self.byok_api_key_input.setEchoMode(QLineEdit.Password)
        self.byok_api_key_input.setPlaceholderText("your-deepgram-api-key")
        byok_layout.addRow("Deepgram API Key:", self.byok_api_key_input)
        self.byok_model_combo = QComboBox()
        self.byok_model_combo.addItems(["nova-2", "nova-2-general", "nova-2-meeting", "nova-2-phonecall", "whisper-large", "whisper-medium", "whisper-small"])
        byok_layout.addRow("Model:", self.byok_model_combo)
        self.byok_language_input = QLineEdit()
        self.byok_language_input.setText("en-US")
        self.byok_language_input.setPlaceholderText("en-US")
        byok_layout.addRow("Language:", self.byok_language_input)
        self.byok_fallback_check = QCheckBox("Enable")
        self.byok_fallback_check.setChecked(True)
        self.byok_fallback_check.setToolTip("Fall back to local Whisper if Deepgram fails")
        byok_layout.addRow("Fallback to Local:", self.byok_fallback_check)
        self.byok_widget.setLayout(byok_layout)
        mode_layout.addWidget(self.byok_widget)
        mode_group.setLayout(mode_layout)
        content_layout.addWidget(mode_group)
        # Connect radio buttons to show/hide relevant widgets
        self.mode_local_radio.toggled.connect(self._on_mode_changed)
        self.mode_managed_radio.toggled.connect(self._on_mode_changed)
        self.mode_byok_radio.toggled.connect(self._on_mode_changed)
        # Updates Group
        updates_group = QGroupBox("Software Updates")
@@ -794,11 +839,28 @@ class SettingsDialog(QDialog):
        self.server_room_input.setText(self.config.get('server_sync.room', 'default'))
        self.server_passphrase_input.setText(self.config.get('server_sync.passphrase', ''))
-        # Remote processing settings
+        # Transcription mode settings
-        self.remote_enabled_check.setChecked(self.config.get('remote_processing.enabled', False))
+        mode = self.config.get('remote.mode', 'local')
-        self.remote_url_input.setText(self.config.get('remote_processing.server_url', ''))
+        if mode == 'managed':
-        self.remote_api_key_input.setText(self.config.get('remote_processing.api_key', ''))
+            self.mode_managed_radio.setChecked(True)
-        self.remote_fallback_check.setChecked(self.config.get('remote_processing.fallback_to_local', True))
+        elif mode == 'byok':
            self.mode_byok_radio.setChecked(True)
        else:
            self.mode_local_radio.setChecked(True)
        self.managed_server_url.setText(self.config.get('remote.server_url', ''))
        self.managed_fallback_check.setChecked(self.config.get('remote.fallback_to_local', True))
        self.byok_api_key_input.setText(self.config.get('remote.byok_api_key', ''))
        self.byok_model_combo.setCurrentText(self.config.get('remote.deepgram_model', 'nova-2'))
        self.byok_language_input.setText(self.config.get('remote.language', 'en-US'))
        self.byok_fallback_check.setChecked(self.config.get('remote.fallback_to_local', True))
        # Trigger visibility update
        self._on_mode_changed()
        # Update balance if managed mode and has token
        if self.config.get('remote.auth_token'):
            self._update_managed_balance()
        # Update settings
        self.update_auto_check.setChecked(self.config.get('updates.auto_check', True))
@@ -869,11 +931,21 @@ class SettingsDialog(QDialog):
            self.config.set('server_sync.room', self.server_room_input.text())
            self.config.set('server_sync.passphrase', self.server_passphrase_input.text())
-            # Remote processing settings
+            # Transcription mode settings
-            self.config.set('remote_processing.enabled', self.remote_enabled_check.isChecked())
+            if self.mode_managed_radio.isChecked():
-            self.config.set('remote_processing.server_url', self.remote_url_input.text())
+                self.config.set('remote.mode', 'managed')
-            self.config.set('remote_processing.api_key', self.remote_api_key_input.text())
+            elif self.mode_byok_radio.isChecked():
-            self.config.set('remote_processing.fallback_to_local', self.remote_fallback_check.isChecked())
+                self.config.set('remote.mode', 'byok')
            else:
                self.config.set('remote.mode', 'local')
            self.config.set('remote.server_url', self.managed_server_url.text())
            self.config.set('remote.fallback_to_local',
                self.managed_fallback_check.isChecked() if self.mode_managed_radio.isChecked()
                else self.byok_fallback_check.isChecked())
            self.config.set('remote.byok_api_key', self.byok_api_key_input.text())
            self.config.set('remote.deepgram_model', self.byok_model_combo.currentText())
            self.config.set('remote.language', self.byok_language_input.text())
            # Update settings
            self.config.set('updates.auto_check', self.update_auto_check.isChecked())
@@ -892,6 +964,194 @@ class SettingsDialog(QDialog):
        except Exception as e:
            QMessageBox.critical(self, "Error", f"Failed to save settings:\n{e}")
    def _on_mode_changed(self):
        """Show/hide mode-specific widgets based on selected radio button."""
        self.managed_widget.setVisible(self.mode_managed_radio.isChecked())
        self.byok_widget.setVisible(self.mode_byok_radio.isChecked())
    def _managed_login(self):
        """Prompt for credentials and log in to the managed proxy server.

        Shows a modal dialog with email and password fields. Pressing "Login"
        POSTs ``{"email", "password"}`` as JSON to ``<server_url>/auth/login``,
        where the server URL is read from ``self.managed_server_url``. If the
        response is a JSON object carrying a ``token``, the token is stored
        under the ``remote.auth_token`` config key, the balance label is
        refreshed and the dialog is accepted. Every failure is reported in a
        message box and leaves the dialog open so the user can retry.

        The request is made synchronously with a 10-second timeout.
        """
        import json
        import urllib.request
        import urllib.error
        dialog = QDialog(self)
        dialog.setWindowTitle("Login")
        dialog.setMinimumWidth(350)
        layout = QFormLayout()
        email_input = QLineEdit()
        email_input.setPlaceholderText("you@example.com")
        layout.addRow("Email:", email_input)
        password_input = QLineEdit()
        password_input.setEchoMode(QLineEdit.Password)
        layout.addRow("Password:", password_input)
        button_layout = QHBoxLayout()
        cancel_btn = QPushButton("Cancel")
        cancel_btn.clicked.connect(dialog.reject)
        login_btn = QPushButton("Login")
        login_btn.setDefault(True)
        button_layout.addStretch()
        button_layout.addWidget(cancel_btn)
        button_layout.addWidget(login_btn)
        layout.addRow("", button_layout)
        dialog.setLayout(layout)

        def do_login():
            # Validate locally first so no request is sent with missing data.
            server_url = self.managed_server_url.text().rstrip('/')
            if not server_url:
                QMessageBox.warning(dialog, "Error", "Please enter a Server URL first.")
                return
            email = email_input.text().strip()
            password = password_input.text()
            if not email or not password:
                QMessageBox.warning(dialog, "Error", "Please enter your email and password.")
                return
            payload = json.dumps({
                "email": email,
                "password": password
            }).encode('utf-8')
            req = urllib.request.Request(
                f"{server_url}/auth/login",
                data=payload,
                headers={"Content-Type": "application/json"},
                method="POST"
            )
            try:
                with urllib.request.urlopen(req, timeout=10) as resp:
                    data = json.loads(resp.read().decode('utf-8'))
                # A body that is valid JSON but not an object has no token.
                token = data.get('token', '') if isinstance(data, dict) else ''
                if token:
                    self.config.set('remote.auth_token', token)
                    self._update_managed_balance()
                    QMessageBox.information(dialog, "Success", "Logged in successfully.")
                    dialog.accept()
                else:
                    QMessageBox.warning(dialog, "Error", "Login succeeded but no token received.")
            except urllib.error.HTTPError as e:
                try:
                    body = json.loads(e.read().decode('utf-8'))
                    msg = body.get('detail', body.get('message', str(e)))
                except Exception:
                    msg = str(e)
                # The server-supplied field is not guaranteed to be a string
                # (it may be a list or dict); QMessageBox only accepts text.
                QMessageBox.warning(dialog, "Login Failed", str(msg))
            except Exception as e:
                QMessageBox.warning(dialog, "Error", f"Could not connect to server:\n{e}")

        login_btn.clicked.connect(do_login)
        dialog.exec()
    def _managed_register(self):
        """Prompt for credentials and create an account on the managed proxy server.

        Shows a modal dialog with email, password and confirm-password fields.
        Pressing "Register" POSTs ``{"email", "password"}`` as JSON to
        ``<server_url>/auth/register``, where the server URL is read from
        ``self.managed_server_url``. If the response is a JSON object carrying
        a ``token``, the token is stored under the ``remote.auth_token`` config
        key and the balance label is refreshed; otherwise the user is told to
        log in. In both cases the dialog is accepted. Failures are reported in
        a message box and leave the dialog open so the user can retry.

        The request is made synchronously with a 10-second timeout.
        """
        import json
        import urllib.request
        import urllib.error
        dialog = QDialog(self)
        dialog.setWindowTitle("Register")
        dialog.setMinimumWidth(350)
        layout = QFormLayout()
        email_input = QLineEdit()
        email_input.setPlaceholderText("you@example.com")
        layout.addRow("Email:", email_input)
        password_input = QLineEdit()
        password_input.setEchoMode(QLineEdit.Password)
        layout.addRow("Password:", password_input)
        confirm_input = QLineEdit()
        confirm_input.setEchoMode(QLineEdit.Password)
        layout.addRow("Confirm Password:", confirm_input)
        button_layout = QHBoxLayout()
        cancel_btn = QPushButton("Cancel")
        cancel_btn.clicked.connect(dialog.reject)
        register_btn = QPushButton("Register")
        register_btn.setDefault(True)
        button_layout.addStretch()
        button_layout.addWidget(cancel_btn)
        button_layout.addWidget(register_btn)
        layout.addRow("", button_layout)
        dialog.setLayout(layout)

        def do_register():
            # Validate locally first so no request is sent with missing data.
            email = email_input.text().strip()
            password = password_input.text()
            if not email or not password:
                QMessageBox.warning(dialog, "Error", "Please enter your email and password.")
                return
            if password != confirm_input.text():
                QMessageBox.warning(dialog, "Error", "Passwords do not match.")
                return
            server_url = self.managed_server_url.text().rstrip('/')
            if not server_url:
                QMessageBox.warning(dialog, "Error", "Please enter a Server URL first.")
                return
            payload = json.dumps({
                "email": email,
                "password": password
            }).encode('utf-8')
            req = urllib.request.Request(
                f"{server_url}/auth/register",
                data=payload,
                headers={"Content-Type": "application/json"},
                method="POST"
            )
            try:
                with urllib.request.urlopen(req, timeout=10) as resp:
                    data = json.loads(resp.read().decode('utf-8'))
                # A body that is valid JSON but not an object has no token.
                token = data.get('token', '') if isinstance(data, dict) else ''
                if token:
                    self.config.set('remote.auth_token', token)
                    self._update_managed_balance()
                    QMessageBox.information(dialog, "Success", "Account created and logged in.")
                    dialog.accept()
                else:
                    QMessageBox.information(dialog, "Success",
                        "Account created. Please log in.")
                    dialog.accept()
            except urllib.error.HTTPError as e:
                try:
                    body = json.loads(e.read().decode('utf-8'))
                    msg = body.get('detail', body.get('message', str(e)))
                except Exception:
                    msg = str(e)
                # The server-supplied field is not guaranteed to be a string
                # (it may be a list or dict); QMessageBox only accepts text.
                QMessageBox.warning(dialog, "Registration Failed", str(msg))
            except Exception as e:
                QMessageBox.warning(dialog, "Error", f"Could not connect to server:\n{e}")

        register_btn.clicked.connect(do_register)
        dialog.exec()
    def _update_managed_balance(self):
        """Fetch and display the current account balance from the managed proxy server."""
        import json
        import urllib.request
        import urllib.error
        server_url = self.managed_server_url.text().rstrip('/')
        token = self.config.get('remote.auth_token', '')
        if not server_url or not token:
            self.managed_balance_label.setText("Not logged in")
            return
        req = urllib.request.Request(
            f"{server_url}/billing/balance",
            headers={
                "Authorization": f"Bearer {token}",
                "Content-Type": "application/json"
            },
            method="GET"
        )
        try:
            with urllib.request.urlopen(req, timeout=10) as resp:
                data = json.loads(resp.read().decode('utf-8'))
            balance = data.get('balance', data.get('credits', 'N/A'))
            self.managed_balance_label.setText(str(balance))
        except urllib.error.HTTPError as e:
            if e.code == 401:
                self.managed_balance_label.setText("Session expired - please login again")
                self.config.set('remote.auth_token', '')
            else:
                self.managed_balance_label.setText("Error fetching balance")
        except Exception:
            self.managed_balance_label.setText("Could not connect to server")
    def _check_for_updates_now(self):
        """Manually check for updates."""
        from version import __version__
--- a/index.html
+++ b/index.html
@@ -0,0 +1,13 @@
 <!doctype html>
 <html lang="en">
  <head>
    <meta charset="UTF-8" />
    <link rel="icon" type="image/png" href="/LocalTranscription.png" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>Local Transcription</title>
  </head>
  <body>
    <div id="app"></div>
    <script type="module" src="/src/main.ts"></script>
  </body>
 </html>
--- a/local-transcription-headless.spec
+++ b/local-transcription-headless.spec
@@ -0,0 +1,184 @@
 # -*- mode: python ; coding: utf-8 -*-
 """PyInstaller spec file for headless Local Transcription backend (no PySide6/Qt).
 This builds the Python sidecar for the Tauri frontend.
 Much simpler than local-transcription.spec since all Qt dependencies are removed.
 """
 import sys
 import os
 block_cipher = None
 is_windows = sys.platform == 'win32'
 from PyInstaller.utils.hooks import collect_submodules, collect_data_files
 # Find faster_whisper assets folder
 import faster_whisper
 faster_whisper_path = os.path.dirname(faster_whisper.__file__)
 vad_assets_path = os.path.join(faster_whisper_path, 'assets')
 # pvporcupine resources (indirect dependency from RealtimeSTT)
 try:
    import pvporcupine
    pvporcupine_path = os.path.dirname(pvporcupine.__file__)
    pvporcupine_resources = os.path.join(pvporcupine_path, 'resources')
    pvporcupine_lib = os.path.join(pvporcupine_path, 'lib')
    pvporcupine_data_files = []
    if os.path.exists(pvporcupine_resources):
        pvporcupine_data_files.append((pvporcupine_resources, 'pvporcupine/resources'))
    if os.path.exists(pvporcupine_lib):
        pvporcupine_data_files.append((pvporcupine_lib, 'pvporcupine/lib'))
 except ImportError:
    pvporcupine_data_files = []
 # Data files
 datas = [
    ('config/default_config.yaml', 'config'),
    (vad_assets_path, 'faster_whisper/assets'),
 ] + pvporcupine_data_files
 # Hidden imports -- NO PySide6/Qt needed for headless backend
 hiddenimports = [
    # Transcription engine
    'faster_whisper',
    'faster_whisper.transcribe',
    'faster_whisper.vad',
    'ctranslate2',
    'sounddevice',
    'scipy',
    'scipy.signal',
    'numpy',
    # RealtimeSTT
    'RealtimeSTT',
    'RealtimeSTT.audio_recorder',
    'webrtcvad',
    'webrtcvad_wheels',
    'silero_vad',
    # PyTorch
    'torch',
    'torch.nn',
    'torch.nn.functional',
    'torchaudio',
    'onnxruntime',
    'onnxruntime.capi',
    'onnxruntime.capi.onnxruntime_pybind11_state',
    'pyaudio',
    'halo',
    'colorama',
    # FastAPI and dependencies
    'fastapi',
    'fastapi.routing',
    'fastapi.responses',
    'starlette',
    'starlette.applications',
    'starlette.routing',
    'starlette.responses',
    'starlette.websockets',
    'starlette.middleware',
    'starlette.middleware.cors',
    'pydantic',
    'pydantic.fields',
    'pydantic.main',
    'anyio',
    'anyio._backends',
    'anyio._backends._asyncio',
    'sniffio',
    # Uvicorn
    'uvicorn',
    'uvicorn.logging',
    'uvicorn.loops',
    'uvicorn.loops.auto',
    'uvicorn.protocols',
    'uvicorn.protocols.http',
    'uvicorn.protocols.http.auto',
    'uvicorn.protocols.http.h11_impl',
    'uvicorn.protocols.websockets',
    'uvicorn.protocols.websockets.auto',
    'uvicorn.protocols.websockets.wsproto_impl',
    'uvicorn.lifespan',
    'uvicorn.lifespan.on',
    'h11',
    'websockets',
    'websockets.legacy',
    'websockets.legacy.server',
    # HTTP client
    'requests',
    'urllib3',
    'certifi',
    'charset_normalizer',
 ]
 # Collect submodules for key packages
 print("Collecting submodules for backend packages...")
 for package in ['fastapi', 'starlette', 'pydantic', 'pydantic_core', 'anyio', 'uvicorn', 'websockets', 'h11', 'httptools', 'uvloop']:
    try:
        submodules = collect_submodules(package)
        hiddenimports += submodules
        print(f"  + Collected {len(submodules)} submodules from {package}")
    except Exception as e:
        print(f"  - Warning: Could not collect {package}: {e}")
 # Collect data files for packages that ship non-Python resources.
 # Collection stays best-effort (a failing package must not abort the build),
 # but failures are reported the same way as in the submodule loop above so a
 # missing resource can be traced back to this step.
 for package in ['fastapi', 'starlette', 'pydantic', 'uvicorn', 'RealtimeSTT']:
    try:
        data_files = collect_data_files(package)
        if data_files:
            datas += data_files
            print(f"  + Collected {len(data_files)} data files from {package}")
    except Exception as e:
        print(f"  - Warning: Could not collect data files from {package}: {e}")
 # Pydantic critical deps
 hiddenimports += [
    'colorsys', 'decimal', 'json', 'ipaddress', 'pathlib', 'uuid',
    'email.message', 'typing_extensions',
 ]
 a = Analysis(
    ['backend/main_headless.py'],
    pathex=[],
    binaries=[],
    datas=datas,
    hiddenimports=hiddenimports,
    hookspath=['hooks'],
    hooksconfig={},
    runtime_hooks=[],
    excludes=['enum34', 'PySide6', 'PyQt5', 'PyQt6', 'tkinter'],
    win_no_prefer_redirects=False,
    win_private_assemblies=False,
    cipher=block_cipher,
    noarchive=False,
 )
 pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)
 exe = EXE(
    pyz,
    a.scripts,
    [],
    exclude_binaries=True,
    name='local-transcription-backend',
    debug=False,
    bootloader_ignore_signals=False,
    strip=False,
    upx=True,
    console=True,  # Headless backend needs console for JSON output
    disable_windowed_traceback=False,
    argv_emulation=False,
    target_arch=None,
    codesign_identity=None,
    entitlements_file=None,
    icon='LocalTranscription.ico' if is_windows else None,
 )
 coll = COLLECT(
    exe,
    a.binaries,
    a.zipfiles,
    a.datas,
    strip=False,
    upx=True,
    upx_exclude=[],
    name='local-transcription-backend',
 )
--- a/package-lock.json
+++ b/package-lock.json
--- a/package.json
+++ b/package.json
@@ -0,0 +1,27 @@
 {
  "name": "local-transcription",
  "private": true,
  "version": "1.4.0",
  "type": "module",
  "scripts": {
    "dev": "vite dev",
    "build": "vite build",
    "preview": "vite preview",
    "tauri": "tauri"
  },
  "devDependencies": {
    "@sveltejs/vite-plugin-svelte": "^5.0.0",
    "@tauri-apps/cli": "^2.0.0",
    "@tsconfig/svelte": "^5.0.0",
    "svelte": "^5.0.0",
    "svelte-check": "^4.0.0",
    "typescript": "~5.6.0",
    "vite": "^6.0.0"
  },
  "dependencies": {
    "@tauri-apps/api": "^2.0.0",
    "@tauri-apps/plugin-dialog": "^2.0.0",
    "@tauri-apps/plugin-shell": "^2.0.0",
    "@tauri-apps/plugin-process": "^2.0.0"
  }
 }
--- a/src-tauri/Cargo.lock
+++ b/src-tauri/Cargo.lock
--- a/src-tauri/Cargo.toml
+++ b/src-tauri/Cargo.toml
@@ -0,0 +1,21 @@
 [package]
 name = "local-transcription"
 version = "1.4.0"
 description = "Real-time speech-to-text transcription for streamers"
 authors = ["Local Transcription Contributors"]
 edition = "2021"
 [lib]
 name = "local_transcription_lib"
 crate-type = ["lib", "cdylib", "staticlib"]
 [build-dependencies]
 tauri-build = { version = "2", features = [] }
 [dependencies]
 tauri = { version = "2", features = [] }
 tauri-plugin-shell = "2"
 tauri-plugin-dialog = "2"
 tauri-plugin-process = "2"
 serde = { version = "1", features = ["derive"] }
 serde_json = "1"
--- a/src-tauri/build.rs
+++ b/src-tauri/build.rs
@@ -0,0 +1,3 @@
 fn main() {
    tauri_build::build()
 }
--- a/src-tauri/gen/schemas/acl-manifests.json
+++ b/src-tauri/gen/schemas/acl-manifests.json
--- a/src-tauri/gen/schemas/capabilities.json
+++ b/src-tauri/gen/schemas/capabilities.json
@@ -0,0 +1 @@
 {}
--- a/src-tauri/gen/schemas/desktop-schema.json
+++ b/src-tauri/gen/schemas/desktop-schema.json
--- a/src-tauri/gen/schemas/linux-schema.json
+++ b/src-tauri/gen/schemas/linux-schema.json
--- a/src-tauri/icons/128x128.png
+++ b/src-tauri/icons/128x128.png
--- a/src-tauri/icons/128x128@2x.png
+++ b/src-tauri/icons/128x128@2x.png
--- a/src-tauri/icons/32x32.png
+++ b/src-tauri/icons/32x32.png
--- a/src-tauri/icons/icon.ico
+++ b/src-tauri/icons/icon.ico
--- a/src-tauri/icons/icon.png
+++ b/src-tauri/icons/icon.png
--- a/src-tauri/src/lib.rs
+++ b/src-tauri/src/lib.rs
@@ -0,0 +1,9 @@
 /// Builds the Tauri application with the shell, dialog and process plugins
 /// registered, then runs it.
 ///
 /// Panics with "error while running tauri application" if `run` returns an
 /// error.
 #[cfg_attr(mobile, tauri::mobile_entry_point)]
 pub fn run() {
    // Register the plugins first, then hand the configured builder to `run`.
    let builder = tauri::Builder::default()
        .plugin(tauri_plugin_shell::init())
        .plugin(tauri_plugin_dialog::init())
        .plugin(tauri_plugin_process::init());

    builder
        .run(tauri::generate_context!())
        .expect("error while running tauri application");
 }
--- a/src-tauri/src/main.rs
+++ b/src-tauri/src/main.rs
@@ -0,0 +1,6 @@
 // Prevents additional console window on Windows in release
 #![cfg_attr(not(debug_assertions), windows_subsystem = "windows")]
 fn main() {
    local_transcription_lib::run()
 }
--- a/src-tauri/tauri.conf.json
+++ b/src-tauri/tauri.conf.json
@@ -0,0 +1,42 @@
 {
  "productName": "Local Transcription",
  "version": "1.4.0",
  "identifier": "com.localtranscription.app",
  "build": {
    "frontendDist": "../dist",
    "devUrl": "http://localhost:1420",
    "beforeDevCommand": "npm run dev",
    "beforeBuildCommand": "npm run build"
  },
  "app": {
    "windows": [
      {
        "title": "Local Transcription",
        "width": 800,
        "height": 600,
        "minWidth": 640,
        "minHeight": 480,
        "resizable": true
      }
    ],
    "security": {
      "csp": null
    }
  },
  "bundle": {
    "active": true,
    "targets": "all",
    "icon": [
      "icons/32x32.png",
      "icons/128x128.png",
      "icons/128x128@2x.png",
      "icons/icon.ico",
      "icons/icon.png"
    ]
  },
  "plugins": {
    "shell": {
      "open": true
    }
  }
 }
--- a/src/App.svelte
+++ b/src/App.svelte
@@ -0,0 +1,99 @@
 <script lang="ts">
  import { onMount } from "svelte";
  import Header from "$lib/components/Header.svelte";
  import StatusBar from "$lib/components/StatusBar.svelte";
  import Controls from "$lib/components/Controls.svelte";
  import TranscriptionDisplay from "$lib/components/TranscriptionDisplay.svelte";
  import Settings from "$lib/components/Settings.svelte";
  import { backendStore } from "$lib/stores/backend";
  import { configStore } from "$lib/stores/config";
  let showSettings = $state(false);
  let obsDisplayUrl = $derived(backendStore.obsUrl);
  let syncDisplayUrl = $derived(backendStore.syncUrl);
  function openSettings() {
    showSettings = true;
  }
  function closeSettings() {
    showSettings = false;
  }
  onMount(() => {
    backendStore.connect();
    configStore.loadConfig();
    return () => {
      backendStore.disconnect();
    };
  });
 </script>
 <div class="app-shell">
  <Header onSettingsClick={openSettings} />
  <StatusBar />
  <div class="display-links">
    <span class="link-label">OBS:</span>
    <a href={obsDisplayUrl} target="_blank" rel="noopener">{obsDisplayUrl}</a>
    {#if syncDisplayUrl}
      <span class="link-separator">|</span>
      <span class="link-label">Sync:</span>
      <a href={syncDisplayUrl} target="_blank" rel="noopener"
        >{syncDisplayUrl}</a
      >
    {/if}
  </div>
  <TranscriptionDisplay />
  <Controls />
  <div class="version-label">v{backendStore.version}</div>
 </div>
 {#if showSettings}
  <Settings onClose={closeSettings} />
 {/if}
 <style>
  .app-shell {
    display: flex;
    flex-direction: column;
    height: 100%;
    width: 100%;
    background-color: var(--bg-primary);
  }
  .display-links {
    display: flex;
    align-items: center;
    gap: 6px;
    padding: 6px 20px;
    font-size: 12px;
    background-color: var(--bg-primary);
    border-bottom: 1px solid var(--border-color);
    flex-shrink: 0;
  }
  .link-label {
    color: var(--text-secondary);
    font-weight: 500;
  }
  .link-separator {
    color: var(--text-muted);
    margin: 0 4px;
  }
  .version-label {
    position: fixed;
    bottom: 6px;
    right: 12px;
    font-size: 11px;
    color: var(--text-muted);
    pointer-events: none;
    z-index: 10;
  }
 </style>
--- a/src/app.css
+++ b/src/app.css
@@ -0,0 +1,312 @@
 /* Global dark theme styles for Local Transcription */
 :root {
  --bg-primary: #1e1e1e;
  --bg-secondary: #2d2d2d;
  --bg-tertiary: #3a3a3a;
  --bg-hover: #454545;
  --text-primary: #e0e0e0;
  --text-secondary: #a0a0a0;
  --text-muted: #707070;
  --accent-green: #4caf50;
  --accent-green-hover: #45a049;
  --accent-red: #f44336;
  --accent-red-hover: #d32f2f;
  --accent-blue: #2196f3;
  --accent-blue-hover: #1976d2;
  --accent-orange: #ff9800;
  --border-color: #444;
  --border-color-light: #555;
  --scrollbar-track: #2d2d2d;
  --scrollbar-thumb: #555;
  --scrollbar-thumb-hover: #777;
 }
 *,
 *::before,
 *::after {
  box-sizing: border-box;
  margin: 0;
  padding: 0;
 }
 html,
 body {
  height: 100%;
  width: 100%;
  overflow: hidden;
 }
 body {
  background-color: var(--bg-primary);
  color: var(--text-primary);
  font-family: system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto,
    "Helvetica Neue", Arial, sans-serif;
  font-size: 14px;
  line-height: 1.5;
  -webkit-font-smoothing: antialiased;
  -moz-osx-font-smoothing: grayscale;
 }
 #app {
  height: 100%;
  width: 100%;
  display: flex;
  flex-direction: column;
 }
 /* Buttons */
 button {
  font-family: inherit;
  font-size: 13px;
  font-weight: 500;
  padding: 8px 16px;
  border: 1px solid var(--border-color);
  border-radius: 6px;
  background-color: var(--bg-secondary);
  color: var(--text-primary);
  cursor: pointer;
  transition: background-color 0.15s ease, border-color 0.15s ease,
    transform 0.1s ease;
  user-select: none;
 }
 button:hover {
  background-color: var(--bg-hover);
  border-color: var(--border-color-light);
 }
 button:active {
  transform: scale(0.98);
 }
 button:disabled {
  opacity: 0.5;
  cursor: not-allowed;
  transform: none;
 }
 button.primary {
  background-color: var(--accent-green);
  border-color: var(--accent-green);
  color: white;
 }
 button.primary:hover {
  background-color: var(--accent-green-hover);
 }
 button.danger {
  background-color: var(--accent-red);
  border-color: var(--accent-red);
  color: white;
 }
 button.danger:hover {
  background-color: var(--accent-red-hover);
 }
 /* Inputs and Selects */
 input[type="text"],
 input[type="password"],
 input[type="number"],
 input[type="url"],
 input[type="email"],
 select,
 textarea {
  font-family: inherit;
  font-size: 13px;
  padding: 8px 12px;
  border: 1px solid var(--border-color);
  border-radius: 6px;
  background-color: var(--bg-secondary);
  color: var(--text-primary);
  outline: none;
  transition: border-color 0.15s ease;
  width: 100%;
 }
 input[type="text"]:focus,
 input[type="password"]:focus,
 input[type="number"]:focus,
 input[type="url"]:focus,
 input[type="email"]:focus,
 select:focus,
 textarea:focus {
  border-color: var(--accent-blue);
 }
 input[type="text"]::placeholder,
 input[type="password"]::placeholder,
 input[type="url"]::placeholder {
  color: var(--text-muted);
 }
 select {
  appearance: none;
  background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='12' height='12' viewBox='0 0 12 12'%3E%3Cpath fill='%23a0a0a0' d='M6 8L1 3h10z'/%3E%3C/svg%3E");
  background-repeat: no-repeat;
  background-position: right 10px center;
  padding-right: 30px;
 }
 /* Color input */
 input[type="color"] {
  width: 50px;
  height: 36px;
  border: 1px solid var(--border-color);
  border-radius: 6px;
  background-color: var(--bg-secondary);
  cursor: pointer;
  padding: 2px;
 }
 input[type="color"]::-webkit-color-swatch-wrapper {
  padding: 2px;
 }
 input[type="color"]::-webkit-color-swatch {
  border: none;
  border-radius: 3px;
 }
 /* Range slider */
 input[type="range"] {
  -webkit-appearance: none;
  appearance: none;
  width: 100%;
  height: 6px;
  background: var(--bg-tertiary);
  border-radius: 3px;
  outline: none;
  cursor: pointer;
 }
 input[type="range"]::-webkit-slider-thumb {
  -webkit-appearance: none;
  appearance: none;
  width: 16px;
  height: 16px;
  border-radius: 50%;
  background: var(--accent-blue);
  cursor: pointer;
  border: 2px solid var(--bg-primary);
 }
 input[type="range"]::-moz-range-thumb {
  width: 16px;
  height: 16px;
  border-radius: 50%;
  background: var(--accent-blue);
  cursor: pointer;
  border: 2px solid var(--bg-primary);
 }
 /* Toggle / Checkbox styled as switch */
 input[type="checkbox"] {
  position: relative;
  width: 40px;
  height: 22px;
  -webkit-appearance: none;
  appearance: none;
  background-color: var(--bg-tertiary);
  border-radius: 11px;
  cursor: pointer;
  transition: background-color 0.2s ease;
  flex-shrink: 0;
 }
 input[type="checkbox"]::after {
  content: "";
  position: absolute;
  top: 2px;
  left: 2px;
  width: 18px;
  height: 18px;
  background-color: var(--text-secondary);
  border-radius: 50%;
  transition: transform 0.2s ease, background-color 0.2s ease;
 }
 input[type="checkbox"]:checked {
  background-color: var(--accent-green);
 }
 input[type="checkbox"]:checked::after {
  transform: translateX(18px);
  background-color: white;
 }
 /* Radio buttons */
 input[type="radio"] {
  -webkit-appearance: none;
  appearance: none;
  width: 18px;
  height: 18px;
  border: 2px solid var(--border-color);
  border-radius: 50%;
  background-color: var(--bg-secondary);
  cursor: pointer;
  position: relative;
  flex-shrink: 0;
 }
 input[type="radio"]:checked {
  border-color: var(--accent-blue);
 }
 input[type="radio"]:checked::after {
  content: "";
  position: absolute;
  top: 3px;
  left: 3px;
  width: 8px;
  height: 8px;
  background-color: var(--accent-blue);
  border-radius: 50%;
 }
 /* Scrollbar */
 ::-webkit-scrollbar {
  width: 8px;
  height: 8px;
 }
 ::-webkit-scrollbar-track {
  background: var(--scrollbar-track);
  border-radius: 4px;
 }
 ::-webkit-scrollbar-thumb {
  background: var(--scrollbar-thumb);
  border-radius: 4px;
 }
 ::-webkit-scrollbar-thumb:hover {
  background: var(--scrollbar-thumb-hover);
 }
 /* Firefox scrollbar */
 * {
  scrollbar-width: thin;
  scrollbar-color: var(--scrollbar-thumb) var(--scrollbar-track);
 }
 /* Links */
 a {
  color: var(--accent-blue);
  text-decoration: none;
 }
 a:hover {
  text-decoration: underline;
 }
 /* Label */
 label {
  font-size: 13px;
  color: var(--text-secondary);
  display: flex;
  align-items: center;
  gap: 8px;
 }
--- a/src/lib/components/Controls.svelte
+++ b/src/lib/components/Controls.svelte
@@ -0,0 +1,116 @@
 <script lang="ts">
  import { backendStore } from "$lib/stores/backend";
  import { transcriptionStore } from "$lib/stores/transcriptions";
  let isTranscribing = $derived(backendStore.appState === "transcribing");
  let isReady = $derived(
    backendStore.appState === "ready" || backendStore.appState === "transcribing"
  );
  let isLoading = $state(false);
  /**
   * Starts transcription when idle and stops it when running, by POSTing to
   * the matching backend endpoint. Calls made while a previous request is
   * still in flight are ignored; failures are logged to the console.
   */
  async function toggleTranscription() {
    if (isLoading) {
      return;
    }
    isLoading = true;
    // Choose the endpoint from the state as it is right now, before awaiting.
    const endpoint = isTranscribing ? "/api/stop" : "/api/start";
    try {
      await backendStore.apiPost(endpoint);
    } catch (toggleError) {
      console.error("Failed to toggle transcription:", toggleError);
    } finally {
      isLoading = false;
    }
  }
  /**
   * Asks the backend to clear its transcriptions and, only once that request
   * has succeeded, empties the local transcription store as well. Failures
   * are logged to the console and leave the local store untouched.
   */
  async function clearTranscriptions() {
    try {
      const clearRequest = backendStore.apiPost("/api/clear");
      await clearRequest;
      transcriptionStore.clearAll();
    } catch (clearError) {
      console.error("Failed to clear:", clearError);
    }
  }
  /**
   * Saves the current transcription text to a file.
   *
   * The text comes from the backend (`/api/transcriptions`), falling back to
   * the local store when that request fails or returns no text. Nothing is
   * saved when the text is blank.
   *
   * The target path is chosen with the Tauri save dialog. Only when the
   * dialog itself is unavailable or fails does the function fall back to a
   * browser-style download. A failure while writing the chosen file through
   * the backend is logged as a save error instead of being masked by the
   * download fallback.
   */
  async function saveTranscriptions() {
    try {
      // Get transcription text from backend or local store
      let text: string;
      try {
        const data = await backendStore.apiGet<{ text: string }>("/api/transcriptions");
        text = data.text || transcriptionStore.getPlainText();
      } catch {
        text = transcriptionStore.getPlainText();
      }
      if (!text.trim()) {
        console.warn("No transcriptions to save");
        return;
      }
      // Ask for a target path with the native dialog. Only a failure of the
      // dialog (import or invocation) triggers the browser download fallback.
      let filePath: string | null;
      try {
        const { save } = await import("@tauri-apps/plugin-dialog");
        filePath = await save({
          defaultPath: "transcription.txt",
          filters: [
            { name: "Text Files", extensions: ["txt"] },
            { name: "All Files", extensions: ["*"] },
          ],
        });
      } catch {
        // Fallback: browser-style download
        const blob = new Blob([text], { type: "text/plain" });
        const url = URL.createObjectURL(blob);
        const a = document.createElement("a");
        a.href = url;
        a.download = "transcription.txt";
        a.click();
        URL.revokeObjectURL(url);
        return;
      }
      // A null path means the user cancelled the dialog; nothing to write.
      if (filePath) {
        // Write via backend API; errors propagate to the outer catch below.
        await backendStore.apiPost("/api/save-file", { path: filePath, text });
      }
    } catch (err) {
      console.error("Failed to save:", err);
    }
  }
 </script>
 <div class="controls">
  <button
    class={isTranscribing ? "danger" : "primary"}
    onclick={toggleTranscription}
    disabled={!isReady || isLoading}
  >
    {#if isLoading}
      ...
    {:else if isTranscribing}
      Stop Transcription
    {:else}
      Start Transcription
    {/if}
  </button>
  <button onclick={clearTranscriptions} disabled={!backendStore.connected}>
    Clear
  </button>
  <button onclick={saveTranscriptions} disabled={!backendStore.connected}>
    Save
  </button>
 </div>
 <style>
  .controls {
    display: flex;
    align-items: center;
    gap: 8px;
    padding: 10px 20px;
    background-color: var(--bg-secondary);
    border-top: 1px solid var(--border-color);
    flex-shrink: 0;
  }
 </style>
--- a/src/lib/components/Header.svelte
+++ b/src/lib/components/Header.svelte
@@ -0,0 +1,82 @@
 <script lang="ts">
  /** Props accepted by the header component. */
  interface Props {
    /** Callback attached to the settings button's click handler. */
    onSettingsClick: () => void;
  }
  // Pull the callback out of the component props via the $props rune.
  let { onSettingsClick }: Props = $props();
 </script>
 <header class="app-header">
  <h1 class="app-title">Local Transcription</h1>
  <button class="settings-btn" onclick={onSettingsClick} title="Settings">
    <svg
      width="20"
      height="20"
      viewBox="0 0 24 24"
      fill="none"
      stroke="currentColor"
      stroke-width="2"
      stroke-linecap="round"
      stroke-linejoin="round"
    >
      <circle cx="12" cy="12" r="3"></circle>
      <path
        d="M19.4 15a1.65 1.65 0 0 0 .33 1.82l.06.06a2 2 0 0 1
        0 2.83 2 2 0 0 1-2.83 0l-.06-.06a1.65 1.65 0 0
        0-1.82-.33 1.65 1.65 0 0 0-1 1.51V21a2 2 0 0 1-2
        2 2 2 0 0 1-2-2v-.09A1.65 1.65 0 0 0 9 19.4a1.65
        1.65 0 0 0-1.82.33l-.06.06a2 2 0 0 1-2.83 0 2 2
        0 0 1 0-2.83l.06-.06A1.65 1.65 0 0 0 4.68
        15a1.65 1.65 0 0 0-1.51-1H3a2 2 0 0 1-2-2 2 2 0
        0 1 2-2h.09A1.65 1.65 0 0 0 4.6 9a1.65 1.65 0 0
        0-.33-1.82l-.06-.06a2 2 0 0 1 0-2.83 2 2 0 0 1
        2.83 0l.06.06A1.65 1.65 0 0 0 9 4.68a1.65 1.65 0
        0 0 1-1.51V3a2 2 0 0 1 2-2 2 2 0 0 1 2
        2v.09a1.65 1.65 0 0 0 1 1.51 1.65 1.65 0 0 0
        1.82-.33l.06-.06a2 2 0 0 1 2.83 0 2 2 0 0 1 0
        2.83l-.06.06a1.65 1.65 0 0 0-.33 1.82V9a1.65 1.65
        0 0 0 1.51 1H21a2 2 0 0 1 2 2 2 2 0 0
        1-2 2h-.09a1.65 1.65 0 0 0-1.51 1z"
      ></path>
    </svg>
  </button>
 </header>
 <style>
  .app-header {
    display: flex;
    align-items: center;
    justify-content: space-between;
    padding: 12px 20px;
    background-color: var(--bg-secondary);
    border-bottom: 1px solid var(--border-color);
    flex-shrink: 0;
  }
  .app-title {
    font-size: 24px;
    font-weight: 700;
    color: var(--text-primary);
    letter-spacing: -0.5px;
  }
  .settings-btn {
    display: flex;
    align-items: center;
    justify-content: center;
    width: 36px;
    height: 36px;
    padding: 0;
    border: 1px solid var(--border-color);
    border-radius: 8px;
    background-color: transparent;
    color: var(--text-secondary);
    cursor: pointer;
    transition: color 0.15s ease, background-color 0.15s ease;
  }
  .settings-btn:hover {
    color: var(--text-primary);
    background-color: var(--bg-tertiary);
  }
 </style>
--- a/src/lib/components/Settings.svelte
+++ b/src/lib/components/Settings.svelte
@@ -0,0 +1,780 @@
 <script lang="ts">
  import { configStore } from "$lib/stores/config";
  import { backendStore } from "$lib/stores/backend";
  /** Props accepted by the settings dialog. */
  interface Props {
    /**
     * Called to dismiss the dialog: after a successful save, and whenever
     * the user cancels (Cancel button, close button, overlay click, Escape).
     */
    onClose: () => void;
  }
  // Pull the callback out of the component props via the $props rune.
  let { onClose }: Props = $props();
  // Local copies of config values for editing. The initial values below are
  // only placeholders: the config-loading $effect further down overwrites
  // every field except managedEmail / managedPassword from configStore.config.
  // Nothing is written back to the store until handleSave() runs.

  // -- user / audio --
  let userName = $state("");
  let audioDevice = $state("default");
  // -- transcription --
  let model = $state("base.en");
  let language = $state("en");
  let computeDevice = $state("auto");
  let computeType = $state("default");
  // -- realtime preview --
  let enableRealtime = $state(false);
  let realtimeModel = $state("tiny.en");
  let realtimeProcessingPause = $state(0.1);
  // -- voice activity detection and timing --
  let sileroSensitivity = $state(0.4);
  let webrtcSensitivity = $state(3);
  let postSpeechSilence = $state(0.3);
  let minRecordingLength = $state(0.5);
  let minGapBetween = $state(0);
  let continuousMode = $state(false);
  // -- display --
  let showTimestamps = $state(true);
  let fadeSeconds = $state(10);
  let maxLines = $state(100);
  let fontSize = $state(12);
  // -- colors (bound to <input type="color"> controls) --
  let userColor = $state("#4CAF50");
  let textColor = $state("#FFFFFF");
  let backgroundColor = $state("#000000");
  // -- server sync --
  let syncEnabled = $state(false);
  let syncUrl = $state("");
  let syncRoom = $state("default");
  let syncPassphrase = $state("");
  // -- remote transcription --
  let remoteMode = $state("local");
  let remoteServerUrl = $state("");
  // Credentials for the managed login/register requests. These are never
  // loaded from, or saved to, the config object in this component.
  let managedEmail = $state("");
  let managedPassword = $state("");
  // -- updates --
  let autoCheckUpdates = $state(true);
  // Fetched device lists; filled asynchronously by fetchAudioDevices() and
  // fetchComputeDevices(), so both start out empty.
  let audioDevices = $state<{ id: string; name: string }[]>([]);
  let computeDevices = $state<{ id: string; name: string }[]>([]);
  // Model options. The same list feeds both the main "Model" select and the
  // "Realtime Model" select in the template.
  // NOTE(review): presumably these are the model identifiers the backend
  // accepts -- confirm against the backend's supported list.
  const modelOptions = [
    "tiny",
    "tiny.en",
    "base",
    "base.en",
    "small",
    "small.en",
    "medium",
    "medium.en",
    "large-v1",
    "large-v2",
    "large-v3",
  ];
  // Choices for the "Compute Type" select: `value` is what gets stored in the
  // config, `label` is the text shown to the user.
  const computeTypeOptions = [
    { value: "default", label: "Default" },
    { value: "int8", label: "int8 (Fastest)" },
    { value: "float16", label: "float16 (GPU)" },
    { value: "float32", label: "float32 (Best Quality)" },
  ];
  // Choices for the "WebRTC Sensitivity" select. Values are numeric; per the
  // labels, 0 is the most sensitive setting and 3 the least.
  const webrtcOptions = [
    { value: 0, label: "0 (Most Sensitive)" },
    { value: 1, label: "1" },
    { value: 2, label: "2" },
    { value: 3, label: "3 (Least Sensitive)" },
  ];
  // Mirror the stored configuration into the local editing fields. Written as
  // an $effect that reads configStore.config, so it runs once the component is
  // mounted (and, per $effect semantics, again if the values it reads change).
  $effect(() => {
    const {
      user,
      audio,
      transcription: tr,
      display,
      server_sync: sync,
      remote,
      updates,
    } = configStore.config;

    // User and audio
    userName = user.name;
    audioDevice = audio.input_device;

    // Transcription engine
    model = tr.model;
    language = tr.language;
    computeDevice = tr.device;
    computeType = tr.compute_type;

    // Realtime preview
    enableRealtime = tr.enable_realtime_transcription;
    realtimeModel = tr.realtime_model;
    realtimeProcessingPause = tr.realtime_processing_pause;

    // Voice activity detection and timing
    sileroSensitivity = tr.silero_sensitivity;
    webrtcSensitivity = tr.webrtc_sensitivity;
    postSpeechSilence = tr.post_speech_silence_duration;
    minRecordingLength = tr.min_length_of_recording;
    minGapBetween = tr.min_gap_between_recordings;
    continuousMode = tr.continuous_mode;

    // Display
    showTimestamps = display.show_timestamps;
    fadeSeconds = display.fade_after_seconds;
    maxLines = display.max_lines;
    fontSize = display.font_size;
    userColor = display.user_color;
    textColor = display.text_color;
    // The color picker only supports 6-character hex, so keep just the first
    // six hex digits of the stored background color (dropping any alpha).
    const opaqueHex = display.background_color.replace("#", "").substring(0, 6);
    backgroundColor = "#" + opaqueHex;

    // Server sync
    syncEnabled = sync.enabled;
    syncUrl = sync.url;
    syncRoom = sync.room;
    syncPassphrase = sync.passphrase;

    // Remote transcription
    remoteMode = remote.mode;
    remoteServerUrl = remote.server_url;

    // Updates
    autoCheckUpdates = updates.auto_check;
  });
  // Kick off both device-list requests when the component mounts. Each loader
  // handles its own errors, so the combined promise is deliberately not awaited.
  $effect(() => {
    void Promise.all([fetchAudioDevices(), fetchComputeDevices()]);
  });
  /**
   * Load the list of audio input devices from the backend into `audioDevices`.
   *
   * Any failure (request error, malformed response) leaves the list empty, in
   * which case the dropdown only offers its built-in "Default" entry.
   */
  async function fetchAudioDevices() {
    type AudioDeviceResponse = { devices: { id: string; name: string }[] };
    let found: { id: string; name: string }[] = [];
    try {
      const response =
        await backendStore.apiGet<AudioDeviceResponse>("/api/audio-devices");
      found = response.devices ?? [];
    } catch {
      // Best effort: keep the empty list.
    }
    audioDevices = found;
  }
  /**
   * Load the list of compute devices from the backend into `computeDevices`.
   *
   * Falls back to a generic Auto / CPU / CUDA list when the request fails OR
   * when the backend answers without a usable (non-empty) `devices` array.
   * Unlike the audio-device select, the compute-device select has no built-in
   * option, so an empty list would leave the user with nothing to choose.
   */
  async function fetchComputeDevices() {
    // Generic choices used whenever the backend cannot supply a usable list.
    const fallbackDevices = [
      { id: "auto", name: "Auto" },
      { id: "cpu", name: "CPU" },
      { id: "cuda", name: "CUDA (GPU)" },
    ];
    try {
      const data = await backendStore.apiGet<{
        devices: { id: string; name: string }[];
      }>("/api/compute-devices");
      const devices = data.devices ?? [];
      // Treat a missing or empty list like a failed request.
      computeDevices = devices.length > 0 ? devices : fallbackDevices;
    } catch {
      computeDevices = fallbackDevices;
    }
  }
  /**
   * Persist the edited settings through the config store and close the dialog.
   *
   * The background color needs special care: the color picker only handles
   * 6-digit hex, so the loading effect strips any alpha channel from the
   * stored value. Writing the picker value back verbatim would silently
   * discard that alpha on every save, so the stored alpha suffix (if any) is
   * re-attached here.
   *
   * On failure the error is logged and the dialog stays open so the user's
   * edits are not lost.
   */
  async function handleSave() {
    // Recover the alpha suffix from the currently stored #RRGGBBAA value.
    const storedBackground = configStore.config.display.background_color;
    const alphaMatch = /^#?[0-9a-f]{6}([0-9a-f]{2})$/i.exec(storedBackground);
    const backgroundAlpha = alphaMatch ? alphaMatch[1] : "";

    const updates = {
      user: {
        name: userName,
      },
      audio: {
        input_device: audioDevice,
      },
      transcription: {
        model,
        device: computeDevice,
        language,
        compute_type: computeType,
        enable_realtime_transcription: enableRealtime,
        realtime_model: realtimeModel,
        realtime_processing_pause: realtimeProcessingPause,
        silero_sensitivity: sileroSensitivity,
        webrtc_sensitivity: webrtcSensitivity,
        post_speech_silence_duration: postSpeechSilence,
        min_length_of_recording: minRecordingLength,
        min_gap_between_recordings: minGapBetween,
        continuous_mode: continuousMode,
      },
      display: {
        show_timestamps: showTimestamps,
        fade_after_seconds: fadeSeconds,
        max_lines: maxLines,
        font_size: fontSize,
        user_color: userColor,
        text_color: textColor,
        // Picker color plus the preserved alpha channel (empty when the
        // stored value had none).
        background_color: backgroundColor + backgroundAlpha,
      },
      server_sync: {
        enabled: syncEnabled,
        url: syncUrl,
        room: syncRoom,
        passphrase: syncPassphrase,
      },
      remote: {
        mode: remoteMode,
        server_url: remoteServerUrl,
      },
      updates: {
        auto_check: autoCheckUpdates,
      },
    };
    try {
      await configStore.saveConfig(updates);
      // Only dismiss once the save has actually succeeded.
      onClose();
    } catch (err) {
      console.error("Failed to save settings:", err);
    }
  }
  /**
   * Dismiss the dialog without saving. The local edits are simply dropped;
   * nothing is written to the config store.
   */
  function handleCancel() {
    onClose();
  }
  /**
   * Ask the backend to check for updates ("Check Now" button).
   * The response is not inspected here; failures are only logged.
   */
  async function handleCheckUpdates() {
    const endpoint = "/api/check-updates";
    try {
      await backendStore.apiPost(endpoint);
    } catch (error) {
      console.error("Failed to check for updates:", error);
    }
  }
  /**
   * Send the managed-mode credentials to the backend's login endpoint.
   * The response is not inspected here; failures are only logged.
   */
  async function handleManagedLogin() {
    const credentials = { email: managedEmail, password: managedPassword };
    try {
      await backendStore.apiPost("/api/remote/login", credentials);
    } catch (error) {
      console.error("Login failed:", error);
    }
  }
  /**
   * Send the managed-mode credentials to the backend's registration endpoint.
   * The response is not inspected here; failures are only logged.
   */
  async function handleManagedRegister() {
    const credentials = { email: managedEmail, password: managedPassword };
    try {
      await backendStore.apiPost("/api/remote/register", credentials);
    } catch (error) {
      console.error("Register failed:", error);
    }
  }
  /**
   * Close the dialog when the click landed on the dimmed backdrop itself.
   * Clicks that originate inside the panel have a different target element
   * and are ignored.
   */
  function handleOverlayClick(e: MouseEvent) {
    const clicked = e.target as HTMLElement;
    const hitBackdrop = clicked.classList.contains("settings-overlay");
    if (!hitBackdrop) {
      return;
    }
    handleCancel();
  }
  /** Window-level key handler: Escape cancels the dialog, other keys are ignored. */
  function handleKeydown(e: KeyboardEvent) {
    if (e.key !== "Escape") {
      return;
    }
    handleCancel();
  }
 </script>
 <svelte:window onkeydown={handleKeydown} />
 <!-- svelte-ignore a11y_click_events_have_key_events a11y_no_static_element_interactions -->
 <div class="settings-overlay" role="presentation" onclick={handleOverlayClick}>
  <div class="settings-panel">
    <div class="settings-header">
      <h2>Settings</h2>
      <button class="close-btn" aria-label="Close settings" onclick={handleCancel}>
        <svg
          width="18"
          height="18"
          viewBox="0 0 24 24"
          fill="none"
          stroke="currentColor"
          stroke-width="2"
          stroke-linecap="round"
          stroke-linejoin="round"
        >
          <line x1="18" y1="6" x2="6" y2="18"></line>
          <line x1="6" y1="6" x2="18" y2="18"></line>
        </svg>
      </button>
    </div>
    <div class="settings-content">
      <!-- User Settings -->
      <section class="settings-section">
        <h3>User Settings</h3>
        <div class="field">
          <label for="user-name">Display Name</label>
          <input id="user-name" type="text" bind:value={userName} />
        </div>
      </section>
      <!-- Audio Settings -->
      <section class="settings-section">
        <h3>Audio Settings</h3>
        <div class="field">
          <label for="audio-device">Audio Device</label>
          <select id="audio-device" bind:value={audioDevice}>
            <option value="default">Default</option>
            {#each audioDevices as device}
              <option value={device.id}>{device.name}</option>
            {/each}
          </select>
        </div>
      </section>
      <!-- Transcription Settings -->
      <section class="settings-section">
        <h3>Transcription Settings</h3>
        <div class="field">
          <label for="model">Model</label>
          <select id="model" bind:value={model}>
            {#each modelOptions as opt}
              <option value={opt}>{opt}</option>
            {/each}
          </select>
        </div>
        <div class="field">
          <label for="language">Language</label>
          <input id="language" type="text" bind:value={language} placeholder="en" />
        </div>
        <div class="field">
          <label for="compute-device">Compute Device</label>
          <select id="compute-device" bind:value={computeDevice}>
            {#each computeDevices as dev}
              <option value={dev.id}>{dev.name}</option>
            {/each}
          </select>
        </div>
        <div class="field">
          <label for="compute-type">Compute Type</label>
          <select id="compute-type" bind:value={computeType}>
            {#each computeTypeOptions as opt}
              <option value={opt.value}>{opt.label}</option>
            {/each}
          </select>
        </div>
      </section>
      <!-- Realtime Preview -->
      <section class="settings-section">
        <h3>Realtime Preview</h3>
        <div class="field-row">
          <label for="enable-realtime">Enable Realtime Preview</label>
          <input
            id="enable-realtime"
            type="checkbox"
            bind:checked={enableRealtime}
          />
        </div>
        {#if enableRealtime}
          <div class="field">
            <label for="realtime-model">Realtime Model</label>
            <select id="realtime-model" bind:value={realtimeModel}>
              {#each modelOptions as opt}
                <option value={opt}>{opt}</option>
              {/each}
            </select>
          </div>
          <div class="field">
            <label for="realtime-pause"
              >Processing Pause: {realtimeProcessingPause.toFixed(2)}s</label
            >
            <input
              id="realtime-pause"
              type="range"
              min="0.01"
              max="1.0"
              step="0.01"
              bind:value={realtimeProcessingPause}
            />
          </div>
        {/if}
      </section>
      <!-- VAD Settings -->
      <section class="settings-section">
        <h3>VAD Settings</h3>
        <div class="field">
          <label for="silero-sensitivity"
            >Silero Sensitivity: {sileroSensitivity.toFixed(2)}</label
          >
          <input
            id="silero-sensitivity"
            type="range"
            min="0.0"
            max="1.0"
            step="0.05"
            bind:value={sileroSensitivity}
          />
        </div>
        <div class="field">
          <label for="webrtc-sensitivity">WebRTC Sensitivity</label>
          <select id="webrtc-sensitivity" bind:value={webrtcSensitivity}>
            {#each webrtcOptions as opt}
              <option value={opt.value}>{opt.label}</option>
            {/each}
          </select>
        </div>
      </section>
      <!-- Timing -->
      <section class="settings-section">
        <h3>Timing</h3>
        <div class="field">
          <label for="post-speech-silence"
            >Post-Speech Silence: {postSpeechSilence.toFixed(2)}s</label
          >
          <input
            id="post-speech-silence"
            type="range"
            min="0.1"
            max="3.0"
            step="0.1"
            bind:value={postSpeechSilence}
          />
        </div>
        <div class="field">
          <label for="min-recording"
            >Min Recording Length: {minRecordingLength.toFixed(2)}s</label
          >
          <input
            id="min-recording"
            type="range"
            min="0.1"
            max="5.0"
            step="0.1"
            bind:value={minRecordingLength}
          />
        </div>
        <div class="field">
          <label for="min-gap"
            >Min Gap Between Recordings: {minGapBetween.toFixed(2)}s</label
          >
          <input
            id="min-gap"
            type="range"
            min="0"
            max="3.0"
            step="0.1"
            bind:value={minGapBetween}
          />
        </div>
        <div class="field-row">
          <label for="continuous-mode">Continuous Mode</label>
          <input
            id="continuous-mode"
            type="checkbox"
            bind:checked={continuousMode}
          />
        </div>
      </section>
      <!-- Display Settings -->
      <section class="settings-section">
        <h3>Display Settings</h3>
        <div class="field-row">
          <label for="show-timestamps">Show Timestamps</label>
          <input
            id="show-timestamps"
            type="checkbox"
            bind:checked={showTimestamps}
          />
        </div>
        <div class="field">
          <label for="fade-seconds"
            >Fade After Seconds: {fadeSeconds} (0 = never)</label
          >
          <input
            id="fade-seconds"
            type="range"
            min="0"
            max="60"
            step="1"
            bind:value={fadeSeconds}
          />
        </div>
        <div class="field">
          <label for="max-lines">Max Lines: {maxLines}</label>
          <input
            id="max-lines"
            type="range"
            min="10"
            max="500"
            step="10"
            bind:value={maxLines}
          />
        </div>
        <div class="field">
          <label for="font-size">Font Size: {fontSize}px</label>
          <input
            id="font-size"
            type="range"
            min="8"
            max="32"
            step="1"
            bind:value={fontSize}
          />
        </div>
      </section>
      <!-- Color Settings -->
      <section class="settings-section">
        <h3>Color Settings</h3>
        <div class="field-row">
          <label for="user-color">User Color</label>
          <input id="user-color" type="color" bind:value={userColor} />
        </div>
        <div class="field-row">
          <label for="text-color">Text Color</label>
          <input id="text-color" type="color" bind:value={textColor} />
        </div>
        <div class="field-row">
          <label for="bg-color">Background Color</label>
          <input id="bg-color" type="color" bind:value={backgroundColor} />
        </div>
      </section>
      <!-- Server Sync -->
      <section class="settings-section">
        <h3>Server Sync</h3>
        <div class="field-row">
          <label for="sync-enabled">Enable Server Sync</label>
          <input
            id="sync-enabled"
            type="checkbox"
            bind:checked={syncEnabled}
          />
        </div>
        {#if syncEnabled}
          <div class="field">
            <label for="sync-url">Server URL</label>
            <input
              id="sync-url"
              type="url"
              bind:value={syncUrl}
              placeholder="http://localhost:3000/api/send"
            />
          </div>
          <div class="field">
            <label for="sync-room">Room</label>
            <input id="sync-room" type="text" bind:value={syncRoom} />
          </div>
          <div class="field">
            <label for="sync-passphrase">Passphrase</label>
            <input
              id="sync-passphrase"
              type="password"
              bind:value={syncPassphrase}
            />
          </div>
        {/if}
      </section>
      <!-- Remote Transcription -->
      <section class="settings-section">
        <h3>Remote Transcription</h3>
        <div class="radio-group">
          <label>
            <input
              type="radio"
              name="remote-mode"
              value="local"
              bind:group={remoteMode}
            />
            Local
          </label>
          <label>
            <input
              type="radio"
              name="remote-mode"
              value="managed"
              bind:group={remoteMode}
            />
            Managed
          </label>
          <label>
            <input
              type="radio"
              name="remote-mode"
              value="byok"
              bind:group={remoteMode}
            />
            BYOK (Bring Your Own Key)
          </label>
        </div>
        {#if remoteMode !== "local"}
          <div class="field">
            <label for="remote-url">Server URL</label>
            <input
              id="remote-url"
              type="url"
              bind:value={remoteServerUrl}
              placeholder="wss://your-proxy.com"
            />
          </div>
        {/if}
        {#if remoteMode === "managed"}
          <div class="managed-auth">
            <div class="field">
              <label for="managed-email">Email</label>
              <input
                id="managed-email"
                type="email"
                bind:value={managedEmail}
                placeholder="email@example.com"
              />
            </div>
            <div class="field">
              <label for="managed-password">Password</label>
              <input
                id="managed-password"
                type="password"
                bind:value={managedPassword}
              />
            </div>
            <div class="auth-buttons">
              <button onclick={handleManagedLogin}>Login</button>
              <button onclick={handleManagedRegister}>Register</button>
            </div>
          </div>
        {/if}
      </section>
      <!-- Updates -->
      <section class="settings-section">
        <h3>Updates</h3>
        <div class="field-row">
          <label for="auto-check-updates">Auto-Check for Updates</label>
          <input
            id="auto-check-updates"
            type="checkbox"
            bind:checked={autoCheckUpdates}
          />
        </div>
        <button onclick={handleCheckUpdates}>Check Now</button>
      </section>
    </div>
    <div class="settings-footer">
      <button onclick={handleCancel}>Cancel</button>
      <button class="primary" onclick={handleSave}>Save</button>
    </div>
  </div>
 </div>
 <style>
  .settings-overlay {
    position: fixed;
    top: 0;
    left: 0;
    right: 0;
    bottom: 0;
    background-color: rgba(0, 0, 0, 0.6);
    display: flex;
    align-items: center;
    justify-content: center;
    z-index: 1000;
  }
  .settings-panel {
    background-color: var(--bg-primary);
    border: 1px solid var(--border-color);
    border-radius: 12px;
    width: 560px;
    max-width: 95vw;
    max-height: 85vh;
    display: flex;
    flex-direction: column;
    box-shadow: 0 8px 32px rgba(0, 0, 0, 0.5);
  }
  .settings-header {
    display: flex;
    align-items: center;
    justify-content: space-between;
    padding: 16px 20px;
    border-bottom: 1px solid var(--border-color);
    flex-shrink: 0;
  }
  .settings-header h2 {
    font-size: 18px;
    font-weight: 600;
    color: var(--text-primary);
  }
  .close-btn {
    display: flex;
    align-items: center;
    justify-content: center;
    width: 32px;
    height: 32px;
    padding: 0;
    border: none;
    border-radius: 6px;
    background-color: transparent;
    color: var(--text-secondary);
    cursor: pointer;
  }
  .close-btn:hover {
    background-color: var(--bg-tertiary);
    color: var(--text-primary);
  }
  .settings-content {
    flex: 1;
    overflow-y: auto;
    padding: 16px 20px;
  }
  .settings-section {
    margin-bottom: 24px;
  }
  .settings-section:last-child {
    margin-bottom: 0;
  }
  .settings-section h3 {
    font-size: 14px;
    font-weight: 600;
    color: var(--accent-blue);
    text-transform: uppercase;
    letter-spacing: 0.5px;
    margin-bottom: 12px;
    padding-bottom: 6px;
    border-bottom: 1px solid var(--border-color);
  }
  .field {
    margin-bottom: 12px;
  }
  .field label {
    display: block;
    margin-bottom: 4px;
    font-size: 12px;
    color: var(--text-secondary);
  }
  .field-row {
    display: flex;
    align-items: center;
    justify-content: space-between;
    margin-bottom: 12px;
  }
  .field-row label {
    font-size: 13px;
    color: var(--text-primary);
  }
  .radio-group {
    display: flex;
    flex-direction: column;
    gap: 8px;
    margin-bottom: 12px;
  }
  .radio-group label {
    display: flex;
    align-items: center;
    gap: 8px;
    font-size: 13px;
    color: var(--text-primary);
    cursor: pointer;
  }
  .managed-auth {
    margin-top: 8px;
    padding: 12px;
    background-color: var(--bg-secondary);
    border-radius: 8px;
  }
  .auth-buttons {
    display: flex;
    gap: 8px;
    margin-top: 8px;
  }
  .settings-footer {
    display: flex;
    justify-content: flex-end;
    gap: 8px;
    padding: 16px 20px;
    border-top: 1px solid var(--border-color);
    flex-shrink: 0;
  }
 </style>
--- a/src/lib/components/StatusBar.svelte
+++ b/src/lib/components/StatusBar.svelte
@@ -0,0 +1,106 @@
 <script lang="ts">
  import { backendStore } from "$lib/stores/backend";
  import { configStore } from "$lib/stores/config";
  // Indicator color for each backend application state that has its own
  // color; any other state falls back to neutral grey.
  const STATE_COLORS = new Map<string, string>([
    ["initializing", "#ff9800"],
    ["ready", "#4caf50"],
    ["transcribing", "#f44336"],
    ["error", "#f44336"],
  ]);
  // Recomputed whenever backendStore.appState changes.
  let statusColor = $derived.by(
    () => STATE_COLORS.get(backendStore.appState) ?? "#888",
  );
  // True only while transcribing; toggles the `pulsing` class (and thereby
  // the pulse animation) on the status indicator.
  let isPulsing = $derived(backendStore.appState === "transcribing");
  // Configured display name, shown on the right-hand side of the bar.
  let userName = $derived(configStore.config.user.name);
 </script>
 <div class="status-bar">
  <div class="status-left">
    <span
      class="status-indicator"
      class:pulsing={isPulsing}
      style="background-color: {statusColor}"
    ></span>
    <span class="state-message">{backendStore.stateMessage}</span>
  </div>
  <div class="status-right">
    {#if backendStore.deviceInfo}
      <span class="device-info">{backendStore.deviceInfo}</span>
      <span class="separator">|</span>
    {/if}
    <span class="user-name">{userName}</span>
  </div>
 </div>
 <style>
  .status-bar {
    display: flex;
    align-items: center;
    justify-content: space-between;
    padding: 6px 20px;
    background-color: var(--bg-secondary);
    border-bottom: 1px solid var(--border-color);
    font-size: 12px;
    flex-shrink: 0;
  }
  .status-left {
    display: flex;
    align-items: center;
    gap: 8px;
  }
  .status-right {
    display: flex;
    align-items: center;
    gap: 8px;
    color: var(--text-secondary);
  }
  .status-indicator {
    width: 10px;
    height: 10px;
    border-radius: 50%;
    flex-shrink: 0;
  }
  .status-indicator.pulsing {
    animation: pulse 1.5s ease-in-out infinite;
  }
  @keyframes pulse {
    0%,
    100% {
      opacity: 1;
      box-shadow: 0 0 0 0 rgba(244, 67, 54, 0.4);
    }
    50% {
      opacity: 0.7;
      box-shadow: 0 0 0 6px rgba(244, 67, 54, 0);
    }
  }
  .state-message {
    color: var(--text-primary);
  }
  .device-info {
    color: var(--text-secondary);
  }
  .separator {
    color: var(--text-muted);
  }
  .user-name {
    color: var(--accent-green);
    font-weight: 500;
  }
 </style>
--- a/src/lib/components/TranscriptionDisplay.svelte
+++ b/src/lib/components/TranscriptionDisplay.svelte
@@ -0,0 +1,110 @@
 <script lang="ts">
  import { transcriptionStore } from "$lib/stores/transcriptions";
  import { configStore } from "$lib/stores/config";
  // Scrollable wrapper element; assigned through `bind:this` in the template,
  // so it is undefined until the element exists.
  let container: HTMLDivElement | undefined = $state();
  // Whether the per-line timestamp prefix is rendered.
  let showTimestamps = $derived(configStore.config.display.show_timestamps);
  // Transcription entries rendered by the keyed {#each} block in the template.
  let items = $derived(transcriptionStore.items);
  // Keep the newest entry in view by scrolling to the bottom whenever the
  // number of items changes.
  $effect(() => {
    // Read the length purely so the effect depends on it.
    void items.length;
    if (!container) {
      return;
    }
    // Defer to the next frame so the new row is laid out before measuring.
    requestAnimationFrame(() => {
      // Re-check: the element may be gone by the time the frame runs.
      if (!container) {
        return;
      }
      container.scrollTop = container.scrollHeight;
    });
  });
 </script>
 <div class="transcription-display" bind:this={container}>
  {#each items as item (item.id)}
    <div class="transcription-item" class:preview={item.isPreview}>
      {#if showTimestamps && item.timestamp}
        <span class="timestamp">[{item.timestamp}]</span>
      {/if}
      {#if item.userName}
        <span class="user-name">{item.userName}:</span>
      {/if}
      {#if item.isPreview}
        <span class="preview-indicator">[...]</span>
      {/if}
      <span class="text">{item.text}</span>
    </div>
  {:else}
    <div class="empty-state">
      Transcriptions will appear here...
    </div>
  {/each}
 </div>
 <style>
  .transcription-display {
    flex: 1;
    overflow-y: auto;
    padding: 12px 20px;
    display: flex;
    flex-direction: column;
    gap: 6px;
  }
  .transcription-item {
    padding: 6px 10px;
    border-radius: 4px;
    background-color: rgba(255, 255, 255, 0.03);
    animation: fadeIn 0.2s ease-out;
    line-height: 1.6;
    word-wrap: break-word;
  }
  .transcription-item.preview {
    font-style: italic;
    opacity: 0.7;
  }
  .timestamp {
    color: #888;
    font-size: 0.85em;
    margin-right: 8px;
    font-family: monospace;
  }
  .user-name {
    color: #4caf50;
    font-weight: 700;
    margin-right: 6px;
  }
  .preview-indicator {
    color: #888;
    font-size: 0.85em;
    margin-right: 4px;
  }
  .text {
    color: #ffffff;
  }
  .empty-state {
    display: flex;
    align-items: center;
    justify-content: center;
    height: 100%;
    color: var(--text-muted);
    font-size: 15px;
    font-style: italic;
  }
  @keyframes fadeIn {
    from {
      opacity: 0;
      transform: translateY(4px);
    }
    to {
      opacity: 1;
      transform: translateY(0);
    }
  }
 </style>
--- a/src/lib/stores/backend.ts
+++ b/src/lib/stores/backend.ts
@@ -0,0 +1,266 @@
 /**
 * Backend store - manages WebSocket connection and REST API communication
 * with the Python backend server running on localhost.
 *
 * The backend port defaults to 8081 but can be updated at runtime via
 * `setPort()`. The WebSocket connects to /ws/control for real-time push
 * of transcriptions, previews, and state changes.
 */
 /**
  * Lifecycle of the WebSocket link. In this module "error" is assigned when
  * the socket closes unexpectedly or cannot be created; "disconnected" is
  * assigned only by `disconnect()`.
  */
 export type ConnectionState = "connecting" | "connected" | "disconnected" | "error";
 /** Backend application state; updated from `state_changed` WebSocket messages. */
 export type AppState = "initializing" | "ready" | "transcribing" | "reloading" | "error";
 /** Shape of the module-level reactive state backing `backendStore`. */
 interface BackendState {
  port: number;
  connectionState: ConnectionState;
  appState: AppState;
  stateMessage: string;
  // Never written anywhere in the visible code; stays at its initial "".
  deviceInfo: string;
  wsConnection: WebSocket | null;
  version: string;
  lastError: string;
 }
 // NOTE(review): `$state` is a Svelte 5 rune. Runes are documented as only
 // being available in `.svelte` and `.svelte.js` / `.svelte.ts` modules, while
 // the diff header names this file `backend.ts` -- confirm the build handles it.
 let state = $state<BackendState>({
  port: 8081,
  connectionState: "disconnected",
  appState: "initializing",
  stateMessage: "Connecting to backend...",
  deviceInfo: "",
  wsConnection: null,
  // Hard-coded here; nothing in the visible code updates it.
  version: "1.4.0",
  lastError: "",
 });
 // Handle of the pending reconnect timeout, or null when none is queued.
 let reconnectTimer: ReturnType<typeof setTimeout> | null = null;
 // Number of reconnects scheduled since the last successful open / explicit connect.
 let reconnectAttempts = 0;
 // Backoff is BASE * 2^attempts, capped at MAX (see `_scheduleReconnect`).
 const MAX_RECONNECT_DELAY_MS = 30_000;
 const BASE_RECONNECT_DELAY_MS = 1_000;
 // ── URL helpers ──────────────────────────────────────────────────────
 /**
  * Build the absolute `http://localhost:<port>` URL for a backend path.
  * A leading "/" is added when `path` does not already start with one.
  */
 function apiUrl(path: string): string {
  let suffix = path;
  if (!suffix.startsWith("/")) {
    suffix = "/" + suffix;
  }
  return "http://localhost:" + state.port + suffix;
 }
 /**
  * `fetch` wrapper that resolves `path` against the backend via `apiUrl`.
  *
  * For non-GET requests a JSON `Content-Type` header is added unless the
  * caller supplied one. The default is applied only when the body is absent
  * or a string: for FormData / Blob / URLSearchParams bodies the browser must
  * derive the content type itself (e.g. the multipart boundary), and forcing
  * `application/json` would corrupt the request.
  *
  * @param path    Backend path, with or without a leading "/".
  * @param options Standard `fetch` options; passed through unchanged apart
  *                from the merged headers.
  * @returns The raw `Response`; the HTTP status is not checked here.
  */
 async function apiFetch(path: string, options?: RequestInit): Promise<Response> {
  const url = apiUrl(path);
  const method = options?.method?.toUpperCase() ?? "GET";
  const headers = new Headers(options?.headers);
  const body = options?.body;
  const jsonDefaultApplies = body == null || typeof body === "string";
  if (method !== "GET" && jsonDefaultApplies && !headers.has("Content-Type")) {
    headers.set("Content-Type", "application/json");
  }
  return fetch(url, { ...options, headers });
 }
 // ── WebSocket management ─────────────────────────────────────────────
 /**
  * Begin a fresh WebSocket session: drop any current socket and pending
  * retry, reset the backoff counter, and open a new socket.
  */
 function connectWebSocket() {
  // `disconnect()` leaves connectionState at "disconnected"; it is set to
  // "connecting" right after, before the socket is created.
  disconnect();
  reconnectAttempts = 0;
  state.connectionState = "connecting";
  _openSocket();
 }
 /**
  * Create the WebSocket to `/ws/control` on the current port and install its
  * handlers. The socket is stored in `state.wsConnection`.
  *
  * - open:    marks the connection "connected" and resets backoff state.
  * - message: parses JSON and forwards object payloads to
  *            `handleWebSocketMessage`; malformed or unexpected payloads are
  *            logged and dropped instead of being silently swallowed.
  * - close:   unless `disconnect()` was requested, flags an error and
  *            schedules a reconnect.
  * - error:   records the error; the subsequent close event drives reconnect.
  *
  * If constructing the socket throws, the failure is treated like a close.
  */
 function _openSocket() {
  const wsUrl = `ws://localhost:${state.port}/ws/control`;
  try {
    const ws = new WebSocket(wsUrl);
    ws.onopen = () => {
      state.connectionState = "connected";
      state.lastError = "";
      reconnectAttempts = 0;
      if (reconnectTimer) {
        clearTimeout(reconnectTimer);
        reconnectTimer = null;
      }
    };
    ws.onmessage = (event) => {
      let data: unknown;
      try {
        data = JSON.parse(event.data);
      } catch (err) {
        console.warn("[backend] Ignoring non-JSON WebSocket message:", err);
        return;
      }
      // The handler reads fields off the payload, so it must be a plain object.
      if (data === null || typeof data !== "object" || Array.isArray(data)) {
        console.warn("[backend] Ignoring unexpected WebSocket payload:", data);
        return;
      }
      try {
        handleWebSocketMessage(data as Record<string, unknown>);
      } catch (err) {
        // Keep the socket alive on handler failures, but make them visible.
        console.error("[backend] WebSocket message handler failed:", err);
      }
    };
    ws.onclose = () => {
      state.wsConnection = null;
      if (state.connectionState !== "disconnected") {
        state.connectionState = "error";
        state.stateMessage = "Disconnected from backend";
        _scheduleReconnect();
      }
    };
    ws.onerror = () => {
      state.lastError = "WebSocket error";
      // onclose fires after this, which handles reconnect
    };
    state.wsConnection = ws;
  } catch {
    state.connectionState = "error";
    state.stateMessage = "Failed to connect";
    _scheduleReconnect();
  }
 }
 /**
  * Queue a single reconnect attempt with exponential backoff
  * (BASE * 2^attempts, capped at MAX). Does nothing if a retry is already
  * pending. When the timer fires, the socket is reopened unless
  * `disconnect()` was called in the meantime.
  */
 function _scheduleReconnect() {
  if (reconnectTimer) {
    return;
  }
  const uncappedMs = BASE_RECONNECT_DELAY_MS * 2 ** reconnectAttempts;
  const waitMs = Math.min(uncappedMs, MAX_RECONNECT_DELAY_MS);
  reconnectAttempts += 1;
  const retry = () => {
    reconnectTimer = null;
    if (state.connectionState === "disconnected") {
      return;
    }
    state.connectionState = "connecting";
    _openSocket();
  };
  reconnectTimer = setTimeout(retry, waitMs);
 }
 /**
  * Intentionally tear down the connection: cancel any pending retry, mark
  * the state "disconnected", and close the current socket (if any) with its
  * close/error handlers detached so no reconnect is triggered.
  */
 function disconnect() {
  if (reconnectTimer) {
    clearTimeout(reconnectTimer);
    reconnectTimer = null;
  }
  state.connectionState = "disconnected";
  const socket = state.wsConnection;
  if (!socket) {
    return;
  }
  socket.onclose = null;
  socket.onerror = null;
  socket.close();
  state.wsConnection = null;
 }
 // ── WebSocket message handling ───────────────────────────────────────
 /**
  * Route one decoded WebSocket message by its `type`:
  * - "state_changed" and "error" update this store's own state;
  * - "transcription", "preview" and "credits_low" are re-dispatched on
  *   `window` as `backend:*` CustomEvents (payload in `detail`) for other
  *   stores to consume.
  * Any other type is ignored.
  */
 function handleWebSocketMessage(data: Record<string, unknown>) {
  switch (data.type) {
    case "state_changed":
      // Only overwrite fields the message actually carries.
      if (data.state) {
        state.appState = data.state as AppState;
      }
      if (data.message) {
        state.stateMessage = data.message as string;
      }
      break;
    case "error":
      state.lastError = (data.message as string) ?? "Unknown error";
      break;
    case "transcription":
      window.dispatchEvent(
        new CustomEvent("backend:transcription", { detail: data })
      );
      break;
    case "preview":
      window.dispatchEvent(
        new CustomEvent("backend:preview", { detail: data })
      );
      break;
    case "credits_low":
      window.dispatchEvent(
        new CustomEvent("backend:credits_low", { detail: data })
      );
      break;
    default:
      break;
  }
 }
 // ── Port management ──────────────────────────────────────────────────
 /**
  * Switch the backend port. A no-op when the port is unchanged. If a
  * connection is active or being retried (any state other than
  * "disconnected"), it is re-established against the new port.
  */
 function setPort(newPort: number) {
  const unchanged = newPort === state.port;
  if (unchanged) {
    return;
  }
  state.port = newPort;
  const sessionActive = state.connectionState !== "disconnected";
  if (sessionActive) {
    connectWebSocket();
  }
 }
 // ── Typed REST helpers ───────────────────────────────────────────────
 /**
  * Shared implementation behind `apiGet` / `apiPost` / `apiPut`.
  *
  * Sends the request through `apiFetch`, throws on a non-2xx status, and
  * decodes the JSON response. An empty response body (e.g. 204 No Content)
  * resolves to `undefined` instead of failing with a JSON parse error.
  */
 async function _requestJson<T>(
  method: "GET" | "POST" | "PUT",
  path: string,
  body?: unknown
 ): Promise<T> {
  const resp =
    method === "GET"
      ? await apiFetch(path)
      : await apiFetch(path, {
          method,
          body: body !== undefined ? JSON.stringify(body) : undefined,
        });
  if (!resp.ok) throw new Error(`${method} ${path} failed: ${resp.status}`);
  const text = await resp.text();
  if (text.trim() === "") {
    return undefined as T;
  }
  return JSON.parse(text) as T;
 }
 /**
  * GET `path` and decode the JSON response.
  * @throws Error when the response status is not OK.
  */
 async function apiGet<T = unknown>(path: string): Promise<T> {
  return _requestJson<T>("GET", path);
 }
 /**
  * POST `body` (JSON-encoded when provided) to `path` and decode the JSON response.
  * @throws Error when the response status is not OK.
  */
 async function apiPost<T = unknown>(
  path: string,
  body?: unknown
 ): Promise<T> {
  return _requestJson<T>("POST", path, body);
 }
 /**
  * PUT `body` (JSON-encoded when provided) to `path` and decode the JSON response.
  * @throws Error when the response status is not OK.
  */
 async function apiPut<T = unknown>(
  path: string,
  body?: unknown
 ): Promise<T> {
  return _requestJson<T>("PUT", path, body);
 }
 // ── Public API ───────────────────────────────────────────────────────
 /**
  * Public facade over the module-level backend state.
  *
  * State is exposed through getters, so every property access reads the
  * current value from `state` rather than a snapshot taken at export time.
  * The functions below the getters are the module's own helpers, exported
  * unchanged (`connect` is `connectWebSocket`).
  */
 export const backendStore = {
  get port() {
    return state.port;
  },
  get connectionState() {
    return state.connectionState;
  },
  // Convenience flag: true only in the "connected" state.
  get connected() {
    return state.connectionState === "connected";
  },
  get appState() {
    return state.appState;
  },
  get stateMessage() {
    return state.stateMessage;
  },
  get deviceInfo() {
    return state.deviceInfo;
  },
  get version() {
    return state.version;
  },
  get lastError() {
    return state.lastError;
  },
  // Base URL for REST calls on the current port (no trailing slash).
  get apiBaseUrl() {
    return `http://localhost:${state.port}`;
  },
  // Same URL that `_openSocket` connects to.
  get wsUrl() {
    return `ws://localhost:${state.port}/ws/control`;
  },
  setPort,
  connect: connectWebSocket,
  disconnect,
  apiUrl,
  apiFetch,
  apiGet,
  apiPost,
  apiPut,
 };
--- a/src/lib/stores/config.ts
+++ b/src/lib/stores/config.ts
@@ -0,0 +1,243 @@
 /**
 * Config store - manages application configuration loaded from
 * and saved to the Python backend via the backend store's API helpers.
 *
 * The backend accepts PUT /api/config with `{ settings: { "dot.key": value } }`.
 */
 import { backendStore } from "$lib/stores/backend";
 /**
  * Typed shape of the configuration tree held by this store.
  * `getDefaultConfig()` supplies a value for every field listed here, and
  * `fetchConfig()` overlays the backend's response on top of those defaults.
  *
  * NOTE(review): field meanings are defined by the backend and are not
  * documented in this file -- consult the backend config schema.
  */
 export interface AppConfig {
  user: {
    name: string;
    id: string;
  };
  audio: {
    input_device: string;
    sample_rate: number;
  };
  transcription: {
    model: string;
    device: string;
    language: string;
    compute_type: string;
    enable_realtime_transcription: boolean;
    realtime_model: string;
    realtime_processing_pause: number;
    silero_sensitivity: number;
    silero_use_onnx: boolean;
    webrtc_sensitivity: number;
    post_speech_silence_duration: number;
    min_length_of_recording: number;
    min_gap_between_recordings: number;
    pre_recording_buffer_duration: number;
    beam_size: number;
    initial_prompt: string;
    no_log_file: boolean;
    continuous_mode: boolean;
  };
  server_sync: {
    enabled: boolean;
    url: string;
    room: string;
    passphrase: string;
  };
  display: {
    show_timestamps: boolean;
    max_lines: number;
    font_source: string;
    font_family: string;
    websafe_font: string;
    google_font: string;
    custom_font_file: string;
    font_size: number;
    theme: string;
    fade_after_seconds: number;
    user_color: string;
    text_color: string;
    background_color: string;
  };
  web_server: {
    port: number;
    host: string;
  };
  remote: {
    mode: string;
    server_url: string;
    auth_token: string;
    byok_api_key: string;
    deepgram_model: string;
    language: string;
    fallback_to_local: boolean;
  };
  updates: {
    auto_check: boolean;
    gitea_url: string;
    owner: string;
    repo: string;
    skipped_versions: string[];
    last_check: string;
    check_interval_hours: number;
  };
 }
 /**
  * Produce a brand-new configuration tree populated with the built-in
  * defaults. Every call returns fresh objects, so callers may mutate the
  * result without affecting later calls.
  */
 function getDefaultConfig(): AppConfig {
  const user = { name: "User", id: "" };
  const audio = { input_device: "default", sample_rate: 16000 };
  const transcription = {
    model: "base.en",
    device: "auto",
    language: "en",
    compute_type: "default",
    enable_realtime_transcription: false,
    realtime_model: "tiny.en",
    realtime_processing_pause: 0.1,
    silero_sensitivity: 0.4,
    silero_use_onnx: true,
    webrtc_sensitivity: 3,
    post_speech_silence_duration: 0.3,
    min_length_of_recording: 0.5,
    min_gap_between_recordings: 0,
    pre_recording_buffer_duration: 0.2,
    beam_size: 5,
    initial_prompt: "",
    no_log_file: true,
    continuous_mode: false,
  };
  const server_sync = {
    enabled: false,
    url: "http://localhost:3000/api/send",
    room: "default",
    passphrase: "",
  };
  const display = {
    show_timestamps: true,
    max_lines: 100,
    font_source: "System Font",
    font_family: "Courier",
    websafe_font: "Arial",
    google_font: "Roboto",
    custom_font_file: "",
    font_size: 12,
    theme: "dark",
    fade_after_seconds: 10,
    user_color: "#4CAF50",
    text_color: "#FFFFFF",
    background_color: "#000000B3",
  };
  const web_server = { port: 8080, host: "127.0.0.1" };
  const remote = {
    mode: "local",
    server_url: "",
    auth_token: "",
    byok_api_key: "",
    deepgram_model: "nova-2",
    language: "en-US",
    fallback_to_local: true,
  };
  const skipped_versions: string[] = [];
  const updates = {
    auto_check: true,
    gitea_url: "https://repo.anhonesthost.net",
    owner: "streamer-tools",
    repo: "local-transcription",
    skipped_versions,
    last_check: "",
    check_interval_hours: 24,
  };
  // Assembled in the same key order as the interface declares them.
  return {
    user,
    audio,
    transcription,
    server_sync,
    display,
    web_server,
    remote,
    updates,
  };
 }
 // Current configuration tree; starts as the built-in defaults and is
 // replaced wholesale by `fetchConfig()`.
 let config = $state<AppConfig>(getDefaultConfig());
 // True while `fetchConfig()` / `updateConfig()` is in flight.
 let loading = $state(false);
 // Message of the most recent failed request, or "" when the last one succeeded.
 let error = $state("");
 /**
  * Fetch the full configuration tree from the backend.
  * GET /api/config
  *
  * The response is overlaid on a fresh copy of the defaults so that every
  * key of `AppConfig` exists even if the backend omits it. Failures are not
  * thrown: the message is stored in `error`, logged, and the previous
  * `config` is kept. `loading` is always reset on exit.
  */
 async function fetchConfig(): Promise<void> {
  loading = true;
  error = "";
  try {
    const data = await backendStore.apiGet<Record<string, unknown>>("/api/config");
    // `AppConfig` is an interface and therefore has no implicit index
    // signature; cast explicitly so the call type-checks against
    // `deepMerge`'s `Record<string, unknown>` parameter.
    const defaults = getDefaultConfig() as unknown as Record<string, unknown>;
    // Deep merge with defaults to ensure all keys exist
    config = deepMerge(defaults, data) as unknown as AppConfig;
  } catch (err) {
    error = err instanceof Error ? err.message : String(err);
    console.error("[config] fetchConfig failed:", error);
  } finally {
    loading = false;
  }
 }
 /**
  * Recursively overlay `source` onto `target` and return the result as a new
  * object; neither input is mutated.
  *
  * - When both sides hold a plain object under a key, they are merged recursively.
  * - When `target` holds a plain object (a section) but `source` supplies
  *   null or a non-object, the target's section is kept so the merged tree
  *   never loses a section callers dereference.
  * - Otherwise (scalars, arrays, keys unknown to `target`) the source value
  *   replaces the target value.
  * - A `__proto__` key in `source` is skipped: assigning it would rewrite the
  *   result's prototype instead of adding a property.
  *
  * @param target Base object providing defaults.
  * @param source Overrides, typically a parsed JSON payload.
  * @returns A new merged object.
  */
 function deepMerge(target: Record<string, unknown>, source: Record<string, unknown>): Record<string, unknown> {
  const isPlainObject = (value: unknown): value is Record<string, unknown> =>
    value !== null && typeof value === "object" && !Array.isArray(value);
  const result = { ...target };
  for (const key of Object.keys(source)) {
    if (key === "__proto__") continue;
    const incoming = source[key];
    // Only own properties count as defaults; ignore anything inherited.
    const existing = Object.prototype.hasOwnProperty.call(target, key)
      ? target[key]
      : undefined;
    if (isPlainObject(existing)) {
      if (isPlainObject(incoming)) {
        result[key] = deepMerge(existing, incoming);
      }
      // else: keep the default section already copied into `result`.
    } else {
      result[key] = incoming;
    }
  }
  return result;
 }
 /**
  * Push a batch of setting changes to the backend.
  * PUT /api/config with body `{ settings: { "dot.key": value, ... } }`
  *
  * Keys are dot-notation paths, e.g. `{ "transcription.model": "small.en" }`.
  * After a successful PUT the config tree is re-fetched so local state
  * matches the backend.
  *
  * Resolves with the backend's reply; on failure the message is stored in
  * `error`, logged, and the original error is rethrown.
  */
 async function updateConfig(
  settings: Record<string, unknown>,
 ): Promise<{ status: string; message: string; engine_reloaded: boolean }> {
  type UpdateReply = { status: string; message: string; engine_reloaded: boolean };
  loading = true;
  error = "";
  try {
    const payload = { settings };
    const reply = await backendStore.apiPut<UpdateReply>("/api/config", payload);
    // Re-sync the local tree with what the backend now holds.
    await fetchConfig();
    return reply;
  } catch (failure) {
    if (failure instanceof Error) {
      error = failure.message;
    } else {
      error = String(failure);
    }
    console.error("[config] updateConfig failed:", error);
    throw failure;
  } finally {
    loading = false;
  }
 }
 /**
  * Public facade over the config state. `config`, `loading` and `error` are
  * getters, so each access returns the current module-level value (including
  * after `fetchConfig()` reassigns `config`).
  */
 export const configStore = {
  get config() {
    return config;
  },
  get loading() {
    return loading;
  },
  get error() {
    return error;
  },
  fetchConfig,
  updateConfig,
 };
--- a/src/lib/stores/transcriptions.ts
+++ b/src/lib/stores/transcriptions.ts
@@ -0,0 +1,109 @@
 /**
 * Transcriptions store - manages the list of transcription items
 * received from the backend via WebSocket.
 */
 /** One row of the transcript list. */
 export interface TranscriptionItem {
  // Locally generated identifier (see `generateId`); a preview keeps its id
  // across successive `setPreview` updates.
  id: string;
  text: string;
  userName: string;
  timestamp: string;
  // True for the in-progress preview row, false for finalized transcriptions.
  isPreview: boolean;
 }
 // All rows in arrival order; the functions below keep at most one preview row.
 let items = $state<TranscriptionItem[]>([]);
 // Monotonic counter mixed into generated ids.
 let nextId = 0;
 /** Return a new id of the form `t-<epoch ms>-<counter>` and advance the counter. */
 function generateId(): string {
  const sequence = nextId;
  nextId += 1;
  return "t-" + Date.now() + "-" + sequence;
 }
 /**
  * Append a finalized transcription. Any existing preview row is removed
  * first, missing fields default to "", and the list is trimmed from the
  * front so it never exceeds 500 rows.
  */
 function addTranscription(data: {
  text?: string;
  user_name?: string;
  timestamp?: string;
 }) {
  // The final text supersedes the preview row, if there is one.
  const stalePreviewAt = items.findIndex((entry) => entry.isPreview);
  if (stalePreviewAt >= 0) {
    items.splice(stalePreviewAt, 1);
  }
  const finalized: TranscriptionItem = {
    id: generateId(),
    text: data.text ?? "",
    userName: data.user_name ?? "",
    timestamp: data.timestamp ?? "",
    isPreview: false,
  };
  items.push(finalized);
  // Drop the oldest rows once the cap is exceeded.
  const overflow = items.length - 500;
  if (overflow > 0) {
    items.splice(0, overflow);
  }
 }
 /**
  * Create or refresh the preview row. An existing preview is replaced in
  * place and keeps its id; otherwise a new preview row is appended.
  * Missing fields default to "".
  */
 function setPreview(data: {
  text?: string;
  user_name?: string;
  timestamp?: string;
 }) {
  const slot = items.findIndex((entry) => entry.isPreview);
  const hasPreview = slot !== -1;
  const replacement: TranscriptionItem = {
    id: hasPreview ? items[slot].id : generateId(),
    text: data.text ?? "",
    userName: data.user_name ?? "",
    timestamp: data.timestamp ?? "",
    isPreview: true,
  };
  if (!hasPreview) {
    items.push(replacement);
    return;
  }
  items[slot] = replacement;
 }
 /** Remove every row (including any preview) from the list in place. */
 function clearAll() {
  items.splice(0, items.length);
 }
 /**
  * Render the finalized rows as plain text, one per line, in the form
  * `[timestamp] userName: text`. The timestamp and user-name prefixes are
  * omitted when empty; preview rows are skipped.
  */
 function getPlainText(): string {
  const lines: string[] = [];
  for (const entry of items) {
    if (entry.isPreview) {
      continue;
    }
    const timePrefix = entry.timestamp ? `[${entry.timestamp}] ` : "";
    const userPrefix = entry.userName ? `${entry.userName}: ` : "";
    lines.push(timePrefix + userPrefix + entry.text);
  }
  return lines.join("\n");
 }
 // Subscribe to the `backend:*` events on `window` (skipped when there is no
 // `window`) and feed each event's `detail` into the matching handler.
 if (typeof window !== "undefined") {
  const routes: Array<[string, (detail: any) => void]> = [
    ["backend:transcription", addTranscription],
    ["backend:preview", setPreview],
  ];
  for (const [eventName, handle] of routes) {
    window.addEventListener(eventName, ((e: CustomEvent) => {
      handle(e.detail);
    }) as EventListener);
  }
 }
 /**
  * Public facade over the transcript list.
  *
  * `items` returns the live array itself; `currentPreview` and
  * `transcriptions` are computed on each access (`transcriptions` returns a
  * new filtered array every time).
  */
 export const transcriptionStore = {
  get items() {
    return items;
  },
  // The preview row, or null when there is none.
  get currentPreview(): TranscriptionItem | null {
    return items.find((item) => item.isPreview) ?? null;
  },
  // Finalized rows only.
  get transcriptions(): TranscriptionItem[] {
    return items.filter((item) => !item.isPreview);
  },
  addTranscription,
  setPreview,
  clearAll,
  getPlainText,
 };
--- a/src/main.ts
+++ b/src/main.ts
@@ -0,0 +1,6 @@
 import App from "./App.svelte";
 import { mount } from "svelte";
 import "./app.css";
 // Resolve the mount point explicitly so a missing element fails with a
 // descriptive error instead of an opaque failure inside `mount`.
 const target = document.getElementById("app");
 if (!target) {
  throw new Error('Cannot mount App: no element with id "app" found in the document');
 }
 const app = mount(App, { target });
 export default app;
--- a/svelte.config.js
+++ b/svelte.config.js
@@ -0,0 +1,5 @@
 import { vitePreprocess } from "@sveltejs/vite-plugin-svelte";
 // Svelte configuration: registers `vitePreprocess()` as the preprocessor.
 export default {
  preprocess: vitePreprocess(),
 };
--- a/tsconfig.json
+++ b/tsconfig.json
@@ -0,0 +1,15 @@
 {
  "extends": "@tsconfig/svelte/tsconfig.json",
  "compilerOptions": {
    "target": "ESNext",
    "useDefineForClassFields": true,
    "module": "ESNext",
    "resolveJsonModule": true,
    "allowJs": true,
    "checkJs": true,
    "isolatedModules": true,
    "moduleDetection": "force",
    "strict": true
  },
  "include": ["src/**/*.ts", "src/**/*.svelte"]
 }
--- a/vite.config.ts
+++ b/vite.config.ts
@@ -0,0 +1,21 @@
 import { defineConfig } from "vite";
 import { svelte } from "@sveltejs/vite-plugin-svelte";
 import path from "path";
 // https://vitejs.dev/config/
 // Vite configuration for the Svelte frontend.
 export default defineConfig({
  plugins: [svelte()],
  clearScreen: false,
  resolve: {
    alias: {
      // `$lib/...` imports resolve to ./src/lib (path is relative to the
      // process working directory).
      $lib: path.resolve("./src/lib"),
    },
  },
  server: {
    // Fixed dev-server port; with `strictPort` Vite exits instead of picking
    // another port when 1420 is taken.
    // NOTE(review): presumably the port a Tauri shell expects -- confirm
    // against the src-tauri configuration.
    port: 1420,
    strictPort: true,
    watch: {
      // Directories excluded from the dev server's file watcher.
      ignored: ["**/src-tauri/**", "**/client/**", "**/server/**", "**/backend/**", "**/gui/**"],
    },
  },
 });
		`@@ -0,0 +1 @@`
							`"""Backend package for headless transcription service."""`