chore: bump version to 0.2.44 [skip ci]

Fix permissions on already-extracted sidecar dirs
The chmod fix only ran after fresh extraction, but existing sidecar dirs extracted by older versions still lacked execute permissions. Now set_executable_permissions() runs on EVERY app launch (both the early-return path for existing dirs and after fresh extraction). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-23 20:45:32 +00:00 · 2026-03-23 13:45:26 -07:00 · 2026-03-23 20:30:33 +00:00 · 2026-03-23 13:30:26 -07:00 · 2026-03-23 20:18:57 +00:00 · 2026-03-23 13:18:51 -07:00
66 changed files with 6108 additions and 398 deletions
--- a/.claude/settings.local.json
+++ b/.claude/settings.local.json
@@ -0,0 +1,21 @@
 {
  "permissions": {
    "allow": [
      "Bash(git init:*)",
      "Bash(git:*)",
      "WebSearch",
      "Bash(npm create:*)",
      "Bash(cp:*)",
      "Bash(npm install:*)",
      "Bash(/home/jknapp/.cargo/bin/cargo test:*)",
      "Bash(ruff:*)",
      "Bash(npm run:*)",
      "Bash(npx svelte-check:*)",
      "Bash(pip install:*)",
      "Bash(python3:*)",
      "Bash(/home/jknapp/.cargo/bin/cargo check:*)",
      "Bash(cargo check:*)",
      "Bash(npm ls:*)"
    ]
  }
 }
--- a/.claude/worktrees/agent-a0bd87d1
+++ b/.claude/worktrees/agent-a0bd87d1
--- a/.claude/worktrees/agent-a198b5f8
+++ b/.claude/worktrees/agent-a198b5f8
--- a/.claude/worktrees/agent-ad3d6fca
+++ b/.claude/worktrees/agent-ad3d6fca
--- a/.claude/worktrees/agent-aefe2597
+++ b/.claude/worktrees/agent-aefe2597
--- a/.gitea/workflows/build-sidecar.yml
+++ b/.gitea/workflows/build-sidecar.yml
@@ -0,0 +1,402 @@
 # Builds the Python sidecar archives and uploads them to a sidecar release.
 name: Build Sidecars
 # Runs on pushes to main that touch python/, or when started manually.
 on:
  push:
    branches:
      - main
    paths:
      - "python/**"
  workflow_dispatch:
 jobs:
  # Bumps the sidecar patch version in python/pyproject.toml, commits and tags
  # it as sidecar-v<version>, and creates the matching Gitea release.
  # Outputs: version, tag, has_changes.
  bump-sidecar-version:
    name: Bump sidecar version and tag
    # The bump commit created below carries "[skip ci]" so it does not
    # re-trigger this job.
    if: "!contains(github.event.head_commit.message, '[skip ci]')"
    runs-on: ubuntu-latest
    outputs:
      version: ${{ steps.bump.outputs.version }}
      tag: ${{ steps.bump.outputs.tag }}
      has_changes: ${{ steps.check_changes.outputs.has_changes }}
    steps:
      - uses: actions/checkout@v4
        with:
          # Two commits are fetched so HEAD~1 exists for the diff below
          fetch-depth: 2
      - name: Check for python changes
        id: check_changes
        run: |
          # If triggered by workflow_dispatch, always build
          if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
            echo "has_changes=true" >> $GITHUB_OUTPUT
            exit 0
          fi
          # Check if any python/ files changed in this commit
          # NOTE(review): only HEAD~1..HEAD is inspected; a multi-commit push whose
          # python/ changes are in an earlier commit is reported as unchanged.
          CHANGED=$(git diff --name-only HEAD~1 HEAD -- python/ 2>/dev/null || echo "")
          if [ -n "$CHANGED" ]; then
            echo "has_changes=true" >> $GITHUB_OUTPUT
            echo "Python changes detected: $CHANGED"
          else
            echo "has_changes=false" >> $GITHUB_OUTPUT
            echo "No python/ changes detected, skipping sidecar build"
          fi
      - name: Configure git
        if: steps.check_changes.outputs.has_changes == 'true'
        run: |
          git config user.name "Gitea Actions"
          git config user.email "actions@gitea.local"
      - name: Bump sidecar patch version
        if: steps.check_changes.outputs.has_changes == 'true'
        id: bump
        run: |
          # Read current version from python/pyproject.toml
          CURRENT=$(grep '^version = ' python/pyproject.toml | head -1 | sed 's/version = "\(.*\)"/\1/')
          echo "Current sidecar version: ${CURRENT}"
          # Increment patch number
          MAJOR=$(echo "${CURRENT}" | cut -d. -f1)
          MINOR=$(echo "${CURRENT}" | cut -d. -f2)
          PATCH=$(echo "${CURRENT}" | cut -d. -f3)
          NEW_PATCH=$((PATCH + 1))
          NEW_VERSION="${MAJOR}.${MINOR}.${NEW_PATCH}"
          echo "New sidecar version: ${NEW_VERSION}"
          # Update python/pyproject.toml
          sed -i "s/^version = \"${CURRENT}\"/version = \"${NEW_VERSION}\"/" python/pyproject.toml
          echo "version=${NEW_VERSION}" >> $GITHUB_OUTPUT
          echo "tag=sidecar-v${NEW_VERSION}" >> $GITHUB_OUTPUT
      - name: Commit and tag
        if: steps.check_changes.outputs.has_changes == 'true'
        env:
          BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
        run: |
          NEW_VERSION="${{ steps.bump.outputs.version }}"
          TAG="${{ steps.bump.outputs.tag }}"
          git add python/pyproject.toml
          git commit -m "chore: bump sidecar version to ${NEW_VERSION} [skip ci]"
          # Push using token for authentication
          REMOTE_URL=$(git remote get-url origin | sed "s|://|://gitea-actions:${BUILD_TOKEN}@|")
          # Rebase in case another workflow pushed first. A failed rebase must stop
          # the job: pushing from a half-finished rebase would publish a tag that
          # does not contain the version bump.
          git pull --rebase "${REMOTE_URL}" main || {
            echo "ERROR: Failed to rebase the version bump onto main."
            exit 1
          }
          # Tag only after the rebase. Rebasing rewrites the bump commit, so a tag
          # created earlier would point at a commit that never reaches main.
          git tag "${TAG}"
          git push "${REMOTE_URL}" HEAD:main
          git push "${REMOTE_URL}" "${TAG}"
      - name: Create Gitea release
        if: steps.check_changes.outputs.has_changes == 'true'
        env:
          BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
        run: |
          REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
          TAG="${{ steps.bump.outputs.tag }}"
          VERSION="${{ steps.bump.outputs.version }}"
          RELEASE_NAME="Sidecar v${VERSION}"
          # --fail turns an HTTP error into a non-zero exit so a rejected request
          # is not reported as a created release
          curl --fail --silent --show-error -X POST \
            -H "Authorization: token ${BUILD_TOKEN}" \
            -H "Content-Type: application/json" \
            -d "{\"tag_name\": \"${TAG}\", \"name\": \"${RELEASE_NAME}\", \"body\": \"Automated sidecar build.\", \"draft\": false, \"prerelease\": false}" \
            "${REPO_API}/releases" || {
            echo "ERROR: Failed to create release ${RELEASE_NAME}."
            exit 1
          }
          echo "Created release: ${RELEASE_NAME}"
  # Builds the CUDA and CPU sidecar archives on Linux and uploads both zips to
  # the release for the tag produced by bump-sidecar-version.
  build-sidecar-linux:
    name: Build Sidecar (Linux)
    needs: bump-sidecar-version
    if: needs.bump-sidecar-version.outputs.has_changes == 'true'
    runs-on: ubuntu-latest
    env:
      PYTHON_VERSION: "3.11"
    steps:
      - uses: actions/checkout@v4
        with:
          # Build from the tagged bump commit, not the commit that triggered the run
          ref: ${{ needs.bump-sidecar-version.outputs.tag }}
      - name: Install uv
        run: |
          if command -v uv &> /dev/null; then
            echo "uv already installed: $(uv --version)"
          else
            curl -LsSf https://astral.sh/uv/install.sh | sh
            echo "$HOME/.local/bin" >> $GITHUB_PATH
          fi
      - name: Install ffmpeg
        run: sudo apt-get update && sudo apt-get install -y ffmpeg
      - name: Set up Python
        run: uv python install ${{ env.PYTHON_VERSION }}
      - name: Build sidecar (CUDA)
        working-directory: python
        run: uv run --python ${{ env.PYTHON_VERSION }} python build_sidecar.py --with-cuda
      - name: Package sidecar (CUDA)
        run: |
          cd python/dist/voice-to-notes-sidecar && zip -r ../../../sidecar-linux-x86_64-cuda.zip .
      - name: Build sidecar (CPU)
        working-directory: python
        run: |
          # Remove the CUDA build output before building the CPU variant
          rm -rf dist/voice-to-notes-sidecar
          uv run --python ${{ env.PYTHON_VERSION }} python build_sidecar.py --cpu-only
      - name: Package sidecar (CPU)
        run: |
          cd python/dist/voice-to-notes-sidecar && zip -r ../../../sidecar-linux-x86_64-cpu.zip .
      - name: Upload to sidecar release
        env:
          BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
        run: |
          sudo apt-get install -y jq
          REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
          TAG="${{ needs.bump-sidecar-version.outputs.tag }}"
          # Find the sidecar release by tag (retry up to 30 times with 10s delay)
          echo "Waiting for sidecar release ${TAG} to be available..."
          for i in $(seq 1 30); do
            RELEASE_JSON=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
              "${REPO_API}/releases/tags/${TAG}")
            RELEASE_ID=$(echo "$RELEASE_JSON" | jq -r '.id // empty')
            if [ -n "${RELEASE_ID}" ] && [ "${RELEASE_ID}" != "null" ]; then
              echo "Found sidecar release: ${TAG} (ID: ${RELEASE_ID})"
              break
            fi
            echo "Attempt ${i}/30: Release not ready yet, retrying in 10s..."
            sleep 10
          done
          if [ -z "${RELEASE_ID}" ] || [ "${RELEASE_ID}" = "null" ]; then
            echo "ERROR: Failed to find sidecar release for tag ${TAG} after 30 attempts."
            exit 1
          fi
          for file in sidecar-*.zip; do
            filename=$(basename "$file")
            encoded_name=$(echo "$filename" | sed 's/ /%20/g')
            echo "Uploading ${filename} ($(du -h "$file" | cut -f1))..."
            # Delete any asset with the same name first so a re-run replaces it
            ASSET_ID=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
              "${REPO_API}/releases/${RELEASE_ID}/assets" | jq -r ".[] | select(.name == \"${filename}\") | .id // empty")
            if [ -n "${ASSET_ID}" ]; then
              curl -s -X DELETE -H "Authorization: token ${BUILD_TOKEN}" \
                "${REPO_API}/releases/${RELEASE_ID}/assets/${ASSET_ID}"
            fi
            HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X POST \
              -H "Authorization: token ${BUILD_TOKEN}" \
              -H "Content-Type: application/octet-stream" \
              -T "$file" \
              "${REPO_API}/releases/${RELEASE_ID}/assets?name=${encoded_name}")
            echo "Upload response: HTTP ${HTTP_CODE}"
            # A non-2xx status means the asset is missing from the release, so
            # fail the job instead of finishing green
            case "${HTTP_CODE}" in
              2??) ;;
              *)
                echo "ERROR: Upload of ${filename} failed (HTTP ${HTTP_CODE})."
                exit 1
                ;;
            esac
          done
  # Builds the CUDA and CPU sidecar archives on Windows and uploads both zips
  # to the release for the tag produced by bump-sidecar-version.
  build-sidecar-windows:
    name: Build Sidecar (Windows)
    needs: bump-sidecar-version
    if: needs.bump-sidecar-version.outputs.has_changes == 'true'
    runs-on: windows-latest
    env:
      PYTHON_VERSION: "3.11"
    steps:
      # Check out the tag created by the bump job rather than the triggering commit
      - uses: actions/checkout@v4
        with:
          ref: ${{ needs.bump-sidecar-version.outputs.tag }}
      # Installs uv only when it is not already on PATH, then adds its bin dir to GITHUB_PATH
      - name: Install uv
        shell: powershell
        run: |
          if (Get-Command uv -ErrorAction SilentlyContinue) {
            Write-Host "uv already installed: $(uv --version)"
          } else {
            irm https://astral.sh/uv/install.ps1 | iex
            echo "$env:USERPROFILE\.local\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
          }
      - name: Install ffmpeg
        shell: powershell
        run: choco install ffmpeg -y
      - name: Set up Python
        shell: powershell
        run: uv python install ${{ env.PYTHON_VERSION }}
      # 7z is used by the packaging steps below; install it only when missing
      - name: Install 7-Zip
        shell: powershell
        run: |
          if (-not (Get-Command 7z -ErrorAction SilentlyContinue)) {
            choco install 7zip -y
          }
      - name: Build sidecar (CUDA)
        shell: powershell
        working-directory: python
        run: uv run --python ${{ env.PYTHON_VERSION }} python build_sidecar.py --with-cuda
      - name: Package sidecar (CUDA)
        shell: powershell
        run: |
          7z a -tzip -mx=5 sidecar-windows-x86_64-cuda.zip .\python\dist\voice-to-notes-sidecar\*
      # The CUDA build output is removed first so the CPU build starts from a clean dist dir
      - name: Build sidecar (CPU)
        shell: powershell
        working-directory: python
        run: |
          Remove-Item -Recurse -Force dist\voice-to-notes-sidecar
          uv run --python ${{ env.PYTHON_VERSION }} python build_sidecar.py --cpu-only
      - name: Package sidecar (CPU)
        shell: powershell
        run: |
          7z a -tzip -mx=5 sidecar-windows-x86_64-cpu.zip .\python\dist\voice-to-notes-sidecar\*
      # Polls for the release by tag (up to 30 attempts, 10s apart) and exits 1 if
      # it never appears. For each sidecar-*.zip it deletes any same-named asset
      # and uploads the file with curl.exe. A failed upload is only logged as a
      # warning; it does not fail the step.
      - name: Upload to sidecar release
        shell: powershell
        env:
          BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
        run: |
          $REPO_API = "${{ github.server_url }}/api/v1/repos/${{ github.repository }}"
          $Headers = @{ "Authorization" = "token $env:BUILD_TOKEN" }
          $TAG = "${{ needs.bump-sidecar-version.outputs.tag }}"
          # Find the sidecar release by tag (retry up to 30 times with 10s delay)
          Write-Host "Waiting for sidecar release ${TAG} to be available..."
          $RELEASE_ID = $null
          for ($i = 1; $i -le 30; $i++) {
            try {
              $release = Invoke-RestMethod -Uri "${REPO_API}/releases/tags/${TAG}" -Headers $Headers -ErrorAction Stop
              $RELEASE_ID = $release.id
              if ($RELEASE_ID) {
                Write-Host "Found sidecar release: ${TAG} (ID: ${RELEASE_ID})"
                break
              }
            } catch {
              # Release not ready yet
            }
            Write-Host "Attempt ${i}/30: Release not ready yet, retrying in 10s..."
            Start-Sleep -Seconds 10
          }
          if (-not $RELEASE_ID) {
            Write-Host "ERROR: Failed to find sidecar release for tag ${TAG} after 30 attempts."
            exit 1
          }
          Get-ChildItem -Path . -Filter "sidecar-*.zip" | ForEach-Object {
            $filename = $_.Name
            $encodedName = [System.Uri]::EscapeDataString($filename)
            $size = [math]::Round($_.Length / 1MB, 1)
            Write-Host "Uploading ${filename} (${size} MB)..."
            try {
              $assets = Invoke-RestMethod -Uri "${REPO_API}/releases/${RELEASE_ID}/assets" -Headers $Headers
              $existing = $assets | Where-Object { $_.name -eq $filename }
              if ($existing) {
                Invoke-RestMethod -Uri "${REPO_API}/releases/${RELEASE_ID}/assets/$($existing.id)" -Method Delete -Headers $Headers
              }
            } catch {}
            $uploadUrl = "${REPO_API}/releases/${RELEASE_ID}/assets?name=${encodedName}"
            $result = curl.exe --fail --silent --show-error `
              -X POST `
              -H "Authorization: token $env:BUILD_TOKEN" `
              -H "Content-Type: application/octet-stream" `
              -T "$($_.FullName)" `
              "$uploadUrl" 2>&1
            if ($LASTEXITCODE -eq 0) {
              Write-Host "Upload successful: ${filename}"
            } else {
              Write-Host "WARNING: Upload failed for ${filename}: ${result}"
            }
          }
  # Builds the CPU-only sidecar archive on macOS and uploads it to the release
  # for the tag produced by bump-sidecar-version.
  build-sidecar-macos:
    name: Build Sidecar (macOS)
    needs: bump-sidecar-version
    if: needs.bump-sidecar-version.outputs.has_changes == 'true'
    runs-on: macos-latest
    env:
      PYTHON_VERSION: "3.11"
    steps:
      - uses: actions/checkout@v4
        with:
          # Build from the tagged bump commit, not the commit that triggered the run
          ref: ${{ needs.bump-sidecar-version.outputs.tag }}
      - name: Install uv
        run: |
          if command -v uv &> /dev/null; then
            echo "uv already installed: $(uv --version)"
          else
            curl -LsSf https://astral.sh/uv/install.sh | sh
            echo "$HOME/.local/bin" >> $GITHUB_PATH
          fi
      - name: Install ffmpeg
        run: brew install ffmpeg
      - name: Set up Python
        run: uv python install ${{ env.PYTHON_VERSION }}
      - name: Build sidecar (CPU)
        working-directory: python
        run: uv run --python ${{ env.PYTHON_VERSION }} python build_sidecar.py --cpu-only
      - name: Package sidecar (CPU)
        run: |
          cd python/dist/voice-to-notes-sidecar && zip -r ../../../sidecar-macos-aarch64-cpu.zip .
      - name: Upload to sidecar release
        env:
          BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
        run: |
          which jq || brew install jq
          REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
          TAG="${{ needs.bump-sidecar-version.outputs.tag }}"
          # Find the sidecar release by tag (retry up to 30 times with 10s delay)
          echo "Waiting for sidecar release ${TAG} to be available..."
          for i in $(seq 1 30); do
            RELEASE_JSON=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
              "${REPO_API}/releases/tags/${TAG}")
            RELEASE_ID=$(echo "$RELEASE_JSON" | jq -r '.id // empty')
            if [ -n "${RELEASE_ID}" ] && [ "${RELEASE_ID}" != "null" ]; then
              echo "Found sidecar release: ${TAG} (ID: ${RELEASE_ID})"
              break
            fi
            echo "Attempt ${i}/30: Release not ready yet, retrying in 10s..."
            sleep 10
          done
          if [ -z "${RELEASE_ID}" ] || [ "${RELEASE_ID}" = "null" ]; then
            echo "ERROR: Failed to find sidecar release for tag ${TAG} after 30 attempts."
            exit 1
          fi
          for file in sidecar-*.zip; do
            filename=$(basename "$file")
            encoded_name=$(echo "$filename" | sed 's/ /%20/g')
            echo "Uploading ${filename} ($(du -h "$file" | cut -f1))..."
            # Delete any asset with the same name first so a re-run replaces it
            ASSET_ID=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
              "${REPO_API}/releases/${RELEASE_ID}/assets" | jq -r ".[] | select(.name == \"${filename}\") | .id // empty")
            if [ -n "${ASSET_ID}" ]; then
              curl -s -X DELETE -H "Authorization: token ${BUILD_TOKEN}" \
                "${REPO_API}/releases/${RELEASE_ID}/assets/${ASSET_ID}"
            fi
            HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X POST \
              -H "Authorization: token ${BUILD_TOKEN}" \
              -H "Content-Type: application/octet-stream" \
              -T "$file" \
              "${REPO_API}/releases/${RELEASE_ID}/assets?name=${encoded_name}")
            echo "Upload response: HTTP ${HTTP_CODE}"
            # A non-2xx status means the asset is missing from the release, so
            # fail the job instead of finishing green
            case "${HTTP_CODE}" in
              2??) ;;
              *)
                echo "ERROR: Upload of ${filename} failed (HTTP ${HTTP_CODE})."
                exit 1
                ;;
            esac
          done
--- a/.gitea/workflows/cleanup-releases.yml
+++ b/.gitea/workflows/cleanup-releases.yml
@@ -0,0 +1,65 @@
 name: Cleanup Old Releases
 # Triggers: a daily schedule, plus manual runs via workflow_dispatch.
 on:
  schedule:
    # Every day at 06:00 UTC
    - cron: "0 6 * * *"
  workflow_dispatch:
 jobs:
  # Deletes all but the first KEEP_COUNT app releases (tags starting with "v")
  # and sidecar releases (tags starting with "sidecar-v") returned by the API,
  # together with their git tags.
  cleanup:
    name: Remove old releases
    runs-on: ubuntu-latest
    env:
      KEEP_COUNT: 5
    steps:
      - name: Cleanup old app releases
        env:
          BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
        run: |
          # jq is required below; install it when the runner image does not ship it
          command -v jq > /dev/null || { sudo apt-get update && sudo apt-get install -y jq; }
          REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
          # Get all releases, sorted newest first (API default)
          RELEASES=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
            "${REPO_API}/releases?limit=50")
          # The filters below iterate with '.[]', so stop early with a clear
          # message when the response is not a JSON array (e.g. an error object)
          if ! echo "$RELEASES" | jq -e 'type == "array"' > /dev/null; then
            echo "ERROR: Unexpected response from ${REPO_API}/releases"
            exit 1
          fi
          # Separate app releases (v*) and sidecar releases (sidecar-v*)
          APP_IDS=$(echo "$RELEASES" | jq -r '[.[] | select(.tag_name | startswith("v") and (startswith("sidecar") | not)) | .id] | .[]')
          SIDECAR_IDS=$(echo "$RELEASES" | jq -r '[.[] | select(.tag_name | startswith("sidecar-v")) | .id] | .[]')
          # Delete every release beyond the first KEEP_COUNT, plus its tag.
          #   $1    label used in the log output ("app" or "sidecar")
          #   $2..  release IDs in the order returned by the API
          prune_releases() {
            local label="$1"
            shift
            local count=0 id tag
            for id in "$@"; do
              count=$((count + 1))
              if [ "$count" -le "$KEEP_COUNT" ]; then
                continue
              fi
              tag=$(echo "$RELEASES" | jq -r ".[] | select(.id == $id) | .tag_name")
              echo "Deleting ${label} release $id ($tag)..."
              curl -s -o /dev/null -w "HTTP %{http_code}\n" -X DELETE \
                -H "Authorization: token ${BUILD_TOKEN}" \
                "${REPO_API}/releases/$id"
              # Also delete the tag
              curl -s -o /dev/null -X DELETE \
                -H "Authorization: token ${BUILD_TOKEN}" \
                "${REPO_API}/tags/$tag"
            done
          }
          # The ID lists are left unquoted on purpose so each ID becomes one argument
          prune_releases app $APP_IDS
          prune_releases sidecar $SIDECAR_IDS
          echo "Cleanup complete. Kept latest ${KEEP_COUNT} of each type."
--- a/.gitea/workflows/release.yml
+++ b/.gitea/workflows/release.yml
@@ -0,0 +1,305 @@
 name: Release
 # Runs on every push to main.
 on:
  push:
    branches:
      - main
 jobs:
  # Bumps the app patch version in package.json, src-tauri/tauri.conf.json and
  # src-tauri/Cargo.toml, commits and tags it as v<version>, and creates the
  # matching Gitea release. Outputs: new_version, tag.
  bump-version:
    name: Bump version and tag
    # Skip if this is a version-bump commit (avoid infinite loop)
    if: "!contains(github.event.head_commit.message, '[skip ci]')"
    runs-on: ubuntu-latest
    outputs:
      new_version: ${{ steps.bump.outputs.new_version }}
      tag: ${{ steps.bump.outputs.tag }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Configure git
        run: |
          git config user.name "Gitea Actions"
          git config user.email "actions@gitea.local"
      - name: Bump patch version
        id: bump
        run: |
          # Read current version from package.json
          CURRENT=$(grep '"version"' package.json | head -1 | sed 's/.*"version": *"\([^"]*\)".*/\1/')
          echo "Current version: ${CURRENT}"
          # Increment patch number
          MAJOR=$(echo "${CURRENT}" | cut -d. -f1)
          MINOR=$(echo "${CURRENT}" | cut -d. -f2)
          PATCH=$(echo "${CURRENT}" | cut -d. -f3)
          NEW_PATCH=$((PATCH + 1))
          NEW_VERSION="${MAJOR}.${MINOR}.${NEW_PATCH}"
          echo "New version: ${NEW_VERSION}"
          # Update package.json
          sed -i "s/\"version\": \"${CURRENT}\"/\"version\": \"${NEW_VERSION}\"/" package.json
          # Update src-tauri/tauri.conf.json
          sed -i "s/\"version\": \"${CURRENT}\"/\"version\": \"${NEW_VERSION}\"/" src-tauri/tauri.conf.json
          # Update src-tauri/Cargo.toml (match version = "x.y.z" in [package] section)
          sed -i "s/^version = \"${CURRENT}\"/version = \"${NEW_VERSION}\"/" src-tauri/Cargo.toml
          echo "new_version=${NEW_VERSION}" >> $GITHUB_OUTPUT
          echo "tag=v${NEW_VERSION}" >> $GITHUB_OUTPUT
      - name: Commit and tag
        env:
          BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
        run: |
          NEW_VERSION="${{ steps.bump.outputs.new_version }}"
          git add package.json src-tauri/tauri.conf.json src-tauri/Cargo.toml
          git commit -m "chore: bump version to ${NEW_VERSION} [skip ci]"
          # Push using token for authentication
          REMOTE_URL=$(git remote get-url origin | sed "s|://|://gitea-actions:${BUILD_TOKEN}@|")
          # Rebase in case another workflow pushed first. A failed rebase must stop
          # the job: pushing from a half-finished rebase would publish a tag that
          # does not contain the version bump.
          git pull --rebase "${REMOTE_URL}" main || {
            echo "ERROR: Failed to rebase the version bump onto main."
            exit 1
          }
          # Tag only after the rebase. Rebasing rewrites the bump commit, so a tag
          # created earlier would point at a commit that never reaches main.
          git tag "v${NEW_VERSION}"
          git push "${REMOTE_URL}" HEAD:main
          git push "${REMOTE_URL}" "v${NEW_VERSION}"
      - name: Create Gitea release
        env:
          BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
        run: |
          REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
          TAG="${{ steps.bump.outputs.tag }}"
          RELEASE_NAME="Voice to Notes ${TAG}"
          # --fail turns an HTTP error into a non-zero exit so a rejected request
          # is not reported as a created release
          curl --fail --silent --show-error -X POST \
            -H "Authorization: token ${BUILD_TOKEN}" \
            -H "Content-Type: application/json" \
            -d "{\"tag_name\": \"${TAG}\", \"name\": \"${RELEASE_NAME}\", \"body\": \"Automated build.\", \"draft\": false, \"prerelease\": false}" \
            "${REPO_API}/releases" || {
            echo "ERROR: Failed to create release ${RELEASE_NAME}."
            exit 1
          }
          echo "Created release: ${RELEASE_NAME}"
  # ── Platform builds (run after version bump) ──
  # Builds the Tauri app on Linux and uploads the resulting .deb and .rpm
  # bundles to the release for the tag produced by bump-version.
  build-linux:
    name: Build App (Linux)
    needs: bump-version
    runs-on: ubuntu-latest
    env:
      NODE_VERSION: "20"
    steps:
      - uses: actions/checkout@v4
        with:
          # Build from the tagged bump commit, not the commit that triggered the run
          ref: ${{ needs.bump-version.outputs.tag }}
      - name: Set up Node.js
        uses: actions/setup-node@v4
        with:
          node-version: ${{ env.NODE_VERSION }}
      - name: Install Rust stable
        run: |
          curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable
          echo "$HOME/.cargo/bin" >> $GITHUB_PATH
      - name: Install system dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y libgtk-3-dev libwebkit2gtk-4.1-dev libappindicator3-dev librsvg2-dev patchelf xdg-utils rpm
      - name: Install npm dependencies
        run: npm ci
      - name: Build Tauri app
        run: npm run tauri build
      - name: Upload to release
        env:
          BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
        run: |
          sudo apt-get install -y jq
          REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
          TAG="${{ needs.bump-version.outputs.tag }}"
          echo "Release tag: ${TAG}"
          RELEASE_ID=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
            "${REPO_API}/releases/tags/${TAG}" | jq -r '.id // empty')
          if [ -z "${RELEASE_ID}" ] || [ "${RELEASE_ID}" = "null" ]; then
            echo "ERROR: Failed to find release for tag ${TAG}."
            exit 1
          fi
          echo "Release ID: ${RELEASE_ID}"
          find src-tauri/target/release/bundle -type f \( -name "*.deb" -o -name "*.rpm" \) | while IFS= read -r file; do
            filename=$(basename "$file")
            encoded_name=$(echo "$filename" | sed 's/ /%20/g')
            echo "Uploading ${filename} ($(du -h "$file" | cut -f1))..."
            # Delete any asset with the same name first so a re-run replaces it
            ASSET_ID=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
              "${REPO_API}/releases/${RELEASE_ID}/assets" | jq -r ".[] | select(.name == \"${filename}\") | .id // empty")
            if [ -n "${ASSET_ID}" ]; then
              curl -s -X DELETE -H "Authorization: token ${BUILD_TOKEN}" \
                "${REPO_API}/releases/${RELEASE_ID}/assets/${ASSET_ID}"
            fi
            HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X POST \
              -H "Authorization: token ${BUILD_TOKEN}" \
              -H "Content-Type: application/octet-stream" \
              -T "$file" \
              "${REPO_API}/releases/${RELEASE_ID}/assets?name=${encoded_name}")
            echo "Upload response: HTTP ${HTTP_CODE}"
            # A non-2xx status means the asset is missing from the release. This
            # loop is the last command of the script, so exiting non-zero here
            # makes the step fail.
            case "${HTTP_CODE}" in
              2??) ;;
              *)
                echo "ERROR: Upload of ${filename} failed (HTTP ${HTTP_CODE})."
                exit 1
                ;;
            esac
          done
  # Builds the Tauri app on Windows and uploads the resulting .msi and
  # *-setup.exe bundles to the release for the tag produced by bump-version.
  build-windows:
    name: Build App (Windows)
    needs: bump-version
    runs-on: windows-latest
    env:
      NODE_VERSION: "20"
    steps:
      # Check out the tag created by the bump job rather than the triggering commit
      - uses: actions/checkout@v4
        with:
          ref: ${{ needs.bump-version.outputs.tag }}
      - name: Set up Node.js
        uses: actions/setup-node@v4
        with:
          node-version: ${{ env.NODE_VERSION }}
      # Reuses an existing rustup when present; otherwise downloads rustup-init
      # and adds the cargo bin dir to GITHUB_PATH
      - name: Install Rust stable
        shell: powershell
        run: |
          if (Get-Command rustup -ErrorAction SilentlyContinue) {
            rustup default stable
          } else {
            Invoke-WebRequest -Uri https://win.rustup.rs/x86_64 -OutFile rustup-init.exe
            .\rustup-init.exe -y --default-toolchain stable
            echo "$env:USERPROFILE\.cargo\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
          }
      - name: Install npm dependencies
        shell: powershell
        run: npm ci
      - name: Build Tauri app
        shell: powershell
        run: npm run tauri build
      # Looks up the release by tag (errors stop the step via -ErrorAction Stop).
      # For each bundle it deletes any same-named asset and uploads the file with
      # curl.exe. A failed upload is only logged as a warning; it does not fail
      # the step.
      - name: Upload to release
        shell: powershell
        env:
          BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
        run: |
          $REPO_API = "${{ github.server_url }}/api/v1/repos/${{ github.repository }}"
          $Headers = @{ "Authorization" = "token $env:BUILD_TOKEN" }
          $TAG = "${{ needs.bump-version.outputs.tag }}"
          Write-Host "Release tag: ${TAG}"
          $release = Invoke-RestMethod -Uri "${REPO_API}/releases/tags/${TAG}" -Headers $Headers -ErrorAction Stop
          $RELEASE_ID = $release.id
          Write-Host "Release ID: ${RELEASE_ID}"
          Get-ChildItem -Path src-tauri\target\release\bundle -Recurse -Include *.msi,*-setup.exe | ForEach-Object {
            $filename = $_.Name
            $encodedName = [System.Uri]::EscapeDataString($filename)
            $size = [math]::Round($_.Length / 1MB, 1)
            Write-Host "Uploading ${filename} (${size} MB)..."
            try {
              $assets = Invoke-RestMethod -Uri "${REPO_API}/releases/${RELEASE_ID}/assets" -Headers $Headers
              $existing = $assets | Where-Object { $_.name -eq $filename }
              if ($existing) {
                Invoke-RestMethod -Uri "${REPO_API}/releases/${RELEASE_ID}/assets/$($existing.id)" -Method Delete -Headers $Headers
              }
            } catch {}
            # Use curl for streaming upload (Invoke-RestMethod fails on large files)
            $uploadUrl = "${REPO_API}/releases/${RELEASE_ID}/assets?name=${encodedName}"
            $result = curl.exe --fail --silent --show-error `
              -X POST `
              -H "Authorization: token $env:BUILD_TOKEN" `
              -H "Content-Type: application/octet-stream" `
              -T "$($_.FullName)" `
              "$uploadUrl" 2>&1
            if ($LASTEXITCODE -eq 0) {
              Write-Host "Upload successful: ${filename}"
            } else {
              Write-Host "WARNING: Upload failed for ${filename}: ${result}"
            }
          }
  # Builds the Tauri app on macOS and uploads the resulting .dmg bundles to the
  # release for the tag produced by bump-version.
  build-macos:
    name: Build App (macOS)
    needs: bump-version
    runs-on: macos-latest
    env:
      NODE_VERSION: "20"
    steps:
      - uses: actions/checkout@v4
        with:
          # Build from the tagged bump commit, not the commit that triggered the run
          ref: ${{ needs.bump-version.outputs.tag }}
      - name: Set up Node.js
        uses: actions/setup-node@v4
        with:
          node-version: ${{ env.NODE_VERSION }}
      - name: Install Rust stable
        run: |
          curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable
          echo "$HOME/.cargo/bin" >> $GITHUB_PATH
      - name: Install system dependencies
        # Best effort: a failed create-dmg install does not stop the job
        run: brew install --quiet create-dmg || true
      - name: Install npm dependencies
        run: npm ci
      - name: Build Tauri app
        run: npm run tauri build
      - name: Upload to release
        env:
          BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
        run: |
          which jq || brew install jq
          REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
          TAG="${{ needs.bump-version.outputs.tag }}"
          echo "Release tag: ${TAG}"
          RELEASE_ID=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
            "${REPO_API}/releases/tags/${TAG}" | jq -r '.id // empty')
          if [ -z "${RELEASE_ID}" ] || [ "${RELEASE_ID}" = "null" ]; then
            echo "ERROR: Failed to find release for tag ${TAG}."
            exit 1
          fi
          echo "Release ID: ${RELEASE_ID}"
          find src-tauri/target/release/bundle -type f -name "*.dmg" | while IFS= read -r file; do
            filename=$(basename "$file")
            encoded_name=$(echo "$filename" | sed 's/ /%20/g')
            echo "Uploading ${filename} ($(du -h "$file" | cut -f1))..."
            # Delete any asset with the same name first so a re-run replaces it
            ASSET_ID=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
              "${REPO_API}/releases/${RELEASE_ID}/assets" | jq -r ".[] | select(.name == \"${filename}\") | .id // empty")
            if [ -n "${ASSET_ID}" ]; then
              curl -s -X DELETE -H "Authorization: token ${BUILD_TOKEN}" \
                "${REPO_API}/releases/${RELEASE_ID}/assets/${ASSET_ID}"
            fi
            HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X POST \
              -H "Authorization: token ${BUILD_TOKEN}" \
              -H "Content-Type: application/octet-stream" \
              -T "$file" \
              "${REPO_API}/releases/${RELEASE_ID}/assets?name=${encoded_name}")
            echo "Upload response: HTTP ${HTTP_CODE}"
            # A non-2xx status means the asset is missing from the release. This
            # loop is the last command of the script, so exiting non-zero here
            # makes the step fail.
            case "${HTTP_CODE}" in
              2??) ;;
              *)
                echo "ERROR: Upload of ${filename} failed (HTTP ${HTTP_CODE})."
                exit 1
                ;;
            esac
          done
--- a/.gitignore
+++ b/.gitignore
@@ -46,3 +46,10 @@ Thumbs.db
 *.ogg
 *.flac
 !test/fixtures/*
 # Sidecar build artifacts
 src-tauri/binaries/*
 !src-tauri/binaries/.gitkeep
 src-tauri/sidecar.zip
 python/dist/
 python/build/
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -8,7 +8,7 @@ Desktop app for transcribing audio/video with speaker identification. Runs local
 - **ML pipeline:** Python sidecar process (faster-whisper, pyannote.audio, wav2vec2)
 - **Database:** SQLite (via rusqlite in Rust)
 - **Local AI:** Bundled llama-server (llama.cpp) — default, no install needed
- **Cloud AI providers:** LiteLLM, OpenAI, Anthropic (optional, user-configured)
+- **Cloud AI providers:** OpenAI, Anthropic, OpenAI-compatible endpoints (optional, user-configured)
 - **Caption export:** pysubs2 (Python)
 - **Audio UI:** wavesurfer.js
 - **Transcript editor:** TipTap (ProseMirror)
@@ -40,7 +40,13 @@ docs/                   # Architecture and design documents
 - Database: UUIDs as primary keys (TEXT type in SQLite)
 - All timestamps in milliseconds (integer) relative to media file start
 ## Distribution
 - Python sidecar is frozen via PyInstaller into a standalone binary for distribution
 - Tauri bundles the sidecar via `externalBin` — no Python required for end users
 - CI/CD builds on Gitea Actions (Linux, Windows, macOS ARM)
 - Dev mode uses system Python (`VOICE_TO_NOTES_DEV=1` or debug builds)
 ## Platform Targets
- Linux (primary development target)
+- Linux x86_64 (primary development target)
- Windows (must work, tested before release)
+- Windows x86_64
- macOS (future, not yet targeted)
+- macOS aarch64 (Apple Silicon)
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -0,0 +1,140 @@
 # Contributing to Voice to Notes
 Thank you for your interest in contributing! This guide covers how to set up the project for development and submit changes.
 ## Development Setup
 ### Prerequisites
 - **Node.js 20+** and npm
 - **Rust** (stable toolchain)
 - **Python 3.11+** with [uv](https://docs.astral.sh/uv/) (recommended) or pip
 - **System libraries (Linux only):**
  ```bash
  sudo apt install libgtk-3-dev libwebkit2gtk-4.1-dev libappindicator3-dev librsvg2-dev patchelf xdg-utils
  ```
 ### Clone and Install
 ```bash
 git clone https://repo.anhonesthost.net/MacroPad/voice-to-notes.git
 cd voice-to-notes
 # Frontend
 npm install
 # Python sidecar
 cd python && pip install -e ".[dev]" && cd ..
 ```
 ### Running in Dev Mode
 ```bash
 npm run tauri:dev
 ```
 This runs the Svelte dev server + Tauri with hot-reload. The Python sidecar runs from your system Python (no PyInstaller needed in dev mode).
 ### Building
 ```bash
 # Build the Python sidecar (frozen binary)
 cd python && python build_sidecar.py --cpu-only && cd ..
 # Build the full app
 npm run tauri build
 ```
 ## Project Structure
 ```
 src/                        # Svelte 5 frontend
  lib/components/           # Reusable UI components
  lib/stores/               # Svelte stores (app state)
  routes/                   # SvelteKit pages
 src-tauri/                  # Rust backend (Tauri v2)
  src/sidecar/              # Python sidecar lifecycle (download, extract, IPC)
  src/commands/             # Tauri command handlers
  src/db/                   # SQLite database layer
 python/                     # Python ML sidecar
  voice_to_notes/           # Main package
    services/               # Transcription, diarization, AI, export
    ipc/                    # JSON-line IPC protocol
    hardware/               # GPU/CPU detection
 .gitea/workflows/           # CI/CD pipelines
 docs/                       # Documentation
 ```
 ## How It Works
 The app has three layers:
 1. **Frontend (Svelte)** — UI, audio playback (wavesurfer.js), transcript editing (TipTap)
 2. **Backend (Rust/Tauri)** — Desktop integration, file access, SQLite, sidecar process management
 3. **Sidecar (Python)** — ML inference (faster-whisper, pyannote.audio), AI chat, export
 Rust and Python communicate via **JSON-line IPC** over stdin/stdout pipes. Each request has an `id`, `type`, and `payload`. The Python sidecar runs as a child process managed by `SidecarManager` in Rust.
 ## Conventions
 ### Rust
 - Follow standard Rust conventions
 - Run `cargo fmt` and `cargo clippy` before committing
 - Tauri commands go in `src-tauri/src/commands/`
 ### Python
 - Python 3.11+, type hints everywhere
 - Use `ruff` for linting: `ruff check python/`
 - Tests with pytest: `cd python && pytest`
 - IPC messages: JSON-line format with `id`, `type`, `payload` fields
 ### TypeScript / Svelte
 - Svelte 5 runes (`$state`, `$derived`, `$effect`)
 - Strict TypeScript
 - Components in `src/lib/components/`
 - State in `src/lib/stores/`
 ### General
 - All timestamps in milliseconds (integer)
 - UUIDs as primary keys in the database
 - Don't bundle API keys or secrets — those are user-configured
 ## Submitting Changes
 1. Fork the repository
 2. Create a feature branch: `git checkout -b my-feature`
 3. Make your changes
 4. Test locally with `npm run tauri:dev`
 5. Run linters: `cargo fmt && cargo clippy`, `ruff check python/`
 6. Commit with a clear message describing the change
 7. Open a Pull Request against `main`
 ## CI/CD
 Pushes to `main` automatically:
 - Bump the app version and create a release (`release.yml`)
 - Build app installers for all platforms
 Changes to `python/` also trigger sidecar builds (`build-sidecar.yml`).
 ## Areas for Contribution
 - UI/UX improvements
 - New export formats
 - Additional AI provider integrations
 - Performance optimizations
 - Accessibility improvements
 - Documentation and translations
 - Bug reports and testing on different platforms
 ## Reporting Issues
 Open an issue on the [repository](https://repo.anhonesthost.net/MacroPad/voice-to-notes/issues) with:
 - Steps to reproduce
 - Expected vs actual behavior
 - Platform and version info
 - Sidecar logs (`%LOCALAPPDATA%\com.voicetonotes.app\sidecar.log` on Windows)
 ## License
 By contributing, you agree that your contributions will be licensed under the [MIT License](LICENSE).
--- a/README.md
+++ b/README.md
@@ -1,30 +1,129 @@
 # Voice to Notes
-A desktop application that transcribes audio/video recordings with speaker identification, producing editable transcriptions with synchronized audio playback.
+A desktop application that transcribes audio and video recordings with speaker identification, synchronized playback, and AI-powered analysis. Export to SRT, WebVTT, ASS captions, plain text, or Markdown.
-## Goals
+## Features
- **Speech-to-Text Transcription** — Accurately convert spoken audio from recordings into text
+- **Speech-to-Text** — Accurate transcription via faster-whisper with word-level timestamps. Supports 99 languages.
- **Speaker Identification (Diarization)** — Detect and distinguish between different speakers in a conversation
+- **Speaker Identification** — Detect and label speakers using pyannote.audio. Rename speakers for clean exports.
- **Speaker Naming** — Assign and persist speaker names/IDs across the transcription
+- **GPU Acceleration** — CUDA support for NVIDIA GPUs (Windows/Linux). Falls back to CPU automatically.
- **Synchronized Playback** — Click any transcribed text segment to play back the corresponding audio for review and correction
+- **Synchronized Playback** — Click any word to seek. Waveform visualization via wavesurfer.js.
- **Export Formats**
+- **AI Chat** — Ask questions about your transcript. Works with Ollama (local), OpenAI, Anthropic, or any OpenAI-compatible API.
-  - Closed captioning files (SRT, VTT) for video
+- **Export** — SRT, WebVTT, ASS, plain text, Markdown — all with speaker labels.
-  - Plain text documents with speaker labels
+- **Cross-Platform** — Linux, Windows, macOS (Apple Silicon).
- **AI Integration** — Connect to AI providers to ask questions about the conversation and generate condensed notes/summaries
+
 ## Quick Start
 1. Download the installer from [Releases](https://repo.anhonesthost.net/MacroPad/voice-to-notes/releases)
 2. On first launch, choose **CPU** or **CUDA** sidecar (the AI engine downloads separately, ~500MB–2GB)
 3. Import an audio/video file and click **Transcribe**
 See the full [User Guide](docs/USER_GUIDE.md) for detailed setup and usage instructions.
 ## Platform Support
-| Platform | Status |
+| Platform | Architecture | Installers |
-|----------|--------|
+|----------|-------------|------------|
-| Linux    | Planned (initial target) |
+| Linux    | x86_64      | .deb, .rpm |
-| Windows  | Planned (initial target) |
+| Windows  | x86_64      | .msi, .exe (NSIS) |
-| macOS    | Future (pending hardware) |
+| macOS    | ARM (Apple Silicon) | .dmg |
-## Project Status
+## Architecture
-**Early planning phase** — Architecture and technology decisions in progress.
+The app is split into two independently versioned components:
 - **App** (v0.2.x) — Tauri desktop shell with Svelte frontend. Small installer (~50MB).
 - **Sidecar** (v1.x) — Python ML engine (faster-whisper, pyannote.audio). Downloaded on first launch. CPU (~500MB) or CUDA (~2GB) variants.
 This separation means app UI updates don't require re-downloading the sidecar, and sidecar updates don't require reinstalling the app.
 ## Tech Stack
 | Component | Technology |
 |-----------|-----------|
 | Desktop shell | Tauri v2 (Rust + Svelte 5 / TypeScript) |
 | Transcription | faster-whisper (CTranslate2) |
 | Speaker ID | pyannote.audio 3.1 |
 | Audio UI | wavesurfer.js |
 | Transcript editor | TipTap (ProseMirror) |
 | AI (local) | Ollama (any model) |
 | AI (cloud) | OpenAI, Anthropic, OpenAI-compatible |
 | Caption export | pysubs2 |
 | Database | SQLite (rusqlite) |
 ## Development
 ### Prerequisites
 - Node.js 20+
 - Rust (stable)
 - Python 3.11+ with uv or pip
 - Linux: `libgtk-3-dev`, `libwebkit2gtk-4.1-dev`, `libappindicator3-dev`, `librsvg2-dev`
 ### Getting Started
 ```bash
 # Install frontend dependencies
 npm install
 # Install Python sidecar dependencies
 cd python && pip install -e ".[dev]" && cd ..
 # Run in dev mode (uses system Python for the sidecar)
 npm run tauri:dev
 ```
 ### Building
 ```bash
 # Build the frozen Python sidecar (CPU-only)
 cd python && python build_sidecar.py --cpu-only && cd ..
 # Build with CUDA support
 cd python && python build_sidecar.py --with-cuda && cd ..
 # Build the Tauri app
 npm run tauri build
 ```
 ### CI/CD
 Two Gitea Actions workflows in `.gitea/workflows/`:
 **`release.yml`** — Triggers on push to main:
 1. Bumps app version (patch), creates git tag and Gitea release
 2. Builds lightweight app installers for all platforms (no sidecar bundled)
 **`build-sidecar.yml`** — Triggers on changes to `python/` or manual dispatch:
 1. Bumps sidecar version, creates `sidecar-v*` tag and release
 2. Builds CPU + CUDA variants for Linux/Windows, CPU for macOS
 3. Uploads as separate release assets
 #### Required Secrets
 | Secret | Purpose |
 |--------|---------|
 | `BUILD_TOKEN` | Gitea API token for creating releases and pushing tags |
 ### Project Structure
 ```
 src/                        # Svelte 5 frontend
  lib/components/           # UI components (waveform, transcript editor, settings, etc.)
  lib/stores/               # Svelte stores (settings, transcript state)
  routes/                   # SvelteKit pages
 src-tauri/                  # Rust backend
  src/sidecar/              # Sidecar process manager (download, extract, IPC)
  src/commands/             # Tauri command handlers
  nsis-hooks.nsh            # Windows uninstall cleanup
 python/                     # Python sidecar
  voice_to_notes/           # Python package (transcription, diarization, AI, export)
  build_sidecar.py          # PyInstaller build script
  voice_to_notes.spec       # PyInstaller spec
 .gitea/workflows/           # CI/CD (release.yml, build-sidecar.yml)
 docs/                       # Documentation
 ```
 ## License
-MIT
+[MIT](LICENSE)
--- a/docs/USER_GUIDE.md
+++ b/docs/USER_GUIDE.md
@@ -0,0 +1,240 @@
 # Voice to Notes — User Guide
 ## Getting Started
 ### Installation
 Download the installer for your platform from the [Releases](https://repo.anhonesthost.net/MacroPad/voice-to-notes/releases) page:
 - **Windows:** `.msi` or `-setup.exe`
 - **Linux:** `.deb` or `.rpm`
 - **macOS:** `.dmg`
 ### First-Time Setup
 On first launch, Voice to Notes will prompt you to download its AI engine (the "sidecar"):
 1. Choose **Standard (CPU)** (~500 MB) or **GPU Accelerated (CUDA)** (~2 GB)
   - Choose CUDA if you have an NVIDIA GPU for significantly faster transcription
   - CPU works on all computers
 2. Click **Download & Install** and wait for the download to complete
 3. The app will proceed to the main interface once the sidecar is ready
 The sidecar only needs to be downloaded once. Updates are detected automatically on launch.
 ---
 ## Basic Workflow
 ### 1. Import Audio or Video
 - Click **Import Audio** or press **Ctrl+O** (Cmd+O on Mac)
 - **Audio formats:** MP3, WAV, FLAC, OGG, M4A, AAC, WMA
 - **Video formats:** MP4, MKV, AVI, MOV, WebM — audio is automatically extracted
 > **Note:** Video file import requires [FFmpeg](#installing-ffmpeg) to be installed on your system.
 ### 2. Transcribe
 After importing, click **Transcribe** to start the transcription pipeline:
 - **Transcription:** Converts speech to text with word-level timestamps
 - **Speaker Detection:** Identifies different speakers (if configured — see [Speaker Detection](#speaker-detection))
 - A progress bar shows the current stage and percentage
 ### 3. Review and Edit
 - The **waveform** displays at the top — click anywhere to seek
 - The **transcript** shows below with speaker labels and timestamps
 - **Click any word** in the transcript to jump to that point in the audio
 - The current word highlights during playback
 - **Edit text** directly in the transcript — word timings are preserved
 ### 4. Export
 Click **Export** and choose a format:
 | Format | Extension | Best For |
 |--------|-----------|----------|
 | SRT | `.srt` | Video subtitles (most compatible) |
 | WebVTT | `.vtt` | Web video players, HTML5 |
 | ASS/SSA | `.ass` | Styled subtitles with speaker colors |
 | Plain Text | `.txt` | Reading, sharing, pasting |
 | Markdown | `.md` | Documentation, notes |
 All formats include speaker labels when speaker detection is enabled.
 ### 5. Save Project
 - **Ctrl+S** (Cmd+S) saves the current project as a `.vtn` file
 - This preserves the full transcript, speaker assignments, and edits
 - Reopen later to continue editing or re-export
 ---
 ## Playback Controls
 | Action | Shortcut |
 |--------|----------|
 | Play / Pause | **Space** |
 | Skip back 5s | **Left Arrow** |
 | Skip forward 5s | **Right Arrow** |
 | Seek to word | Click any word in the transcript |
 | Import audio | **Ctrl+O** / **Cmd+O** |
 | Open settings | **Ctrl+,** / **Cmd+,** |
 ---
 ## Speaker Detection
 Speaker detection (diarization) identifies who is speaking at each point in the audio. It requires a one-time setup:
 ### Setup
 1. Go to **Settings > Speakers**
 2. Create a free account at [huggingface.co](https://huggingface.co/join)
 3. Accept the license on **all three** model pages:
   - [pyannote/speaker-diarization-3.1](https://huggingface.co/pyannote/speaker-diarization-3.1)
   - [pyannote/segmentation-3.0](https://huggingface.co/pyannote/segmentation-3.0)
   - [pyannote/speaker-diarization-community-1](https://huggingface.co/pyannote/speaker-diarization-community-1)
 4. Create a token at [huggingface.co/settings/tokens](https://huggingface.co/settings/tokens) (read access is sufficient)
 5. Paste the token in Settings and click **Test & Download Model**
 ### Speaker Options
 - **Number of speakers:** Set to auto-detect or specify a fixed number for faster results
 - **Skip speaker detection:** Check this to only transcribe without identifying speakers
 ### Managing Speakers
 After transcription, speakers appear as "Speaker 1", "Speaker 2", etc. in the left sidebar. Double-click a speaker name to rename it — the new name appears throughout the transcript and in exports.
 ---
 ## AI Chat
 The AI chat panel lets you ask questions about your transcript. The AI sees the full transcript with speaker labels as context.
 Example prompts:
 - "Summarize this conversation"
 - "What were the key action items?"
 - "What did Speaker 1 say about the budget?"
 ### Setting Up Ollama (Local AI)
 [Ollama](https://ollama.com) runs AI models locally on your computer — no API keys or internet required.
 1. **Install Ollama:**
   - Download from [ollama.com](https://ollama.com)
   - Or on Linux: `curl -fsSL https://ollama.com/install.sh | sh`
 2. **Pull a model:**
   ```bash
   ollama pull llama3.2
   ```
   Other good options: `mistral`, `gemma2`, `phi3`
 3. **Configure in Voice to Notes:**
   - Go to **Settings > AI Provider**
   - Select **Ollama**
   - URL: `http://localhost:11434` (default, usually no change needed)
   - Model: `llama3.2` (or whichever model you pulled)
 4. **Use:** Open the AI chat panel (right sidebar) and start asking questions
 ### Cloud AI Providers
 If you prefer cloud-based AI:
 **OpenAI:**
 - Select **OpenAI** in Settings > AI Provider
 - Enter your API key from [platform.openai.com/api-keys](https://platform.openai.com/api-keys)
 - Default model: `gpt-4o-mini`
 **Anthropic:**
 - Select **Anthropic** in Settings > AI Provider
 - Enter your API key from [console.anthropic.com](https://console.anthropic.com)
 - Default model: `claude-sonnet-4-6`
 **OpenAI Compatible:**
 - For any provider with an OpenAI-compatible API (vLLM, LiteLLM, etc.)
 - Enter the API base URL, key, and model name
 ---
 ## Settings Reference
 ### Transcription
 | Setting | Options | Default |
 |---------|---------|---------|
 | Whisper Model | tiny, base, small, medium, large-v3 | base |
 | Device | CPU, CUDA | CPU |
 | Language | Auto-detect, or specify (en, es, fr, etc.) | Auto-detect |
 **Model recommendations:**
 - **tiny/base:** Fast, good for clear audio with one speaker
 - **small:** Best balance of speed and accuracy
 - **medium:** Better accuracy, noticeably slower
 - **large-v3:** Best accuracy, requires 8GB+ VRAM (GPU) or 16GB+ RAM (CPU)
 ### Debug
 - **Enable Developer Tools:** Opens the browser inspector for debugging
 ---
 ## Installing FFmpeg
 FFmpeg is required for importing video files (MP4, MKV, AVI, etc.). It's used to extract the audio track before transcription.
 **Windows:**
 ```
 winget install ffmpeg
 ```
 Or download from [ffmpeg.org/download.html](https://ffmpeg.org/download.html) and add to your PATH.
 **macOS:**
 ```
 brew install ffmpeg
 ```
 **Linux (Debian/Ubuntu):**
 ```
 sudo apt install ffmpeg
 ```
 **Linux (Fedora/RHEL):**
 ```
 sudo dnf install ffmpeg
 ```
 After installing, restart Voice to Notes. FFmpeg is not needed for audio-only files (MP3, WAV, FLAC, etc.).
 ---
 ## Troubleshooting
 ### Video import fails / "FFmpeg not found"
 - Install FFmpeg using the instructions above
 - Make sure `ffmpeg` is in your system PATH
 - Restart Voice to Notes after installing
 ### Transcription is slow
 - Use a smaller model (tiny or base)
 - If you have an NVIDIA GPU, select CUDA in Settings > Transcription > Device
 - Ensure you downloaded the CUDA sidecar during setup
 ### Speaker detection not working
 - Verify your HuggingFace token in Settings > Speakers
 - Click "Test & Download Model" to re-download
 - Make sure you accepted the license on all three model pages
 ### Audio won't play / No waveform
 - Check that the audio file still exists at its original location
 - Try re-importing the file
 - Supported formats: MP3, WAV, FLAC, OGG, M4A, AAC, WMA
 ### App shows "Setting up Voice to Notes"
 - This is the first-launch sidecar download — it only happens once
 - If it fails, check your internet connection and click Retry
--- a/package-lock.json
+++ b/package-lock.json
@@ -1,12 +1,12 @@
 {
  "name": "voice-to-notes",
-  "version": "0.1.0",
+  "version": "0.2.10",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "voice-to-notes",
-      "version": "0.1.0",
+      "version": "0.2.10",
      "license": "MIT",
      "dependencies": {
        "@tauri-apps/api": "^2",
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
  "name": "voice-to-notes",
-  "version": "0.1.0",
+  "version": "0.2.44",
  "description": "Desktop app for transcribing audio/video with speaker identification",
  "type": "module",
  "scripts": {
@@ -11,7 +11,9 @@
    "check:watch": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json --watch",
    "lint": "eslint .",
    "test": "vitest",
-    "tauri": "tauri"
+    "tauri": "tauri",
    "tauri:dev": "VOICE_TO_NOTES_DEV=1 tauri dev",
    "sidecar:build": "cd python && python3 build_sidecar.py"
  },
  "license": "MIT",
  "dependencies": {
--- a/python/build_sidecar.py
+++ b/python/build_sidecar.py
@@ -0,0 +1,248 @@
 #!/usr/bin/env python3
 """Build the Voice to Notes sidecar as a standalone binary using PyInstaller.
 Usage:
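    python build_sidecar.py [--cpu-only | --with-cuda]
 Produces a directory `dist/voice-to-notes-sidecar/` containing the frozen
 sidecar binary and all dependencies, plus static ffmpeg/ffprobe binaries when
 a download is available for the platform. `rename_binary()` can add the Tauri
 target triple to the main binary's name for externalBin resolution; `main()`
 does not call it.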
 """
 from __future__ import annotations
 import argparse
 import os
 import platform
 import shutil
 import stat
 import subprocess
 import sys
 import urllib.request
 import zipfile
 from pathlib import Path
 SCRIPT_DIR = Path(__file__).resolve().parent
 DIST_DIR = SCRIPT_DIR / "dist"
 BUILD_DIR = SCRIPT_DIR / "build"
 SPEC_FILE = SCRIPT_DIR / "voice_to_notes.spec"
 # Static ffmpeg download URLs (GPL-licensed builds)
 FFMPEG_URLS: dict[str, str] = {
    "linux-x86_64": "https://johnvansickle.com/ffmpeg/releases/ffmpeg-release-amd64-static.tar.xz",
    "darwin-x86_64": "https://evermeet.cx/ffmpeg/getrelease/zip",
    "darwin-arm64": "https://evermeet.cx/ffmpeg/getrelease/zip",
    "win32-x86_64": "https://www.gyan.dev/ffmpeg/builds/ffmpeg-release-essentials.zip",
 }
 def get_target_triple() -> str:
    """Return the Tauri/Rust-style target triple describing the host machine.

    The architecture from ``platform.machine()`` is lower-cased and normalised
    (``amd64`` -> ``x86_64``, ``arm64`` -> ``aarch64``); architectures that are
    not in the alias table are passed through unchanged. Operating systems
    other than Linux, macOS and Windows yield ``<arch>-unknown-<system>``.
    """
    cpu_aliases = {
        "x86_64": "x86_64",
        "amd64": "x86_64",
        "aarch64": "aarch64",
        "arm64": "aarch64",
    }
    triple_templates = {
        "linux": "{arch}-unknown-linux-gnu",
        "darwin": "{arch}-apple-darwin",
        "windows": "{arch}-pc-windows-msvc",
    }

    raw_cpu = platform.machine().lower()
    os_name = platform.system().lower()
    cpu = cpu_aliases.get(raw_cpu, raw_cpu)

    template = triple_templates.get(os_name)
    if template is None:
        # Unrecognised OS: build a generic triple from what we know.
        return f"{cpu}-unknown-{os_name}"
    return template.format(arch=cpu)
 def _has_uv() -> bool:
    """Report whether the ``uv`` executable can be run successfully.

    Probes by running ``uv --version``. A missing executable and a non-zero
    exit status are both treated as "not available".
    """
    probe = ["uv", "--version"]
    try:
        subprocess.run(probe, capture_output=True, check=True)
    except (FileNotFoundError, subprocess.CalledProcessError):
        return False
    else:
        return True
 def create_venv_and_install(cpu_only: bool) -> Path:
    """Build a clean virtualenv under ``BUILD_DIR`` and install the sidecar into it.

    Any venv already present at the target location is deleted first. ``uv`` is
    used when it is available; otherwise the standard ``venv`` module and
    ``pip`` are used.

    Args:
        cpu_only: When true, PyTorch comes from the CPU-only wheel index;
            otherwise from the CUDA 12.6 wheel index.

    Returns:
        Path to the Python interpreter inside the new venv.

    Raises:
        subprocess.CalledProcessError: If a venv or install command fails.
    """
    venv_dir = BUILD_DIR / "sidecar-venv"
    if venv_dir.exists():
        shutil.rmtree(venv_dir)

    use_uv = _has_uv()
    if use_uv:
        print(f"[build] Creating venv with uv at {venv_dir}")
        # Pin the venv to the same major.minor as the interpreter running this script.
        py_version = f"{sys.version_info.major}.{sys.version_info.minor}"
        venv_cmd = ["uv", "venv", "--python", py_version, str(venv_dir)]
    else:
        print(f"[build] Creating venv at {venv_dir}")
        venv_cmd = [sys.executable, "-m", "venv", str(venv_dir)]
    subprocess.run(venv_cmd, check=True)

    # The interpreter lives in a different place on Windows and POSIX venvs.
    if sys.platform == "win32":
        interpreter = venv_dir / "Scripts" / "python.exe"
    else:
        interpreter = venv_dir / "bin" / "python"
    python = str(interpreter)

    def pip_install(*args: str) -> None:
        """Install into the venv; pass package names and flags only, not 'install'."""
        if use_uv:
            # uv is pointed at the venv directory rather than the python binary.
            installer = ["uv", "pip", "install", "--python", str(venv_dir)]
        else:
            installer = [python, "-m", "pip", "install"]
        subprocess.run([*installer, *args], check=True)

    if not use_uv:
        # Only the pip path upgrades the packaging tools first.
        pip_install("--upgrade", "pip", "setuptools", "wheel")

    # torch is installed first from an explicit wheel index so the CPU/CUDA
    # variant is chosen here (CPU-only avoids bundling ~2GB of CUDA libs).
    if cpu_only:
        flavour, wheel_index = "CPU-only", "https://download.pytorch.org/whl/cpu"
    else:
        flavour, wheel_index = "CUDA 12.6", "https://download.pytorch.org/whl/cu126"
    print(f"[build] Installing PyTorch ({flavour})")
    pip_install("torch", "torchaudio", "--index-url", wheel_index)

    # The [dev] extra is requested so the build tooling is installed as well.
    print("[build] Installing project dependencies")
    pip_install("-e", f"{SCRIPT_DIR}[dev]")

    return Path(python)
 def run_pyinstaller(python: Path) -> Path:
    """Freeze the sidecar by running PyInstaller against the project spec file.

    Args:
        python: Interpreter used to run ``-m PyInstaller``.

    Returns:
        The ``voice-to-notes-sidecar`` directory inside ``DIST_DIR``.

    Raises:
        subprocess.CalledProcessError: If PyInstaller exits with an error.
        RuntimeError: If PyInstaller finished but the output directory is missing.
    """
    print("[build] Running PyInstaller")
    command = [
        str(python),
        "-m",
        "PyInstaller",
        "--clean",
        "--noconfirm",
        str(SPEC_FILE),
    ]
    subprocess.run(command, cwd=str(SCRIPT_DIR), check=True)

    bundle_dir = DIST_DIR / "voice-to-notes-sidecar"
    if bundle_dir.exists():
        return bundle_dir
    raise RuntimeError(f"PyInstaller output not found at {bundle_dir}")
 def download_ffmpeg(output_dir: Path) -> None:
    """Download static ffmpeg/ffprobe binaries for the host platform.

    The download URL is looked up in ``FFMPEG_URLS`` under the key
    ``"<sys.platform>-<arch>"`` (arch normalised to ``x86_64`` or ``arm64``).
    Files named ``ffmpeg``/``ffprobe`` (with ``.exe`` in zip archives) are
    written directly into ``output_dir`` and marked executable on non-Windows
    hosts.

    The archive format is detected from the downloaded file's contents, not
    from the URL suffix: some configured URLs (``.../getrelease/zip``) serve a
    zip without a ``.zip`` extension, and a suffix check would download such an
    archive but never unpack it.

    This step is best-effort: a missing URL, a failed download, or an archive
    without the expected binaries only prints a warning and never raises.

    Args:
        output_dir: Existing directory that receives the binaries.
    """
    import tarfile

    wanted_in_tar = ("ffmpeg", "ffprobe")
    wanted_in_zip = ("ffmpeg", "ffprobe", "ffmpeg.exe", "ffprobe.exe")

    machine = platform.machine().lower()
    if machine in ("amd64", "x86_64"):
        machine = "x86_64"
    elif machine in ("aarch64", "arm64"):
        machine = "arm64"
    key = f"{sys.platform}-{machine}"

    url = FFMPEG_URLS.get(key)
    if not url:
        print(f"[build] Warning: No ffmpeg download URL for platform {key}, skipping")
        return

    print(f"[build] Downloading ffmpeg for {key}")
    tmp_path = output_dir / "ffmpeg_download"
    extracted = []
    try:
        urllib.request.urlretrieve(url, str(tmp_path))

        # Tar is probed first: a zip never parses as a tar archive, so the
        # order cannot misclassify either format.
        if tarfile.is_tarfile(str(tmp_path)):
            with tarfile.open(str(tmp_path), "r:*") as tar:
                for member in tar.getmembers():
                    basename = os.path.basename(member.name)
                    if basename not in wanted_in_tar or not member.isfile():
                        continue
                    # Flatten the archive path so the binary lands directly
                    # in output_dir.
                    member.name = basename
                    tar.extract(member, path=str(output_dir))
                    dest = output_dir / basename
                    dest.chmod(dest.stat().st_mode | stat.S_IEXEC)
                    extracted.append(basename)
        elif zipfile.is_zipfile(str(tmp_path)):
            with zipfile.ZipFile(str(tmp_path), "r") as zf:
                for name in zf.namelist():
                    basename = os.path.basename(name)
                    if basename not in wanted_in_zip:
                        continue
                    dest = output_dir / basename
                    dest.write_bytes(zf.read(name))
                    if sys.platform != "win32":
                        dest.chmod(dest.stat().st_mode | stat.S_IEXEC)
                    extracted.append(basename)
        else:
            print(f"[build] Warning: Unrecognized ffmpeg archive format from {url}")

        if extracted:
            print("[build] ffmpeg downloaded successfully")
        else:
            print("[build] Warning: No ffmpeg/ffprobe binaries were extracted")
    except Exception as e:
        # Deliberately broad: a missing ffmpeg must not abort the sidecar build.
        print(f"[build] Warning: Failed to download ffmpeg: {e}")
    finally:
        if tmp_path.exists():
            tmp_path.unlink()
 def rename_binary(output_dir: Path, target_triple: str) -> None:
    """Append the target triple to the sidecar executable's file name.

    On Windows the ``.exe`` suffix is kept after the triple. If the expected
    executable is not present, a warning is printed and nothing is renamed.

    Args:
        output_dir: Directory that contains the frozen sidecar executable.
        target_triple: Triple to append, e.g. ``x86_64-unknown-linux-gnu``.
    """
    extension = ".exe" if sys.platform == "win32" else ""
    original = output_dir / f"voice-to-notes-sidecar{extension}"
    renamed = output_dir / f"voice-to-notes-sidecar-{target_triple}{extension}"

    if not original.exists():
        print(f"[build] Warning: Expected binary not found at {original}")
        return

    print(f"[build] Renaming {original.name} -> {renamed.name}")
    original.rename(renamed)
 def main() -> None:
    """Command-line entry point: build the venv, freeze the sidecar, add ffmpeg.

    A CPU-only build is the default. ``--with-cuda`` switches to the CUDA
    PyTorch wheels. ``--cpu-only`` is accepted for explicitness but has no
    effect of its own: the variant is decided solely by whether
    ``--with-cuda`` was given, so ``--with-cuda`` wins if both are passed.
    """
    parser = argparse.ArgumentParser(description="Build the Voice to Notes sidecar binary")
    parser.add_argument(
        "--cpu-only",
        action="store_true",
        default=True,
        help="Install CPU-only PyTorch (default: True, avoids bundling CUDA)",
    )
    parser.add_argument(
        "--with-cuda",
        action="store_true",
        help="Install PyTorch with CUDA support",
    )
    args = parser.parse_args()

    # args.cpu_only is always True (store_true with default=True), so only
    # --with-cuda carries information.
    cpu_only = not args.with_cuda

    target_triple = get_target_triple()
    print(f"[build] Target triple: {target_triple}")
    print(f"[build] CPU-only: {cpu_only}")

    python = create_venv_and_install(cpu_only)
    output_dir = run_pyinstaller(python)
    download_ffmpeg(output_dir)

    print(f"\n[build] Done! Sidecar built at: {output_dir}")
    # Plain string: there are no placeholders, so no f-prefix (ruff F541).
    print("[build] Copy directory to src-tauri/sidecar/ for Tauri resource bundling")
 if __name__ == "__main__":
    main()
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "voice-to-notes"
-version = "0.1.0"
+version = "1.0.13"
 description = "Python sidecar for Voice to Notes — transcription, diarization, and AI services"
 requires-python = ">=3.11"
 license = "MIT"
@@ -13,6 +13,9 @@ dependencies = [
    "faster-whisper>=1.1.0",
    "pyannote.audio>=3.1.0",
    "pysubs2>=1.7.0",
    "openai>=1.0.0",
    "anthropic>=0.20.0",
    "soundfile>=0.12.0",
 ]
 [project.optional-dependencies]
@@ -20,6 +23,7 @@ dev = [
    "ruff>=0.8.0",
    "pytest>=8.0.0",
    "pytest-asyncio>=0.24.0",
    "pyinstaller>=6.0",
 ]
 [tool.ruff]
--- a/python/tests/test_diarize.py
+++ b/python/tests/test_diarize.py
@@ -1,7 +1,13 @@
 """Tests for diarization service data structures and payload conversion."""
 import time
 from unittest.mock import MagicMock, patch
 import pytest
 from voice_to_notes.services.diarize import (
    DiarizationResult,
    DiarizeService,
    SpeakerSegment,
    diarization_to_payload,
 )
@@ -31,3 +37,74 @@ def test_diarization_to_payload_empty():
    assert payload["num_speakers"] == 0
    assert payload["speaker_segments"] == []
    assert payload["speakers"] == []
 def test_diarize_threading_progress(monkeypatch):
    """Diarization should report progress while the pipeline call is still running.

    A fake pipeline that blocks for five seconds stands in for the real one,
    and every message passed to ``write_message`` is captured for inspection.
    """
    captured = []

    def fake_pipeline(file_path, **kwargs):
        # Long enough for periodic progress reporting to fire
        # (the expectation below assumes a ~2s reporting interval).
        time.sleep(5)
        track = MagicMock()
        track.start = 0.0
        track.end = 5.0
        # spec=[] gives the mock an empty attribute set, so
        # hasattr(result, "speaker_diarization") is False.
        outcome = MagicMock(spec=[])
        outcome.itertracks = MagicMock(return_value=[(track, None, "SPEAKER_00")])
        return outcome

    service = DiarizeService()
    service._pipeline = MagicMock(side_effect=fake_pipeline)

    with patch(
        "voice_to_notes.services.diarize.write_message",
        lambda msg: captured.append(msg),
    ):
        result = service.diarize(
            request_id="req-1",
            file_path="/fake/audio.wav",
            audio_duration_sec=60.0,
        )

    def is_elapsed_update(message):
        # Only in-flight "diarizing" updates count, not loading/done messages.
        if message.type != "progress":
            return False
        if message.payload.get("stage") != "diarizing":
            return False
        return "elapsed" in message.payload.get("message", "")

    diarizing_msgs = [message for message in captured if is_elapsed_update(message)]

    # 5s sleep / 2s interval should give roughly two updates; require one.
    assert len(diarizing_msgs) >= 1, (
        f"Expected at least 1 diarizing progress message, got {len(diarizing_msgs)}"
    )

    # In-flight progress is expected to stay within the 20-85 band.
    for message in diarizing_msgs:
        pct = message.payload["percent"]
        assert 20 <= pct <= 85, f"Progress {pct} out of expected range 20-85"

    # The single mocked track must come back as one speaker.
    assert result.num_speakers == 1
    assert result.speakers == ["SPEAKER_00"]
 def test_diarize_threading_error_propagation(monkeypatch):
    """An exception raised by the pipeline must surface from ``diarize()``."""
    service = DiarizeService()
    service._pipeline = MagicMock(side_effect=RuntimeError("Pipeline crashed"))

    silence_output = patch(
        "voice_to_notes.services.diarize.write_message", lambda m: None
    )
    with silence_output, pytest.raises(RuntimeError, match="Pipeline crashed"):
        service.diarize(
            request_id="req-1",
            file_path="/fake/audio.wav",
            audio_duration_sec=30.0,
        )
--- a/python/tests/test_messages.py
+++ b/python/tests/test_messages.py
@@ -3,8 +3,10 @@
 from voice_to_notes.ipc.messages import (
    IPCMessage,
    error_message,
    partial_segment_message,
    progress_message,
    ready_message,
    speaker_update_message,
 )
@@ -48,3 +50,16 @@ def test_ready_message():
    assert msg.type == "ready"
    assert msg.id == "system"
    assert "version" in msg.payload
 def test_partial_segment_message():
    """``partial_segment_message`` yields a ``pipeline.segment`` message carrying the segment fields."""
    segment = {"index": 0, "text": "hello"}
    built = partial_segment_message("req-1", segment)
    assert built.type == "pipeline.segment"
    payload = built.payload
    assert payload["index"] == 0
    assert payload["text"] == "hello"
 def test_speaker_update_message():
    """``speaker_update_message`` yields a ``pipeline.speaker_update`` message with the updates under ``"updates"``."""
    speaker_updates = [{"index": 0, "speaker": "SPEAKER_00"}]
    built = speaker_update_message("req-1", speaker_updates)
    assert built.type == "pipeline.speaker_update"
    first_update = built.payload["updates"][0]
    assert first_update["speaker"] == "SPEAKER_00"
--- a/python/tests/test_pipeline.py
+++ b/python/tests/test_pipeline.py
@@ -88,3 +88,18 @@ def test_merge_results_no_speaker_segments():
    result = service._merge_results(transcription, [])
    assert result.segments[0].speaker is None
 def test_speaker_update_generation():
    """Only segments that carry a speaker label produce a speaker-update entry.

    The update dicts are built inline from a hand-made ``PipelineResult``; the
    segment whose speaker is ``None`` must be left out.
    """
    segment_specs = [
        ("Hello", 0, 1000, "SPEAKER_00"),
        ("World", 1000, 2000, "SPEAKER_01"),
        ("Foo", 2000, 3000, None),
    ]
    result = PipelineResult(
        segments=[
            PipelineSegment(text=text, start_ms=start, end_ms=end, speaker=speaker)
            for text, start, end, speaker in segment_specs
        ],
    )
    updates = []
    for position, segment in enumerate(result.segments):
        if segment.speaker:
            updates.append({"index": position, "speaker": segment.speaker})
    assert len(updates) == 2
    first, second = updates
    assert first == {"index": 0, "speaker": "SPEAKER_00"}
    assert second == {"index": 1, "speaker": "SPEAKER_01"}
--- a/python/tests/test_protocol.py
+++ b/python/tests/test_protocol.py
@@ -5,16 +5,23 @@ import json
 from voice_to_notes.ipc.messages import IPCMessage
 from voice_to_notes.ipc.protocol import read_message, write_message
 import voice_to_notes.ipc.protocol as protocol
-def test_write_message(capsys):
+def test_write_message():
    buf = io.StringIO()
    # Temporarily replace the IPC output stream
    old_out = protocol._ipc_out
    protocol._ipc_out = buf
    try:
        msg = IPCMessage(id="req-1", type="pong", payload={"ok": True})
        write_message(msg)
-    captured = capsys.readouterr()
+        parsed = json.loads(buf.getvalue().strip())
    parsed = json.loads(captured.out.strip())
        assert parsed["id"] == "req-1"
        assert parsed["type"] == "pong"
        assert parsed["payload"]["ok"] is True
    finally:
        protocol._ipc_out = old_out
 def test_read_message(monkeypatch):
--- a/python/tests/test_transcribe.py
+++ b/python/tests/test_transcribe.py
@@ -1,7 +1,10 @@
 """Tests for transcription service."""
 import inspect
 from voice_to_notes.services.transcribe import (
    SegmentResult,
    TranscribeService,
    TranscriptionResult,
    WordResult,
    result_to_payload,
@@ -49,3 +52,149 @@ def test_result_to_payload_empty():
    assert payload["segments"] == []
    assert payload["language"] == ""
    assert payload["duration_ms"] == 0
 def test_on_segment_callback():
    """Check that ``TranscribeService.transcribe`` exposes an ``on_segment`` parameter.

    This is a signature-level check only: no callback is passed or invoked.
    Verifying the arguments a callback receives would need a full integration
    test with a mocked WhisperModel.
    """
    service = TranscribeService()
    # Inspect the bound method so only caller-visible parameters are listed
    sig = inspect.signature(service.transcribe)
    assert "on_segment" in sig.parameters
 def test_progress_every_segment(monkeypatch):
    """Verify a progress message is sent for every segment, not just every 5th.

    A fake model yielding eight segments is installed on the service and
    ``write_message`` is replaced with a recorder. The test then counts the
    recorded ``progress`` messages whose stage is ``transcribing``.
    """
    from unittest.mock import MagicMock, patch
    from voice_to_notes.services.transcribe import TranscribeService
    # Eight one-second fake segments (more than 5, to exercise the old cadence)
    fake_segments = [
        MagicMock(start=float(n), end=float(n + 1), text=f"Segment {n}", words=[])
        for n in range(8)
    ]
    fake_info = MagicMock(language="en", language_probability=0.99, duration=8.0)
    fake_model = MagicMock()
    fake_model.transcribe.return_value = (iter(fake_segments), fake_info)
    # Pre-populate the service's private model state with the fake
    # (presumably so transcribe() does not try to load a real model)
    service = TranscribeService()
    service._model = fake_model
    service._current_model_name = "base"
    service._current_device = "cpu"
    service._current_compute_type = "int8"
    # Recorder standing in for the IPC writer
    written_messages = []
    def record(msg):
        written_messages.append(msg)
    with patch("voice_to_notes.services.transcribe.write_message", record):
        service.transcribe("req-1", "/fake/audio.wav")
    # Keep only progress messages for the "transcribing" stage
    transcribing_msgs = []
    for message in written_messages:
        if message.type != "progress":
            continue
        if message.payload.get("stage") == "transcribing":
            transcribing_msgs.append(message)
    # One message per segment is the minimum; an initial
    # "Starting transcription..." message may add one more on top.
    assert len(transcribing_msgs) >= 8, (
        f"Expected at least 8 transcribing progress messages (one per segment), got {len(transcribing_msgs)}"
    )
 def test_chunk_report_size_progress():
    """Pin the ``CHUNK_REPORT_SIZE`` constant of the transcribe module to 10."""
    from voice_to_notes.services.transcribe import CHUNK_REPORT_SIZE
    expected_size = 10
    assert CHUNK_REPORT_SIZE == expected_size
 def test_transcribe_chunked_with_mocked_ffmpeg(monkeypatch):
    """Run ``transcribe_chunked`` against stubbed ffprobe/ffmpeg and a fake Whisper model.

    ffprobe is faked to report 700 seconds of audio, ffmpeg to create an empty
    output file, and the model to yield three one-second segments per call.
    The test checks the combined result's segments, duration and language.
    """
    import pathlib
    import subprocess
    from unittest.mock import MagicMock, patch
    from voice_to_notes.services.transcribe import TranscribeService, SegmentResult, WordResult
    # Keep a handle on the real subprocess.run for commands we do not fake
    real_run = subprocess.run
    def fake_run(cmd, **kwargs):
        if "ffprobe" in cmd:
            # Duration probe: 700s (~2 chunks at 300s each)
            return MagicMock(stdout="700.0\n", returncode=0)
        if "ffmpeg" in cmd:
            # Simulate chunk extraction by creating an empty file;
            # the output path is the last argument of the command
            pathlib.Path(cmd[-1]).touch()
            return MagicMock(returncode=0)
        return real_run(cmd, **kwargs)
    def fake_transcribe(file_path, **kwargs):
        segments = [
            MagicMock(start=float(n), end=float(n + 1), text=f"Segment {n}", words=[])
            for n in range(3)
        ]
        info = MagicMock(language="en", language_probability=0.99, duration=300.0)
        return iter(segments), info
    whisper_stub = MagicMock()
    whisper_stub.transcribe = fake_transcribe
    service = TranscribeService()
    service._model = whisper_stub
    service._current_model_name = "base"
    service._current_device = "cpu"
    service._current_compute_type = "int8"
    written_messages = []
    def record(msg):
        written_messages.append(msg)
    run_patch = patch("subprocess.run", fake_run)
    write_patch = patch("voice_to_notes.services.transcribe.write_message", record)
    with run_patch, write_patch:
        result = service.transcribe_chunked("req-1", "/fake/long_audio.wav")
    # Segments from the chunks must have been collected
    segment_total = len(result.segments)
    assert segment_total > 0
    # Timestamp offsets: the first chunk starts at 0, so the first segment
    # after it (index 3) should be shifted by at least 300000ms
    if segment_total > 3:
        assert result.segments[3].start_ms >= 300000
    assert result.duration_ms == 700000
    assert result.language == "en"
--- a/python/voice_to_notes.spec
+++ b/python/voice_to_notes.spec
@@ -0,0 +1,80 @@
 # -*- mode: python ; coding: utf-8 -*-
 """PyInstaller spec for the Voice to Notes sidecar binary."""
 from PyInstaller.utils.hooks import collect_all
 block_cipher = None
 # Packages gathered wholesale with collect_all(): they ship shared libraries
 # and data files that PyInstaller often misses for ML packages.
 COLLECTED_PACKAGES = ("ctranslate2", "faster_whisper", "pyannote", "soundfile")
 bundled_datas = []
 bundled_binaries = []
 bundled_hiddenimports = []
 for package_name in COLLECTED_PACKAGES:
    pkg_datas, pkg_binaries, pkg_hiddenimports = collect_all(package_name)
    bundled_datas += pkg_datas
    bundled_binaries += pkg_binaries
    bundled_hiddenimports += pkg_hiddenimports
 # Modules named explicitly as hidden imports, ahead of the collected ones
 explicit_hiddenimports = [
    "torch",
    "torchaudio",
    "soundfile",
    "huggingface_hub",
    "pysubs2",
    "openai",
    "anthropic",
    "litellm",
 ]
 excluded_modules = [
    "tkinter", "test", "pip", "setuptools",
    # ctranslate2.converters imports torch at module level and causes
    # circular import crashes under PyInstaller. These modules are only
    # needed for model format conversion, never for inference.
    "ctranslate2.converters",
    # torchcodec is partially bundled by PyInstaller but non-functional
    # (missing FFmpeg shared libs). Excluding it forces pyannote.audio
    # to fall back to torchaudio for audio decoding.
    "torchcodec",
 ]
 a = Analysis(
    ["voice_to_notes/main.py"],
    pathex=[],
    binaries=bundled_binaries,
    datas=bundled_datas,
    hiddenimports=explicit_hiddenimports + bundled_hiddenimports,
    hookspath=[],
    hooksconfig={},
    runtime_hooks=[],
    excludes=excluded_modules,
    win_no_prefer_redirects=False,
    win_private_assemblies=False,
    cipher=block_cipher,
    noarchive=False,
 )
 pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)
 # exclude_binaries=True keeps binaries out of the EXE itself; they are
 # handed to COLLECT below together with the zipfiles and datas.
 exe = EXE(
    pyz,
    a.scripts,
    [],
    exclude_binaries=True,
    name="voice-to-notes-sidecar",
    debug=False,
    bootloader_ignore_signals=False,
    strip=False,
    upx=True,
    console=True,
 )
 coll = COLLECT(
    exe,
    a.binaries,
    a.zipfiles,
    a.datas,
    strip=False,
    upx=True,
    upx_exclude=[],
    name="voice-to-notes-sidecar",
 )
--- a/python/voice_to_notes/hardware/detect.py
+++ b/python/voice_to_notes/hardware/detect.py
@@ -2,7 +2,10 @@
 from __future__ import annotations
 import ctypes
 import os
 import platform
 import subprocess
 import sys
 from dataclasses import dataclass
@@ -21,6 +24,77 @@ class HardwareInfo:
    recommended_compute_type: str = "int8"
 def _detect_ram_mb() -> int:
    """Detect total system RAM in MB (cross-platform).
    Tries platform-specific methods in order:
    1. Linux: read /proc/meminfo
    2. macOS: sysctl hw.memsize
    3. Windows: GlobalMemoryStatusEx via ctypes
    4. Fallback: os.sysconf (most Unix systems)
    Returns 0 if all methods fail.
    """
    # Linux: read /proc/meminfo
    if sys.platform == "linux":
        try:
            with open("/proc/meminfo") as f:
                for line in f:
                    if line.startswith("MemTotal:"):
                        # Value is in kB
                        return int(line.split()[1]) // 1024
        except (FileNotFoundError, ValueError, OSError):
            pass
    # macOS: sysctl hw.memsize (returns bytes)
    if sys.platform == "darwin":
        try:
            result = subprocess.run(
                ["sysctl", "-n", "hw.memsize"],
                capture_output=True,
                text=True,
                check=True,
            )
            return int(result.stdout.strip()) // (1024 * 1024)
        except (subprocess.SubprocessError, ValueError, OSError):
            pass
    # Windows: GlobalMemoryStatusEx via ctypes
    if sys.platform == "win32":
        try:
            class MEMORYSTATUSEX(ctypes.Structure):
                _fields_ = [
                    ("dwLength", ctypes.c_ulong),
                    ("dwMemoryLoad", ctypes.c_ulong),
                    ("ullTotalPhys", ctypes.c_ulonglong),
                    ("ullAvailPhys", ctypes.c_ulonglong),
                    ("ullTotalPageFile", ctypes.c_ulonglong),
                    ("ullAvailPageFile", ctypes.c_ulonglong),
                    ("ullTotalVirtual", ctypes.c_ulonglong),
                    ("ullAvailVirtual", ctypes.c_ulonglong),
                    ("ullAvailExtendedVirtual", ctypes.c_ulonglong),
                ]
            mem_status = MEMORYSTATUSEX()
            mem_status.dwLength = ctypes.sizeof(MEMORYSTATUSEX)
            if ctypes.windll.kernel32.GlobalMemoryStatusEx(ctypes.byref(mem_status)):
                return int(mem_status.ullTotalPhys) // (1024 * 1024)
        except (AttributeError, OSError):
            pass
    # Fallback: os.sysconf (works on most Unix systems)
    try:
        page_size = os.sysconf("SC_PAGE_SIZE")
        phys_pages = os.sysconf("SC_PHYS_PAGES")
        if page_size > 0 and phys_pages > 0:
            return (page_size * phys_pages) // (1024 * 1024)
    except (ValueError, OSError, AttributeError):
        pass
    return 0
 def detect_hardware() -> HardwareInfo:
    """Detect available hardware and recommend model configuration."""
    info = HardwareInfo()
@@ -28,25 +102,26 @@ def detect_hardware() -> HardwareInfo:
    # CPU info
    info.cpu_cores = os.cpu_count() or 1
-    # RAM info
+    # RAM info (cross-platform)
-    try:
+    info.ram_mb = _detect_ram_mb()
        with open("/proc/meminfo") as f:
            for line in f:
                if line.startswith("MemTotal:"):
                    # Value is in kB
                    info.ram_mb = int(line.split()[1]) // 1024
                    break
    except (FileNotFoundError, ValueError):
        pass
-    # CUDA detection
+    # CUDA detection — verify runtime libraries actually work, not just torch detection
    try:
        import torch
        if torch.cuda.is_available():
            # Test that CUDA runtime libraries are actually loadable
            try:
                torch.zeros(1, device="cuda")
                info.has_cuda = True
                info.cuda_device_name = torch.cuda.get_device_name(0)
                info.vram_mb = torch.cuda.get_device_properties(0).total_mem // (1024 * 1024)
            except Exception as e:
                print(
                    f"[sidecar] CUDA detected but runtime unavailable: {e}. Using CPU.",
                    file=sys.stderr,
                    flush=True,
                )
    except ImportError:
        print("[sidecar] torch not available, GPU detection skipped", file=sys.stderr, flush=True)
--- a/python/voice_to_notes/ipc/handlers.py
+++ b/python/voice_to_notes/ipc/handlers.py
@@ -41,11 +41,15 @@ def ping_handler(msg: IPCMessage) -> IPCMessage:
 def make_transcribe_handler() -> HandlerFunc:
    """Create a transcription handler with a persistent TranscribeService."""
-    from voice_to_notes.services.transcribe import TranscribeService, result_to_payload
+    service = None
    service = TranscribeService()
    def handler(msg: IPCMessage) -> IPCMessage:
        nonlocal service
        if service is None:
            from voice_to_notes.services.transcribe import TranscribeService
            service = TranscribeService()
        from voice_to_notes.services.transcribe import result_to_payload
        payload = msg.payload
        result = service.transcribe(
            request_id=msg.id,
@@ -66,11 +70,15 @@ def make_transcribe_handler() -> HandlerFunc:
 def make_diarize_handler() -> HandlerFunc:
    """Create a diarization handler with a persistent DiarizeService."""
-    from voice_to_notes.services.diarize import DiarizeService, diarization_to_payload
+    service = None
    service = DiarizeService()
    def handler(msg: IPCMessage) -> IPCMessage:
        nonlocal service
        if service is None:
            from voice_to_notes.services.diarize import DiarizeService
            service = DiarizeService()
        from voice_to_notes.services.diarize import diarization_to_payload
        payload = msg.payload
        result = service.diarize(
            request_id=msg.id,
@@ -88,13 +96,90 @@ def make_diarize_handler() -> HandlerFunc:
    return handler
-def make_pipeline_handler() -> HandlerFunc:
+def make_diarize_download_handler() -> HandlerFunc:
-    """Create a full pipeline handler (transcribe + diarize + merge)."""
+    """Create a handler that downloads/validates the diarization model."""
-    from voice_to_notes.services.pipeline import PipelineService, pipeline_result_to_payload
+    import os
    service = PipelineService()
    def handler(msg: IPCMessage) -> IPCMessage:
        payload = msg.payload
        hf_token = payload.get("hf_token")
        try:
            import huggingface_hub
            # Disable pyannote telemetry (has a bug in v4.0.4)
            os.environ.setdefault("PYANNOTE_METRICS_ENABLED", "false")
            from pyannote.audio import Pipeline
            # Persist token globally so ALL huggingface_hub downloads use auth.
            # Setting env var alone isn't enough — pyannote's internal sub-downloads
            # (e.g. PLDA.from_pretrained) don't forward the token= parameter.
            # login() writes the token to ~/.cache/huggingface/token which
            # huggingface_hub reads automatically for all downloads.
            if hf_token:
                os.environ["HF_TOKEN"] = hf_token
                huggingface_hub.login(token=hf_token, add_to_git_credential=False)
            # Pre-download sub-models that pyannote loads internally.
            # This ensures they're cached before Pipeline.from_pretrained
            # tries to load them (where token forwarding can fail).
            sub_models = [
                "pyannote/segmentation-3.0",
                "pyannote/speaker-diarization-community-1",
            ]
            for model_id in sub_models:
                print(f"[sidecar] Pre-downloading {model_id}...", file=sys.stderr, flush=True)
                huggingface_hub.snapshot_download(model_id, token=hf_token)
            print("[sidecar] Downloading diarization pipeline...", file=sys.stderr, flush=True)
            pipeline = Pipeline.from_pretrained(
                "pyannote/speaker-diarization-3.1",
                token=hf_token,
            )
            print("[sidecar] Diarization model downloaded successfully", file=sys.stderr, flush=True)
            return IPCMessage(
                id=msg.id,
                type="diarize.download.result",
                payload={"ok": True},
            )
        except Exception as e:
            error_msg = str(e)
            print(f"[sidecar] Model download error: {error_msg}", file=sys.stderr, flush=True)
            # Make common errors more user-friendly
            if "403" in error_msg or "gated" in error_msg.lower():
                # Try to extract the specific model name from the error
                import re
                model_match = re.search(r"pyannote/[\w-]+", error_msg)
                if model_match:
                    model_name = model_match.group(0)
                    error_msg = (
                        f"Access denied for {model_name}. "
                        f"Please visit huggingface.co/{model_name} "
                        f"and accept the license agreement, then try again."
                    )
                else:
                    error_msg = (
                        "Access denied. Please accept the license agreements for all "
                        "required pyannote models on HuggingFace."
                    )
            elif "401" in error_msg:
                error_msg = "Invalid token. Please check your HuggingFace token."
            return error_message(msg.id, "download_error", error_msg)
    return handler
 def make_pipeline_handler() -> HandlerFunc:
    """Create a full pipeline handler (transcribe + diarize + merge)."""
    service = None
    def handler(msg: IPCMessage) -> IPCMessage:
        nonlocal service
        if service is None:
            from voice_to_notes.services.pipeline import PipelineService
            service = PipelineService()
        from voice_to_notes.services.pipeline import pipeline_result_to_payload
        payload = msg.payload
        result = service.run(
            request_id=msg.id,
@@ -107,6 +192,7 @@ def make_pipeline_handler() -> HandlerFunc:
            min_speakers=payload.get("min_speakers"),
            max_speakers=payload.get("max_speakers"),
            skip_diarization=payload.get("skip_diarization", False),
            hf_token=payload.get("hf_token"),
        )
        return IPCMessage(
            id=msg.id,
@@ -119,11 +205,15 @@ def make_pipeline_handler() -> HandlerFunc:
 def make_export_handler() -> HandlerFunc:
    """Create an export handler."""
-    from voice_to_notes.services.export import ExportService, make_export_request
+    service = None
    service = ExportService()
    def handler(msg: IPCMessage) -> IPCMessage:
        nonlocal service
        if service is None:
            from voice_to_notes.services.export import ExportService
            service = ExportService()
        from voice_to_notes.services.export import make_export_request
        request = make_export_request(msg.payload)
        output_path = service.export(request)
        return IPCMessage(
@@ -137,11 +227,14 @@ def make_export_handler() -> HandlerFunc:
 def make_ai_chat_handler() -> HandlerFunc:
    """Create an AI chat handler with persistent AIProviderService."""
-    from voice_to_notes.services.ai_provider import create_default_service
+    service = None
    service = create_default_service()
    def handler(msg: IPCMessage) -> IPCMessage:
        nonlocal service
        if service is None:
            from voice_to_notes.services.ai_provider import create_default_service
            service = create_default_service()
        payload = msg.payload
        action = payload.get("action", "chat")
@@ -161,15 +254,15 @@ def make_ai_chat_handler() -> HandlerFunc:
            )
        if action == "configure":
-            # Re-create a provider with custom settings
+            # Re-create a provider with custom settings and set it active
            provider_name = payload.get("provider", "")
            config = payload.get("config", {})
            if provider_name == "local":
                from voice_to_notes.providers.local_provider import LocalProvider
                service.register_provider("local", LocalProvider(
-                    base_url=config.get("base_url", "http://localhost:8080"),
+                    base_url=config.get("base_url", "http://localhost:11434/v1"),
-                    model=config.get("model", "local"),
+                    model=config.get("model", "llama3.2"),
                ))
            elif provider_name == "openai":
                from voice_to_notes.providers.openai_provider import OpenAIProvider
@@ -186,11 +279,17 @@ def make_ai_chat_handler() -> HandlerFunc:
                    model=config.get("model", "claude-sonnet-4-6"),
                ))
            elif provider_name == "litellm":
-                from voice_to_notes.providers.litellm_provider import LiteLLMProvider
+                from voice_to_notes.providers.litellm_provider import OpenAICompatibleProvider
-                service.register_provider("litellm", LiteLLMProvider(
+                service.register_provider("litellm", OpenAICompatibleProvider(
                    model=config.get("model", "gpt-4o-mini"),
                    api_key=config.get("api_key"),
                    api_base=config.get("api_base"),
                ))
            # Set the configured provider as active
            print(f"[sidecar] Configured AI provider: {provider_name} with config: {config}", file=sys.stderr, flush=True)
            if provider_name in ("local", "openai", "anthropic", "litellm"):
                service.set_active(provider_name)
            return IPCMessage(
                id=msg.id,
                type="ai.configured",
--- a/python/voice_to_notes/ipc/messages.py
+++ b/python/voice_to_notes/ipc/messages.py
@@ -34,6 +34,14 @@ def progress_message(request_id: str, percent: int, stage: str, message: str) ->
    )
 def partial_segment_message(request_id: str, segment_data: dict) -> IPCMessage:
    """Build a ``pipeline.segment`` message for ``request_id`` whose payload is ``segment_data`` itself."""
    message = IPCMessage(id=request_id, type="pipeline.segment", payload=segment_data)
    return message
 def speaker_update_message(request_id: str, updates: list[dict]) -> IPCMessage:
    """Build a ``pipeline.speaker_update`` message for ``request_id`` with ``updates`` under the ``"updates"`` key."""
    payload = {"updates": updates}
    return IPCMessage(id=request_id, type="pipeline.speaker_update", payload=payload)
 def error_message(request_id: str, code: str, message: str) -> IPCMessage:
    return IPCMessage(
        id=request_id,
--- a/python/voice_to_notes/ipc/protocol.py
+++ b/python/voice_to_notes/ipc/protocol.py
@@ -1,13 +1,53 @@
-"""JSON-line protocol reader/writer over stdin/stdout."""
+"""JSON-line protocol reader/writer over stdin/stdout.
 IMPORTANT: stdout is reserved exclusively for IPC messages.
 At init time we save the real stdout, then redirect sys.stdout → stderr
 so that any rogue print() calls from libraries don't corrupt the IPC stream.
 """
 from __future__ import annotations
 import io
 import json
 import os
 import sys
 from typing import Any
 from voice_to_notes.ipc.messages import IPCMessage
 # Save the real stdout fd for IPC before any library can pollute it.
 # Then redirect sys.stdout to stderr so library prints go to stderr.
 _ipc_out: io.TextIOWrapper | None = None
 def init_ipc() -> None:
    """Reserve the real stdout for IPC and point ``sys.stdout`` at stderr.

    The stdout file descriptor is duplicated and wrapped in a line-buffered
    UTF-8 text stream stored in the module-level ``_ipc_out``. ``sys.stdout``
    is then rebound to ``sys.stderr``, so print() calls from libraries no
    longer reach the IPC stream. Calling this again once ``_ipc_out`` is set
    does nothing.

    Must be called once at sidecar startup, before importing any ML libraries.
    """
    global _ipc_out
    if _ipc_out is None:
        # Duplicate the fd first so the IPC stream survives the redirect below
        ipc_fd = os.dup(sys.stdout.fileno())
        raw_file = io.FileIO(ipc_fd, "w")
        buffered = io.BufferedWriter(raw_file)
        _ipc_out = io.TextIOWrapper(buffered, encoding="utf-8", line_buffering=True)
        # From here on, print() from libraries goes to stderr
        sys.stdout = sys.stderr
 def _get_ipc_out() -> io.TextIOWrapper:
    """Return the stream IPC messages are written to.

    This is the module-level ``_ipc_out`` when it has been set, otherwise
    ``sys.__stdout__``.
    """
    # sys.__stdout__ covers the case where init_ipc() was never called (e.g. in tests)
    return sys.__stdout__ if _ipc_out is None else _ipc_out
 def read_message() -> IPCMessage | None:
    """Read a single JSON-line message from stdin. Returns None on EOF."""
@@ -29,17 +69,19 @@ def read_message() -> IPCMessage | None:
 def write_message(msg: IPCMessage) -> None:
-    """Write a JSON-line message to stdout."""
+    """Write a JSON-line message to the IPC channel (real stdout)."""
    out = _get_ipc_out()
    line = json.dumps(msg.to_dict(), separators=(",", ":"))
-    sys.stdout.write(line + "\n")
+    out.write(line + "\n")
-    sys.stdout.flush()
+    out.flush()
 def write_dict(data: dict[str, Any]) -> None:
-    """Write a raw dict as a JSON-line message to stdout."""
+    """Write a raw dict as a JSON-line message to the IPC channel."""
    out = _get_ipc_out()
    line = json.dumps(data, separators=(",", ":"))
-    sys.stdout.write(line + "\n")
+    out.write(line + "\n")
-    sys.stdout.flush()
+    out.flush()
 def _log(message: str) -> None:
--- a/python/voice_to_notes/main.py
+++ b/python/voice_to_notes/main.py
@@ -5,18 +5,26 @@ from __future__ import annotations
 import signal
 import sys
-from voice_to_notes.ipc.handlers import (
+
 # CRITICAL: Capture real stdout for IPC *before* importing any ML libraries
 # that might print to stdout and corrupt the JSON-line protocol.
 from voice_to_notes.ipc.protocol import init_ipc
 init_ipc()
 from voice_to_notes.ipc.handlers import (  # noqa: E402
    HandlerRegistry,
    hardware_detect_handler,
    make_ai_chat_handler,
    make_diarize_download_handler,
    make_diarize_handler,
    make_export_handler,
    make_pipeline_handler,
    make_transcribe_handler,
    ping_handler,
 )
-from voice_to_notes.ipc.messages import ready_message
+from voice_to_notes.ipc.messages import ready_message  # noqa: E402
-from voice_to_notes.ipc.protocol import read_message, write_message
+from voice_to_notes.ipc.protocol import read_message, write_message  # noqa: E402
 def create_registry() -> HandlerRegistry:
@@ -26,6 +34,7 @@ def create_registry() -> HandlerRegistry:
    registry.register("transcribe.start", make_transcribe_handler())
    registry.register("hardware.detect", hardware_detect_handler)
    registry.register("diarize.start", make_diarize_handler())
    registry.register("diarize.download", make_diarize_download_handler())
    registry.register("pipeline.start", make_pipeline_handler())
    registry.register("export.start", make_export_handler())
    registry.register("ai.chat", make_ai_chat_handler())
--- a/python/voice_to_notes/providers/litellm_provider.py
+++ b/python/voice_to_notes/providers/litellm_provider.py
@@ -1,4 +1,4 @@
-"""LiteLLM provider — multi-provider gateway."""
+"""OpenAI-compatible provider — works with any OpenAI-compatible API endpoint."""
 from __future__ import annotations
@@ -7,36 +7,44 @@ from typing import Any
 from voice_to_notes.providers.base import AIProvider
-class LiteLLMProvider(AIProvider):
+class OpenAICompatibleProvider(AIProvider):
-    """Routes through LiteLLM for access to 100+ LLM providers."""
+    """Connects to any OpenAI-compatible API (LiteLLM proxy, Ollama, vLLM, etc.)."""
-    def __init__(self, model: str = "gpt-4o-mini", **kwargs: Any) -> None:
+    def __init__(
        self,
        api_key: str | None = None,
        api_base: str | None = None,
        model: str = "gpt-4o-mini",
        **kwargs: Any,
    ) -> None:
        self._api_key = api_key or "sk-no-key"
        self._api_base = api_base
        self._model = model
        self._extra_kwargs = kwargs
    def chat(self, messages: list[dict[str, str]], **kwargs: Any) -> str:
-        try:
+        from openai import OpenAI
            import litellm
        except ImportError:
            raise RuntimeError("litellm package is required. Install with: pip install litellm")
-        merged_kwargs = {**self._extra_kwargs, **kwargs}
+        client_kwargs: dict[str, Any] = {"api_key": self._api_key}
-        response = litellm.completion(
+        if self._api_base:
-            model=merged_kwargs.get("model", self._model),
+            client_kwargs["base_url"] = self._api_base
        client = OpenAI(**client_kwargs)
        response = client.chat.completions.create(
            model=kwargs.get("model", self._model),
            messages=messages,
-            temperature=merged_kwargs.get("temperature", 0.7),
+            temperature=kwargs.get("temperature", 0.7),
-            max_tokens=merged_kwargs.get("max_tokens", 2048),
+            max_tokens=kwargs.get("max_tokens", 2048),
        )
        return response.choices[0].message.content or ""
    def is_available(self) -> bool:
        try:
-            import litellm  # noqa: F401
+            import openai  # noqa: F401
-
+            return bool(self._api_key and self._api_base)
            return True
        except ImportError:
            return False
    @property
    def name(self) -> str:
-        return "LiteLLM"
+        return "OpenAI Compatible"
--- a/python/voice_to_notes/providers/local_provider.py
+++ b/python/voice_to_notes/providers/local_provider.py
@@ -1,4 +1,4 @@
-"""Local AI provider — bundled llama-server (OpenAI-compatible API)."""
+"""Local AI provider — Ollama or any OpenAI-compatible API."""
 from __future__ import annotations
@@ -9,9 +9,9 @@ from voice_to_notes.providers.base import AIProvider
 class LocalProvider(AIProvider):
-    """Connects to bundled llama-server via its OpenAI-compatible API."""
+    """Connects to Ollama or any OpenAI-compatible API server."""
-    def __init__(self, base_url: str = "http://localhost:8080", model: str = "local") -> None:
+    def __init__(self, base_url: str = "http://localhost:11434/v1", model: str = "llama3.2") -> None:
        self._base_url = base_url.rstrip("/")
        self._model = model
        self._client: Any = None
@@ -24,8 +24,8 @@ class LocalProvider(AIProvider):
            from openai import OpenAI
            self._client = OpenAI(
-                base_url=f"{self._base_url}/v1",
+                base_url=self._base_url,
-                api_key="not-needed",  # llama-server doesn't require an API key
+                api_key="ollama",  # Ollama doesn't require a real key
            )
        except ImportError:
            raise RuntimeError(
@@ -47,7 +47,9 @@ class LocalProvider(AIProvider):
        try:
            import urllib.request
-            req = urllib.request.Request(f"{self._base_url}/health", method="GET")
+            # Check base URL without /v1 suffix for Ollama root endpoint
            root_url = self._base_url.replace("/v1", "")
            req = urllib.request.Request(root_url, method="GET")
            with urllib.request.urlopen(req, timeout=2) as resp:
                return resp.status == 200
        except Exception:
@@ -55,4 +57,4 @@ class LocalProvider(AIProvider):
    @property
    def name(self) -> str:
-        return "Local (llama-server)"
+        return "Ollama"
--- a/python/voice_to_notes/services/ai_provider.py
+++ b/python/voice_to_notes/services/ai_provider.py
@@ -92,7 +92,7 @@ class AIProviderService:
 def create_default_service() -> AIProviderService:
    """Create an AIProviderService with all supported providers registered."""
    from voice_to_notes.providers.anthropic_provider import AnthropicProvider
-    from voice_to_notes.providers.litellm_provider import LiteLLMProvider
+    from voice_to_notes.providers.litellm_provider import OpenAICompatibleProvider
    from voice_to_notes.providers.local_provider import LocalProvider
    from voice_to_notes.providers.openai_provider import OpenAIProvider
@@ -100,5 +100,5 @@ def create_default_service() -> AIProviderService:
    service.register_provider("local", LocalProvider())
    service.register_provider("openai", OpenAIProvider())
    service.register_provider("anthropic", AnthropicProvider())
-    service.register_provider("litellm", LiteLLMProvider())
+    service.register_provider("litellm", OpenAICompatibleProvider())
    return service
--- a/python/voice_to_notes/services/diarize.py
+++ b/python/voice_to_notes/services/diarize.py
@@ -2,14 +2,143 @@
 from __future__ import annotations
 import os
 import subprocess
 import sys
 import tempfile
 import threading
 import time
 from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Any
 # Disable pyannote telemetry — it has a bug in v4.0.4 where
 # np.isfinite(None) crashes when max_speakers is not set.
 os.environ.setdefault("PYANNOTE_METRICS_ENABLED", "false")
 from voice_to_notes.utils.ffmpeg import get_ffmpeg_path
 from voice_to_notes.ipc.messages import progress_message
 from voice_to_notes.ipc.protocol import write_message
 _patched = False
 def _patch_pyannote_audio() -> None:
    """Monkey-patch pyannote.audio.core.io.Audio to read audio via soundfile.
    pyannote.audio has a bug where AudioDecoder (from torchcodec) is used
    unconditionally even when torchcodec is not installed, causing NameError.
    This replaces Audio.__call__ and Audio.crop with soundfile-based versions.
    The patch is attempted at most once per process; if any import fails the
    failure is logged to stderr and pyannote is left untouched.
    """
    global _patched
    if _patched:
        return
    # Set the flag before trying so a failed attempt is not retried on every call.
    _patched = True
    try:
        import numpy as np
        import soundfile as sf
        import torch
        from pyannote.audio.core.io import Audio
        # Cache the decoded waveform to avoid re-reading the entire file for every
        # crop call. For a 3-hour file, crop is called 1000+ times — without
        # caching, each call reads ~345MB from disk.
        # Only the most recently loaded file is kept, so memory does not grow with
        # every file processed by a long-running sidecar.
        _audio_cache: dict[tuple, tuple] = {}
        def _cache_key(audio_path) -> tuple:
            """Build a key that changes when the file at this path is replaced."""
            path = str(audio_path)
            try:
                st = os.stat(path)
            except OSError:
                # Let sf.read() report the real error; key on the path alone.
                return (path, None, None)
            return (path, st.st_mtime_ns, st.st_size)
        def _sf_load(audio_path: str) -> tuple:
            """Load audio via soundfile with caching; returns (waveform, sample_rate)."""
            key = _cache_key(audio_path)
            cached = _audio_cache.get(key)
            if cached is not None:
                return cached
            data, sample_rate = sf.read(key[0], dtype="float32")
            waveform = torch.from_numpy(np.array(data))
            # soundfile yields (frames,) for mono and (frames, channels) otherwise;
            # normalise both to (channels, frames).
            if waveform.ndim == 1:
                waveform = waveform.unsqueeze(0)
            else:
                waveform = waveform.T
            # Evict whatever was cached before (another file, or a stale version
            # of this path) so at most one waveform is held in memory.
            _audio_cache.clear()
            _audio_cache[key] = (waveform, sample_rate)
            return waveform, sample_rate
        def _soundfile_call(self, file: dict) -> tuple:
            """Replacement for Audio.__call__."""
            return _sf_load(file["audio"])
        def _soundfile_crop(self, file: dict, segment, **kwargs) -> tuple:
            """Replacement for Audio.crop — load file once (cached) then slice.
            When ``duration`` is given the result always has exactly
            ``int(duration * sample_rate)`` samples: longer crops are truncated
            and shorter ones are zero-padded, which pyannote requires for
            batched embedding extraction.
            """
            duration = kwargs.get("duration", None)
            waveform, sample_rate = _sf_load(file["audio"])
            # Convert segment (seconds) to sample indices, clamped to the file bounds.
            start_sample = max(0, int(segment.start * sample_rate))
            end_sample = min(waveform.shape[-1], int(segment.end * sample_rate))
            if duration is not None:
                expected_samples = int(duration * sample_rate)
                # Rounding start and end separately can yield one sample too many;
                # cap the crop so every item in a batch has the same length.
                end_sample = min(end_sample, start_sample + expected_samples)
            else:
                expected_samples = int((segment.end - segment.start) * sample_rate)
            cropped = waveform[:, start_sample:end_sample]
            # Pad to expected duration if needed (pyannote batches require uniform size)
            missing = expected_samples - cropped.shape[-1]
            if missing > 0:
                pad = torch.zeros(cropped.shape[0], missing)
                cropped = torch.cat([cropped, pad], dim=-1)
            return cropped, sample_rate
        Audio.__call__ = _soundfile_call  # type: ignore[assignment]
        Audio.crop = _soundfile_crop  # type: ignore[assignment]
        print("[sidecar] Patched pyannote Audio to use soundfile", file=sys.stderr, flush=True)
    except Exception as e:
        print(f"[sidecar] Warning: Could not patch pyannote Audio: {e}", file=sys.stderr, flush=True)
 def _ensure_wav(file_path: str) -> tuple[str, str | None]:
    """Convert audio to 16kHz mono WAV if needed.
    pyannote.audio v4.0.4 has a bug where its AudioDecoder returns
    duration=None for some formats (FLAC, etc.), causing crashes.
    Converting to WAV ensures the duration header is always present.
    Returns:
        (path_to_use, temp_path_or_None)
        If conversion was needed, temp_path is the WAV file to clean up.
    """
    ext = Path(file_path).suffix.lower()
    if ext == ".wav":
        return file_path, None
    tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    tmp.close()
    try:
        subprocess.run(
            [
                get_ffmpeg_path(), "-y", "-i", file_path,
                "-ar", "16000", "-ac", "1", "-c:a", "pcm_s16le",
                tmp.name,
            ],
            check=True,
            capture_output=True,
        )
        print(
            f"[sidecar] Converted {ext} to WAV for diarization",
            file=sys.stderr,
            flush=True,
        )
        return tmp.name, tmp.name
    except (subprocess.CalledProcessError, FileNotFoundError) as e:
        # ffmpeg not available or failed — try original file and hope for the best
        print(
            f"[sidecar] WAV conversion failed ({e}), using original file",
            file=sys.stderr,
            flush=True,
        )
        os.unlink(tmp.name)
        return file_path, None
@dataclass
 class SpeakerSegment:
@@ -35,45 +164,60 @@ class DiarizeService:
    def __init__(self) -> None:
        self._pipeline: Any = None
-    def _ensure_pipeline(self) -> Any:
+    def _ensure_pipeline(self, hf_token: str | None = None) -> Any:
        """Load the pyannote diarization pipeline (lazy)."""
        if self._pipeline is not None:
            return self._pipeline
        print("[sidecar] Loading pyannote diarization pipeline...", file=sys.stderr, flush=True)
-        try:
+        # Use token from argument, fall back to environment variable
-            from pyannote.audio import Pipeline
+        if not hf_token:
            hf_token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN") or None
-            self._pipeline = Pipeline.from_pretrained(
+        # Persist token globally so ALL huggingface_hub sub-downloads use auth.
        # Pyannote has internal dependencies that don't forward the token= param.
        if hf_token:
            os.environ["HF_TOKEN"] = hf_token
            import huggingface_hub
            huggingface_hub.login(token=hf_token, add_to_git_credential=False)
        models = [
            "pyannote/speaker-diarization-3.1",
-                use_auth_token=False,
+            "pyannote/speaker-diarization",
-            )
+        ]
-        except Exception:
+
-            # Fall back to a simpler approach if the model isn't available
+        last_error: Exception | None = None
-            # pyannote requires HuggingFace token for some models
+        _patch_pyannote_audio()
-            # Try the community model first
+        for model_name in models:
            try:
                from pyannote.audio import Pipeline
-                self._pipeline = Pipeline.from_pretrained(
+                self._pipeline = Pipeline.from_pretrained(model_name, token=hf_token)
-                    "pyannote/speaker-diarization",
+                print(f"[sidecar] Loaded diarization model: {model_name}", file=sys.stderr, flush=True)
-                    use_auth_token=False,
+                # Move pipeline to GPU if available
-                )
+                try:
                    import torch
                    if torch.cuda.is_available():
                        self._pipeline = self._pipeline.to(torch.device("cuda"))
                        print(f"[sidecar] Diarization pipeline moved to GPU", file=sys.stderr, flush=True)
                except Exception as e:
                    print(f"[sidecar] GPU not available for diarization: {e}", file=sys.stderr, flush=True)
                return self._pipeline
            except Exception as e:
                last_error = e
                print(
-                    f"[sidecar] Warning: Could not load pyannote pipeline: {e}",
+                    f"[sidecar] Warning: Could not load {model_name}: {e}",
                    file=sys.stderr,
                    flush=True,
                )
        raise RuntimeError(
            "pyannote.audio pipeline not available. "
            "You may need to accept the model license at "
            "https://huggingface.co/pyannote/speaker-diarization-3.1 "
            "and set a HF_TOKEN environment variable."
-                ) from e
+        ) from last_error
        return self._pipeline
    def diarize(
        self,
@@ -82,6 +226,8 @@ class DiarizeService:
        num_speakers: int | None = None,
        min_speakers: int | None = None,
        max_speakers: int | None = None,
        hf_token: str | None = None,
        audio_duration_sec: float | None = None,
    ) -> DiarizationResult:
        """Run speaker diarization on an audio file.
@@ -99,7 +245,7 @@ class DiarizeService:
            progress_message(request_id, 0, "loading_diarization", "Loading diarization model...")
        )
-        pipeline = self._ensure_pipeline()
+        pipeline = self._ensure_pipeline(hf_token=hf_token)
        write_message(
            progress_message(request_id, 20, "diarizing", "Running speaker diarization...")
@@ -116,8 +262,62 @@ class DiarizeService:
        if max_speakers is not None:
            kwargs["max_speakers"] = max_speakers
-        # Run diarization
+        # Convert to WAV to work around pyannote v4.0.4 duration bug
-        diarization = pipeline(file_path, **kwargs)
+        audio_path, temp_wav = _ensure_wav(file_path)
        print(
            f"[sidecar] Running diarization on {audio_path} with kwargs: {kwargs}",
            file=sys.stderr,
            flush=True,
        )
        # Run diarization in background thread for progress reporting
        result_holder: list = [None]
        error_holder: list[Exception | None] = [None]
        done_event = threading.Event()
        def _run():
            try:
                result_holder[0] = pipeline(audio_path, **kwargs)
            except Exception as e:
                error_holder[0] = e
            finally:
                done_event.set()
        thread = threading.Thread(target=_run, daemon=True)
        thread.start()
        elapsed = 0.0
        estimated_total = max(audio_duration_sec * 0.8, 30.0) if audio_duration_sec else 120.0
        duration_str = ""
        if audio_duration_sec and audio_duration_sec > 600:
            mins = int(audio_duration_sec / 60)
            duration_str = f" ({mins}min audio, this may take a while)"
        while not done_event.wait(timeout=5.0):
            elapsed += 5.0
            pct = min(20 + int((elapsed / estimated_total) * 65), 85)
            elapsed_min = int(elapsed / 60)
            elapsed_sec = int(elapsed % 60)
            time_str = f"{elapsed_min}m{elapsed_sec:02d}s" if elapsed_min > 0 else f"{int(elapsed)}s"
            write_message(progress_message(
                request_id, pct, "diarizing",
                f"Analyzing speakers ({time_str} elapsed){duration_str}"))
        thread.join()
        # Clean up temp file
        if temp_wav:
            os.unlink(temp_wav)
        if error_holder[0] is not None:
            raise error_holder[0]
        raw_result = result_holder[0]
        # pyannote 4.0+ returns DiarizeOutput; older versions return Annotation directly
        if hasattr(raw_result, "speaker_diarization"):
            diarization = raw_result.speaker_diarization
        else:
            diarization = raw_result
        # Convert pyannote output to our format
        result = DiarizationResult()
--- a/python/voice_to_notes/services/pipeline.py
+++ b/python/voice_to_notes/services/pipeline.py
@@ -2,13 +2,19 @@
 from __future__ import annotations
 import concurrent.futures
 import sys
 import time
 from dataclasses import dataclass, field
 from typing import Any
-from voice_to_notes.ipc.messages import progress_message
+from voice_to_notes.ipc.messages import (
    partial_segment_message,
    progress_message,
    speaker_update_message,
 )
 from voice_to_notes.ipc.protocol import write_message
 from voice_to_notes.utils.ffmpeg import get_ffprobe_path
 from voice_to_notes.services.diarize import DiarizeService, SpeakerSegment
 from voice_to_notes.services.transcribe import (
    SegmentResult,
@@ -60,6 +66,7 @@ class PipelineService:
        min_speakers: int | None = None,
        max_speakers: int | None = None,
        skip_diarization: bool = False,
        hf_token: str | None = None,
    ) -> PipelineResult:
        """Run the full transcription + diarization pipeline.
@@ -77,22 +84,59 @@ class PipelineService:
        """
        start_time = time.time()
-        # Step 1: Transcribe
+        # Step 0: Probe audio duration for conditional chunked transcription
        write_message(
            progress_message(request_id, 0, "pipeline", "Starting transcription pipeline...")
        )
-        transcription = self._transcribe_service.transcribe(
+        def _emit_segment(seg: SegmentResult, index: int) -> None:
            write_message(partial_segment_message(request_id, {
                "index": index,
                "text": seg.text,
                "start_ms": seg.start_ms,
                "end_ms": seg.end_ms,
                "words": [{"word": w.word, "start_ms": w.start_ms, "end_ms": w.end_ms, "confidence": w.confidence} for w in seg.words],
            }))
        audio_duration_sec = None
        try:
            import subprocess
            probe_result = subprocess.run(
                [get_ffprobe_path(), "-v", "quiet", "-show_entries", "format=duration",
                 "-of", "default=noprint_wrappers=1:nokey=1", file_path],
                capture_output=True, text=True, check=True,
            )
            audio_duration_sec = float(probe_result.stdout.strip())
        except (subprocess.CalledProcessError, FileNotFoundError, ValueError):
            pass
        def _run_transcription() -> TranscriptionResult:
            """Run transcription (chunked or standard based on duration)."""
            from voice_to_notes.services.transcribe import LARGE_FILE_THRESHOLD_SEC
            if audio_duration_sec and audio_duration_sec > LARGE_FILE_THRESHOLD_SEC:
                return self._transcribe_service.transcribe_chunked(
                    request_id=request_id,
                    file_path=file_path,
                    model_name=model_name,
                    device=device,
                    compute_type=compute_type,
                    language=language,
                    on_segment=_emit_segment,
                )
            else:
                return self._transcribe_service.transcribe(
                    request_id=request_id,
                    file_path=file_path,
                    model_name=model_name,
                    device=device,
                    compute_type=compute_type,
                    language=language,
                    on_segment=_emit_segment,
                )
        if skip_diarization:
-            # Convert transcription directly without speaker labels
+            # Sequential: transcribe only, no diarization needed
            transcription = _run_transcription()
            result = PipelineResult(
                language=transcription.language,
                language_probability=transcription.language_probability,
@@ -110,37 +154,63 @@ class PipelineService:
                )
            return result
-        # Step 2: Diarize (with graceful fallback)
+        # Parallel execution: run transcription (0-45%) and diarization (45-90%)
        # concurrently, then merge (90-100%).
        write_message(
-            progress_message(request_id, 50, "pipeline", "Starting speaker diarization...")
+            progress_message(
                request_id, 0, "pipeline",
                "Starting transcription and diarization in parallel..."
            )
        )
        diarization = None
-        try:
+        diarization_error = None
-            diarization = self._diarize_service.diarize(
+
        with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
            transcription_future = executor.submit(_run_transcription)
            # Use probed audio_duration_sec for diarization progress estimation
            # (transcription hasn't finished yet, so we can't use transcription.duration_ms)
            diarization_future = executor.submit(
                self._diarize_service.diarize,
                request_id=request_id,
                file_path=file_path,
                num_speakers=num_speakers,
                min_speakers=min_speakers,
                max_speakers=max_speakers,
                hf_token=hf_token,
                audio_duration_sec=audio_duration_sec,
            )
            # Wait for both futures. We need the transcription result regardless,
            # but diarization may fail gracefully.
            transcription = transcription_future.result()
            write_message(
                progress_message(request_id, 45, "pipeline", "Transcription complete")
            )
            try:
                diarization = diarization_future.result()
            except Exception as e:
                import traceback
                diarization_error = e
                print(
                    f"[sidecar] Diarization failed, falling back to transcription-only: {e}",
                    file=sys.stderr,
                    flush=True,
                )
                traceback.print_exc(file=sys.stderr)
                write_message(
                    progress_message(
                        request_id, 80, "pipeline",
-                    "Diarization unavailable, using transcription only..."
+                        f"Diarization failed ({e}), using transcription only..."
                    )
                )
        # Step 3: Merge (or skip if diarization failed)
        if diarization is not None:
            write_message(
-                progress_message(request_id, 90, "pipeline", "Merging transcript with speakers...")
+                progress_message(request_id, 90, "merging", "Merging transcript with speakers...")
            )
            result = self._merge_results(transcription, diarization.speaker_segments)
            result.speakers = diarization.speakers
@@ -170,6 +240,10 @@ class PipelineService:
            flush=True,
        )
        updates = [{"index": i, "speaker": seg.speaker} for i, seg in enumerate(result.segments) if seg.speaker]
        if updates:
            write_message(speaker_update_message(request_id, updates))
        write_message(
            progress_message(request_id, 100, "done", "Pipeline complete")
        )
--- a/python/voice_to_notes/services/transcribe.py
+++ b/python/voice_to_notes/services/transcribe.py
@@ -4,6 +4,7 @@ from __future__ import annotations
 import sys
 import time
 from collections.abc import Callable
 from dataclasses import dataclass, field
 from typing import Any
@@ -11,6 +12,10 @@ from faster_whisper import WhisperModel
 from voice_to_notes.ipc.messages import progress_message
 from voice_to_notes.ipc.protocol import write_message
 from voice_to_notes.utils.ffmpeg import get_ffmpeg_path, get_ffprobe_path
 CHUNK_REPORT_SIZE = 10
 LARGE_FILE_THRESHOLD_SEC = 3600  # 1 hour
@dataclass
@@ -72,11 +77,28 @@ class TranscribeService:
            file=sys.stderr,
            flush=True,
        )
        try:
            self._model = WhisperModel(
                model_name,
                device=device,
                compute_type=compute_type,
            )
        except Exception as e:
            if device != "cpu":
                print(
                    f"[sidecar] Failed to load on {device}: {e}. Falling back to CPU.",
                    file=sys.stderr,
                    flush=True,
                )
                device = "cpu"
                compute_type = "int8"
                self._model = WhisperModel(
                    model_name,
                    device=device,
                    compute_type=compute_type,
                )
            else:
                raise
        self._current_model_name = model_name
        self._current_device = device
        self._current_compute_type = compute_type
@@ -90,17 +112,23 @@ class TranscribeService:
        device: str = "cpu",
        compute_type: str = "int8",
        language: str | None = None,
        on_segment: Callable[[SegmentResult, int], None] | None = None,
        chunk_label: str | None = None,
    ) -> TranscriptionResult:
        """Transcribe an audio file with word-level timestamps.
        Sends progress messages via IPC during processing.
        If chunk_label is set (e.g. "chunk 3/12"), messages are prefixed with it.
        """
-        # Stage: loading model
+        prefix = f"{chunk_label}: " if chunk_label else ""
        # Stage: loading model (skip for chunks after the first — model already loaded)
        if not chunk_label:
            write_message(progress_message(request_id, 0, "loading_model", f"Loading {model_name}..."))
        model = self._ensure_model(model_name, device, compute_type)
        # Stage: transcribing
-        write_message(progress_message(request_id, 10, "transcribing", "Starting transcription..."))
+        write_message(progress_message(request_id, 10, "transcribing", f"{prefix}Starting transcription..."))
        start_time = time.time()
        segments_iter, info = model.transcribe(
@@ -145,17 +173,24 @@ class TranscribeService:
                )
            )
-            # Send progress every few segments
+            if on_segment:
-            if segment_count % 5 == 0:
+                on_segment(result.segments[-1], segment_count - 1)
            write_message(
                progress_message(
                    request_id,
                    progress_pct,
                    "transcribing",
-                        f"Processed {segment_count} segments...",
+                    f"{prefix}Transcribing segment {segment_count} ({progress_pct}% of audio)...",
                )
            )
            if segment_count % CHUNK_REPORT_SIZE == 0:
                write_message(progress_message(
                    request_id, progress_pct, "transcribing",
                    f"Completed chunk of {CHUNK_REPORT_SIZE} segments "
                    f"({segment_count} total, {progress_pct}% of audio)..."))
        elapsed = time.time() - start_time
        print(
            f"[sidecar] Transcription complete: {segment_count} segments in {elapsed:.1f}s",
@@ -166,6 +201,114 @@ class TranscribeService:
        write_message(progress_message(request_id, 100, "done", "Transcription complete"))
        return result
    def transcribe_chunked(
        self,
        request_id: str,
        file_path: str,
        model_name: str = "base",
        device: str = "cpu",
        compute_type: str = "int8",
        language: str | None = None,
        on_segment: Callable[[SegmentResult, int], None] | None = None,
        chunk_duration_sec: int = 300,
    ) -> TranscriptionResult:
        """Transcribe a large audio file by splitting into chunks.
        Uses ffprobe to read the total duration and ffmpeg to extract each
        chunk as a 16kHz mono WAV, transcribes every chunk with transcribe(),
        then merges the results with timestamps shifted to file time.
        Falls back to standard transcribe() if the duration probe cannot be
        run or its output cannot be parsed. A failure while extracting or
        transcribing an individual chunk is not caught and propagates.
        Args:
            request_id: Identifier attached to every progress message.
            file_path: Path of the audio file to transcribe.
            model_name, device, compute_type, language: Forwarded to transcribe().
            on_segment: Optional callback invoked per segment with an index
                that is global across chunks.
            chunk_duration_sec: Length of each chunk in seconds; must be positive.
        Returns:
            A TranscriptionResult whose language fields come from the first
            chunk and whose duration_ms is the probed total duration.
        Raises:
            ValueError: If chunk_duration_sec is not positive.
            subprocess.CalledProcessError: If ffmpeg fails to extract a chunk.
        """
        import math
        import os
        import subprocess
        import tempfile
        if chunk_duration_sec <= 0:
            raise ValueError("chunk_duration_sec must be positive")
        # Get total duration via ffprobe
        try:
            probe_result = subprocess.run(
                [get_ffprobe_path(), "-v", "quiet", "-show_entries", "format=duration",
                 "-of", "default=noprint_wrappers=1:nokey=1", file_path],
                capture_output=True, text=True, check=True,
            )
            total_duration = float(probe_result.stdout.strip())
        except (subprocess.CalledProcessError, OSError, ValueError):
            # ffprobe missing, not executable or failed — fall back to standard
            # transcription. OSError covers FileNotFoundError and PermissionError.
            write_message(progress_message(
                request_id, 5, "transcribing",
                "ffmpeg not available, using standard transcription..."))
            return self.transcribe(request_id, file_path, model_name, device,
                                   compute_type, language, on_segment=on_segment)
        # Ceiling division so a duration that is an exact multiple of the chunk
        # length does not announce one chunk more than is actually processed.
        num_chunks = max(1, math.ceil(total_duration / chunk_duration_sec))
        write_message(progress_message(
            request_id, 5, "transcribing",
            f"Splitting {total_duration:.0f}s file into {num_chunks} chunks..."))
        merged_result = TranscriptionResult()
        global_segment_index = 0
        for chunk_idx in range(num_chunks):
            chunk_start = chunk_idx * chunk_duration_sec
            if chunk_start >= total_duration:
                break
            chunk_start_ms = int(chunk_start * 1000)
            # Extract chunk to temp file
            tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
            tmp.close()
            try:
                subprocess.run(
                    [get_ffmpeg_path(), "-y", "-ss", str(chunk_start),
                     "-t", str(chunk_duration_sec),
                     "-i", file_path,
                     "-ar", "16000", "-ac", "1", "-c:a", "pcm_s16le",
                     tmp.name],
                    capture_output=True, check=True,
                )
                # Wrap on_segment to offset the index
                chunk_on_segment = None
                if on_segment:
                    base_index = global_segment_index
                    # _base is bound as a default so each wrapper keeps its own offset.
                    def chunk_on_segment(seg: SegmentResult, idx: int, _base=base_index) -> None:
                        on_segment(seg, _base + idx)
                chunk_result = self.transcribe(
                    request_id, tmp.name, model_name, device,
                    compute_type, language, on_segment=chunk_on_segment,
                    chunk_label=f"Chunk {chunk_idx + 1}/{num_chunks}",
                )
                # Offset timestamps and merge
                for seg in chunk_result.segments:
                    seg.start_ms += chunk_start_ms
                    seg.end_ms += chunk_start_ms
                    for word in seg.words:
                        word.start_ms += chunk_start_ms
                        word.end_ms += chunk_start_ms
                    merged_result.segments.append(seg)
                global_segment_index += len(chunk_result.segments)
                # Take language from first chunk
                if chunk_idx == 0:
                    merged_result.language = chunk_result.language
                    merged_result.language_probability = chunk_result.language_probability
            finally:
                # Always remove the chunk file, even when extraction or transcription fails.
                os.unlink(tmp.name)
            # Chunk progress
            chunk_pct = min(10 + int(((chunk_idx + 1) / num_chunks) * 80), 90)
            write_message(progress_message(
                request_id, chunk_pct, "transcribing",
                f"Completed chunk {chunk_idx + 1}/{num_chunks}..."))
        merged_result.duration_ms = int(total_duration * 1000)
        write_message(progress_message(request_id, 100, "done", "Transcription complete"))
        return merged_result
 def result_to_payload(result: TranscriptionResult) -> dict[str, Any]:
    """Convert TranscriptionResult to IPC payload dict."""
--- a/python/voice_to_notes/utils/ffmpeg.py
+++ b/python/voice_to_notes/utils/ffmpeg.py
@@ -0,0 +1,43 @@
 """Resolve ffmpeg/ffprobe paths for both frozen and development builds."""
 from __future__ import annotations
 import os
 import sys
 def _resolve_bundled_binary(name: str) -> str:
    """Locate a helper binary shipped next to the frozen sidecar executable.
    When ``sys.frozen`` is set, looks in the directory of ``sys.executable``
    for ``<name>.exe`` (Windows only) and then ``<name>``, returning the first
    that exists as a file. Otherwise, or if neither exists, returns the bare
    ``name`` so the binary is resolved through the system PATH.
    """
    if getattr(sys, "frozen", False):
        # Frozen PyInstaller bundle — binaries are next to the sidecar binary
        bundle_dir = os.path.dirname(sys.executable)
        candidates = [name]
        if sys.platform == "win32":
            # Prefer the .exe name on Windows, but still accept a suffix-less file.
            candidates.insert(0, f"{name}.exe")
        for candidate in candidates:
            path = os.path.join(bundle_dir, candidate)
            if os.path.isfile(path):
                return path
    return name
 def get_ffmpeg_path() -> str:
    """Return the path to the ffmpeg binary.
    When running as a frozen PyInstaller bundle, looks next to sys.executable.
    Otherwise falls back to the system PATH.
    """
    return _resolve_bundled_binary("ffmpeg")
 def get_ffprobe_path() -> str:
    """Return the path to the ffprobe binary.
    When running as a frozen PyInstaller bundle, looks next to sys.executable.
    Otherwise falls back to the system PATH.
    """
    return _resolve_bundled_binary("ffprobe")
--- a/src-tauri/Cargo.lock
+++ b/src-tauri/Cargo.lock
@@ -59,6 +59,15 @@ version = "1.0.102"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c"
 [[package]]
 name = "arbitrary"
 version = "1.4.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1"
 dependencies = [
 "derive_arbitrary",
 ]
 [[package]]
 name = "async-broadcast"
 version = "0.7.2"
@@ -655,6 +664,17 @@ dependencies = [
 "serde_core",
 ]
 [[package]]
 name = "derive_arbitrary"
 version = "1.4.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1e567bd82dcff979e4b03460c307b3cdc9e96fde3d73bed1496d2bc75d9dd62a"
 dependencies = [
 "proc-macro2",
 "quote",
 "syn 2.0.117",
 ]
 [[package]]
 name = "derive_more"
 version = "0.99.20"
@@ -4362,7 +4382,7 @@ checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
 [[package]]
 name = "voice-to-notes"
-version = "0.1.0"
+version = "0.2.2"
 dependencies = [
 "chrono",
 "rusqlite",
@@ -4374,6 +4394,7 @@ dependencies = [
 "tauri-plugin-opener",
 "thiserror 1.0.69",
 "uuid",
 "zip",
 ]
 [[package]]
@@ -5412,12 +5433,41 @@ dependencies = [
 "syn 2.0.117",
 ]
 [[package]]
 name = "zip"
 version = "2.4.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "fabe6324e908f85a1c52063ce7aa26b68dcb7eb6dbc83a2d148403c9bc3eba50"
 dependencies = [
 "arbitrary",
 "crc32fast",
 "crossbeam-utils",
 "displaydoc",
 "flate2",
 "indexmap 2.13.0",
 "memchr",
 "thiserror 2.0.18",
 "zopfli",
 ]
 [[package]]
 name = "zmij"
 version = "1.0.21"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa"
 [[package]]
 name = "zopfli"
 version = "0.8.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f05cd8797d63865425ff89b5c4a48804f35ba0ce8d125800027ad6017d2b5249"
 dependencies = [
 "bumpalo",
 "crc32fast",
 "log",
 "simd-adler32",
 ]
 [[package]]
 name = "zvariant"
 version = "5.10.0"
--- a/src-tauri/Cargo.toml
+++ b/src-tauri/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "voice-to-notes"
-version = "0.1.0"
+version = "0.2.44"
 description = "Voice to Notes — desktop transcription with speaker identification"
 authors = ["Voice to Notes Contributors"]
 license = "MIT"
@@ -14,12 +14,16 @@ crate-type = ["staticlib", "cdylib", "rlib"]
 tauri-build = { version = "2", features = [] }
 [dependencies]
-tauri = { version = "2", features = ["protocol-asset"] }
+tauri = { version = "2", features = ["protocol-asset", "devtools"] }
 tauri-plugin-opener = "2"
 serde = { version = "1", features = ["derive"] }
 serde_json = "1"
 rusqlite = { version = "0.31", features = ["bundled"] }
 uuid = { version = "1", features = ["v4", "serde"] }
 zip = { version = "2", default-features = false, features = ["deflate"] }
 thiserror = "1"
 chrono = { version = "0.4", features = ["serde"] }
 tauri-plugin-dialog = "2.6.0"
 reqwest = { version = "0.12", features = ["stream", "json"] }
 futures-util = "0.3"
 bytes = "1"
--- a/src-tauri/binaries/.gitkeep
+++ b/src-tauri/binaries/.gitkeep
--- a/src-tauri/build.rs
+++ b/src-tauri/build.rs
@@ -1,3 +1,21 @@
 fn main() {
    // tauri-build fails when sidecar.zip is absent, so drop in a placeholder.
    // CI replaces this placeholder with the real PyInstaller sidecar archive.
    const PLACEHOLDER_PATH: &str = "sidecar.zip";
    // Smallest valid zip: a lone end-of-central-directory record (empty archive).
    const EMPTY_ZIP: [u8; 22] = [
        0x50, 0x4b, 0x05, 0x06, // EOCD signature
        0x00, 0x00, // disk number
        0x00, 0x00, // disk with central dir
        0x00, 0x00, // entries on this disk
        0x00, 0x00, // total entries
        0x00, 0x00, 0x00, 0x00, // central dir size
        0x00, 0x00, 0x00, 0x00, // central dir offset
        0x00, 0x00, // comment length
    ];
    let placeholder = std::path::Path::new(PLACEHOLDER_PATH);
    let already_present = placeholder.exists();
    if !already_present {
        std::fs::write(placeholder, EMPTY_ZIP)
            .expect("Failed to create placeholder sidecar.zip");
    }
    tauri_build::build()
 }
--- a/src-tauri/nsis-hooks.nsh
+++ b/src-tauri/nsis-hooks.nsh
@@ -0,0 +1,11 @@
 ; NSIS uninstall hook for Voice to Notes
 ; Removes the sidecar data directory (extracted sidecar binaries + logs)
 ; but preserves user data in $PROFILE\.voicetonotes (database, settings, models)
 !macro NSIS_HOOK_POSTUNINSTALL
  ; Remove the Tauri app_local_data_dir which contains:
  ;   - Extracted sidecar directories (voice-to-notes-sidecar/)
  ;   - sidecar.log
  ; Path: %LOCALAPPDATA%\com.voicetonotes.app
  ; NOTE(review): the Rust side extracts into "sidecar-<version>/" under its
  ;   data dir; confirm the directory name listed above is still current.
  ; NOTE(review): if the installer also runs this hook while upgrading an
  ;   existing install, the downloaded sidecar is wiped on every update;
  ;   confirm whether an update-mode guard is needed here.
  ; RMDir /r deletes the directory and everything below it.
  RMDir /r "$LOCALAPPDATA\com.voicetonotes.app"
 !macroend
--- a/src-tauri/src/commands/ai.rs
+++ b/src-tauri/src/commands/ai.rs
@@ -39,7 +39,11 @@ pub fn ai_chat(
    if response.msg_type == "error" {
        return Err(format!(
            "AI error: {}",
-            response.payload.get("message").and_then(|v| v.as_str()).unwrap_or("unknown")
+            response
                .payload
                .get("message")
                .and_then(|v| v.as_str())
                .unwrap_or("unknown")
        ));
    }
--- a/src-tauri/src/commands/export.rs
+++ b/src-tauri/src/commands/export.rs
@@ -33,7 +33,11 @@ pub fn export_transcript(
    if response.msg_type == "error" {
        return Err(format!(
            "Export error: {}",
-            response.payload.get("message").and_then(|v| v.as_str()).unwrap_or("unknown")
+            response
                .payload
                .get("message")
                .and_then(|v| v.as_str())
                .unwrap_or("unknown")
        ));
    }
--- a/src-tauri/src/commands/media.rs
+++ b/src-tauri/src/commands/media.rs
@@ -0,0 +1,124 @@
 use std::path::PathBuf;
 use std::process::Command;
 #[cfg(target_os = "windows")]
 use std::os::windows::process::CommandExt;
 /// Extract audio from a video file to a WAV file using ffmpeg.
 ///
 /// * `file_path` – input media file; must exist.
 /// * `output_path` – destination WAV file. When `None`, `<stem>_audio.wav`
 ///   inside the system temp directory is used.
 ///
 /// Returns the path to the extracted audio file. Fails when the input is
 /// missing, ffmpeg cannot be found or started, ffmpeg exits unsuccessfully
 /// (the last lines of its stderr are appended to the message), or no output
 /// file exists afterwards.
 #[tauri::command]
 pub fn extract_audio(file_path: String, output_path: Option<String>) -> Result<String, String> {
    let input = PathBuf::from(&file_path);
    if !input.exists() {
        return Err(format!("File not found: {}", file_path));
    }
    // Use provided output path, or fall back to a temp WAV file
    let stem = input.file_stem().unwrap_or_default().to_string_lossy();
    let output = match output_path {
        Some(ref p) => PathBuf::from(p),
        None => std::env::temp_dir().join(format!("{stem}_audio.wav")),
    };
    eprintln!(
        "[media] Extracting audio: {} -> {}",
        input.display(),
        output.display()
    );
    // Find ffmpeg — check sidecar extract dir first, then system PATH
    let ffmpeg = find_ffmpeg().ok_or("ffmpeg not found. Install ffmpeg or ensure it's in PATH.")?;
    let mut cmd = Command::new(&ffmpeg);
    cmd.args([
            "-y",             // Overwrite output
            "-i",
            &file_path,
            "-vn",            // No video
            "-acodec",
            "pcm_s16le",      // WAV PCM 16-bit
            "-ar",
            "22050",          // 22kHz mono for better playback quality
            "-ac",
            "1",              // Mono
        ])
        // Pass the path as an OsStr so non-UTF-8 paths do not panic.
        .arg(&output)
        .stdout(std::process::Stdio::null())
        .stderr(std::process::Stdio::piped());
    // Hide the console window on Windows (CREATE_NO_WINDOW = 0x08000000)
    #[cfg(target_os = "windows")]
    cmd.creation_flags(0x08000000);
    // `output()` drains the stderr pipe while waiting. With `status()` the pipe
    // was never read, so ffmpeg could block forever once the pipe buffer filled
    // with its progress output.
    let result = cmd
        .output()
        .map_err(|e| format!("Failed to run ffmpeg: {e}"))?;
    if !result.status.success() {
        // ffmpeg separates progress updates with '\r', so split on both.
        let stderr = String::from_utf8_lossy(&result.stderr);
        let lines: Vec<&str> = stderr
            .split(|c| c == '\n' || c == '\r')
            .map(str::trim)
            .filter(|line| !line.is_empty())
            .collect();
        let detail = lines[lines.len().saturating_sub(3)..].join(" | ");
        return Err(if detail.is_empty() {
            format!("ffmpeg exited with status {}", result.status)
        } else {
            format!("ffmpeg exited with status {}: {}", result.status, detail)
        });
    }
    if !output.exists() {
        return Err("ffmpeg completed but output file not found".to_string());
    }
    eprintln!("[media] Audio extracted successfully");
    Ok(output.to_string_lossy().to_string())
 }
 #[tauri::command]
 pub fn check_file_exists(path: String) -> bool {
    std::path::Path::new(&path).exists()
 }
 /// Copy the file at `src` to `dst`, reporting any I/O failure as a string.
 #[tauri::command]
 pub fn copy_file(src: String, dst: String) -> Result<(), String> {
    match std::fs::copy(&src, &dst) {
        // The number of bytes copied is not reported to the caller.
        Ok(_bytes_copied) => Ok(()),
        Err(e) => Err(format!("Failed to copy file: {e}")),
    }
 }
 /// Create `path` and any missing parent directories.
 #[tauri::command]
 pub fn create_dir(path: String) -> Result<(), String> {
    let outcome = std::fs::create_dir_all(&path);
    outcome.map_err(|e| format!("Failed to create directory: {e}"))
 }
 /// Find ffmpeg binary — check sidecar directory first, then system PATH.
 ///
 /// Returns the full path of the ffmpeg inside `sidecar-<version>/` under
 /// `DATA_DIR` (version taken from `sidecar-version.txt`) when that file
 /// exists, otherwise the bare executable name if it can be spawned from PATH,
 /// or `None` when neither works.
 fn find_ffmpeg() -> Option<String> {
    let ffmpeg_name = if cfg!(target_os = "windows") {
        "ffmpeg.exe"
    } else {
        "ffmpeg"
    };
    // Check sidecar extract dir (ffmpeg is bundled with the sidecar)
    if let Some(data_dir) = crate::sidecar::DATA_DIR.get() {
        // Read sidecar version to find the right directory
        let version_file = data_dir.join("sidecar-version.txt");
        if let Ok(version) = std::fs::read_to_string(&version_file) {
            let version = version.trim();
            // An empty version would resolve to "sidecar-", never a real install.
            if !version.is_empty() {
                let ffmpeg_path = data_dir
                    .join(format!("sidecar-{version}"))
                    .join(ffmpeg_name);
                // Require a regular file so a directory of that name is not returned.
                if ffmpeg_path.is_file() {
                    return Some(ffmpeg_path.to_string_lossy().to_string());
                }
            }
        }
    }
    // Fall back to system PATH: a successful spawn means the binary resolves.
    let mut probe = Command::new(ffmpeg_name);
    probe
        .arg("-version")
        .stdout(std::process::Stdio::null())
        .stderr(std::process::Stdio::null());
    // Hide the console window on Windows (CREATE_NO_WINDOW = 0x08000000),
    // matching how extract_audio launches ffmpeg.
    #[cfg(target_os = "windows")]
    probe.creation_flags(0x08000000);
    if probe.status().is_ok() {
        return Some(ffmpeg_name.to_string());
    }
    None
 }
--- a/src-tauri/src/commands/mod.rs
+++ b/src-tauri/src/commands/mod.rs
@@ -1,6 +1,8 @@
 pub mod ai;
 pub mod export;
 pub mod media;
 pub mod project;
 pub mod settings;
 pub mod sidecar;
 pub mod system;
 pub mod transcribe;
--- a/src-tauri/src/commands/project.rs
+++ b/src-tauri/src/commands/project.rs
@@ -1,9 +1,115 @@
 use serde::{Deserialize, Serialize};
 use std::fs;
 use tauri::State;
 use crate::db::models::Project;
 use crate::db::queries;
 use crate::state::AppState;
 // ── File-based project types ────────────────────────────────────
 /// JSON project document written by `save_project_file` and read back by
 /// `load_project_file`.
 #[derive(Serialize, Deserialize)]
 pub struct ProjectFile {
    // Presumably the file-format version; verify against the frontend writer.
    pub version: u32,
    pub name: String,
    // The three optional paths below may be absent from the JSON; they then
    // deserialize as `None`.
    #[serde(default)]
    pub audio_file: Option<String>,
    #[serde(default)]
    pub source_file: Option<String>,
    #[serde(default)]
    pub audio_wav: Option<String>,
    // Stored as an opaque string; the timestamp format is not enforced here.
    pub created_at: String,
    pub segments: Vec<ProjectFileSegment>,
    pub speakers: Vec<ProjectFileSpeaker>,
 }
 /// One transcript segment as stored inside a `ProjectFile`.
 #[derive(Serialize, Deserialize)]
 pub struct ProjectFileSegment {
    pub text: String,
    // Segment bounds in milliseconds.
    pub start_ms: i64,
    pub end_ms: i64,
    // Presumably the speaker label rather than a database id; verify against caller.
    pub speaker: Option<String>,
    // No serde default: this key must be present when loading a project file.
    pub is_edited: bool,
    pub words: Vec<ProjectFileWord>,
 }
 /// One word with its timing, as stored inside a `ProjectFileSegment`.
 #[derive(Serialize, Deserialize)]
 pub struct ProjectFileWord {
    pub word: String,
    // Word bounds in milliseconds.
    pub start_ms: i64,
    pub end_ms: i64,
    // Required here, unlike `WordOutput::confidence`, which is optional.
    pub confidence: f64,
 }
 /// One speaker entry as stored inside a `ProjectFile`.
 #[derive(Serialize, Deserialize)]
 pub struct ProjectFileSpeaker {
    pub label: String,
    pub display_name: Option<String>,
    // Stored as an opaque string; the colour format is not validated here.
    pub color: String,
 }
 // ── Input types for save_project_transcript ──────────────────────
 /// One word of a `SegmentInput`, as received by `save_project_transcript`.
 #[derive(Deserialize)]
 pub struct WordInput {
    pub word: String,
    // Word bounds in milliseconds.
    pub start_ms: i64,
    pub end_ms: i64,
    // Forwarded to the database as `Some(confidence)`.
    pub confidence: f64,
 }
 /// One transcript segment received by `save_project_transcript`.
 #[derive(Deserialize)]
 pub struct SegmentInput {
    pub text: String,
    // Segment bounds in milliseconds.
    pub start_ms: i64,
    pub end_ms: i64,
    // Resolved to a speaker id through the labels in the accompanying
    // `SpeakerInput` list; an unknown label is stored as no speaker.
    pub speaker: Option<String>, // speaker label, not id
    pub words: Vec<WordInput>,
 }
 /// One speaker received by `save_project_transcript`; a database row is
 /// created for each entry.
 #[derive(Deserialize)]
 pub struct SpeakerInput {
    // The label that `SegmentInput::speaker` values are matched against.
    pub label: String,
    pub color: String,
 }
 // ── Output types for load_project_transcript ─────────────────────
 /// One word of a `SegmentOutput`, as returned by `load_project_transcript`.
 #[derive(Serialize)]
 pub struct WordOutput {
    pub word: String,
    // Word bounds in milliseconds.
    pub start_ms: i64,
    pub end_ms: i64,
    // Copied from the stored word row, where it is optional.
    pub confidence: Option<f64>,
 }
 /// One transcript segment returned by `load_project_transcript`, with its
 /// words nested inside.
 #[derive(Serialize)]
 pub struct SegmentOutput {
    // Database id of the segment row.
    pub id: String,
    pub text: String,
    // Segment bounds in milliseconds.
    pub start_ms: i64,
    pub end_ms: i64,
    // `None` when the segment has no speaker id or the id matches no
    // speaker of the project.
    pub speaker: Option<String>, // speaker label
    pub words: Vec<WordOutput>,
 }
 /// One speaker of a project as returned by `load_project_transcript`.
 #[derive(Serialize)]
 pub struct SpeakerOutput {
    // Database id of the speaker row.
    pub id: String,
    pub label: String,
    pub display_name: Option<String>,
    pub color: Option<String>,
 }
 /// Result of `load_project_transcript`: the transcript of the project's first
 /// media file together with all speakers of the project.
 #[derive(Serialize)]
 pub struct ProjectTranscript {
    // Path recorded for the media file the segments belong to.
    pub file_path: String,
    pub segments: Vec<SegmentOutput>,
    pub speakers: Vec<SpeakerOutput>,
 }
 // ── Commands ─────────────────────────────────────────────────────
 #[tauri::command]
 pub fn create_project(name: String, state: State<AppState>) -> Result<Project, String> {
    let conn = state.db.lock().map_err(|e| e.to_string())?;
@@ -21,3 +127,176 @@ pub fn list_projects(state: State<AppState>) -> Result<Vec<Project>, String> {
    let conn = state.db.lock().map_err(|e| e.to_string())?;
    queries::list_projects(&conn).map_err(|e| e.to_string())
 }
 /// Delete the project with the given id; database errors are returned as text.
 #[tauri::command]
 pub fn delete_project(id: String, state: State<AppState>) -> Result<(), String> {
    let db = state.db.lock().map_err(|e| e.to_string())?;
    match queries::delete_project(&db, &id) {
        Ok(done) => Ok(done),
        Err(err) => Err(err.to_string()),
    }
 }
 /// Replace the text of one segment; database errors are returned as text.
 #[tauri::command]
 pub fn update_segment(
    segment_id: String,
    new_text: String,
    state: State<AppState>,
 ) -> Result<(), String> {
    let db = state.db.lock().map_err(|e| e.to_string())?;
    match queries::update_segment_text(&db, &segment_id, &new_text) {
        Ok(done) => Ok(done),
        Err(err) => Err(err.to_string()),
    }
 }
 /// Persist a transcript for a project: one media-file row, the speakers, and
 /// every segment together with its words.
 ///
 /// All inserts run inside a single SQLite transaction. If any step fails the
 /// transaction is rolled back when it is dropped, so no partial transcript is
 /// left behind, and the rows are committed once rather than one implicit
 /// transaction per insert.
 ///
 /// Returns the project row read back after the commit. Every database error
 /// is converted to its string form; a missing project yields
 /// `"Project not found"`.
 #[tauri::command]
 pub fn save_project_transcript(
    project_id: String,
    file_path: String,
    segments: Vec<SegmentInput>,
    speakers: Vec<SpeakerInput>,
    state: State<AppState>,
 ) -> Result<Project, String> {
    let conn = state.db.lock().map_err(|e| e.to_string())?;
    // `unchecked_transaction` only needs a shared reference, which is all the
    // mutex guard binding gives us. Dropping `tx` without commit rolls back.
    let tx = conn.unchecked_transaction().map_err(|e| e.to_string())?;
    // 1. Create media file entry
    let media_file =
        queries::create_media_file(&tx, &project_id, &file_path).map_err(|e| e.to_string())?;
    // 2. Create speaker entries and build label -> id map
    let mut speaker_map = std::collections::HashMap::new();
    for speaker_input in &speakers {
        let speaker = queries::create_speaker(
            &tx,
            &project_id,
            &speaker_input.label,
            Some(&speaker_input.color),
        )
        .map_err(|e| e.to_string())?;
        speaker_map.insert(speaker_input.label.clone(), speaker.id);
    }
    // 3. Create segments with words
    for (index, seg_input) in segments.iter().enumerate() {
        // A label that matches no speaker is stored as "no speaker".
        let speaker_id = seg_input
            .speaker
            .as_ref()
            .and_then(|label| speaker_map.get(label));
        let segment = queries::create_segment(
            &tx,
            &project_id,
            &media_file.id,
            speaker_id.map(|s| s.as_str()),
            seg_input.start_ms,
            seg_input.end_ms,
            &seg_input.text,
            index as i32,
        )
        .map_err(|e| e.to_string())?;
        // Create words for this segment
        for (word_index, word_input) in seg_input.words.iter().enumerate() {
            queries::create_word(
                &tx,
                &segment.id,
                &word_input.word,
                word_input.start_ms,
                word_input.end_ms,
                Some(word_input.confidence),
                word_index as i32,
            )
            .map_err(|e| e.to_string())?;
        }
    }
    // 4. Make everything visible atomically
    tx.commit().map_err(|e| e.to_string())?;
    // 5. Return updated project info
    queries::get_project(&conn, &project_id)
        .map_err(|e| e.to_string())?
        .ok_or_else(|| "Project not found".to_string())
 }
 #[tauri::command]
 pub fn load_project_transcript(
    project_id: String,
    state: State<AppState>,
 ) -> Result<Option<ProjectTranscript>, String> {
    let conn = state.db.lock().map_err(|e| e.to_string())?;
    // 1. Get media files for the project
    let media_files =
        queries::get_media_files_for_project(&conn, &project_id).map_err(|e| e.to_string())?;
    let media_file = match media_files.first() {
        Some(mf) => mf,
        None => return Ok(None),
    };
    // 2. Get speakers for the project and build id -> label map
    let speakers =
        queries::get_speakers_for_project(&conn, &project_id).map_err(|e| e.to_string())?;
    let speaker_label_map: std::collections::HashMap<String, String> = speakers
        .iter()
        .map(|s| (s.id.clone(), s.label.clone()))
        .collect();
    // 3. Get segments for the media file
    let db_segments =
        queries::get_segments_for_media(&conn, &media_file.id).map_err(|e| e.to_string())?;
    // 4. Build output segments with nested words
    let mut segment_outputs = Vec::with_capacity(db_segments.len());
    for seg in &db_segments {
        let words = queries::get_words_for_segment(&conn, &seg.id).map_err(|e| e.to_string())?;
        let word_outputs: Vec<WordOutput> = words
            .into_iter()
            .map(|w| WordOutput {
                word: w.word,
                start_ms: w.start_ms,
                end_ms: w.end_ms,
                confidence: w.confidence,
            })
            .collect();
        let speaker_label = seg
            .speaker_id
            .as_ref()
            .and_then(|sid| speaker_label_map.get(sid))
            .cloned();
        segment_outputs.push(SegmentOutput {
            id: seg.id.clone(),
            text: seg.text.clone(),
            start_ms: seg.start_ms,
            end_ms: seg.end_ms,
            speaker: speaker_label,
            words: word_outputs,
        });
    }
    // 5. Build speaker outputs
    let speaker_outputs: Vec<SpeakerOutput> = speakers
        .into_iter()
        .map(|s| SpeakerOutput {
            id: s.id,
            label: s.label,
            display_name: s.display_name,
            color: s.color,
        })
        .collect();
    Ok(Some(ProjectTranscript {
        file_path: media_file.file_path.clone(),
        segments: segment_outputs,
        speakers: speaker_outputs,
    }))
 }
 // ── File-based project commands ─────────────────────────────────
 /// Serialize `project` as pretty-printed JSON and write it to `path`.
 #[tauri::command]
 pub fn save_project_file(path: String, project: ProjectFile) -> Result<(), String> {
    let serialized = match serde_json::to_string_pretty(&project) {
        Ok(text) => text,
        Err(e) => return Err(e.to_string()),
    };
    match fs::write(&path, serialized) {
        Ok(()) => Ok(()),
        Err(e) => Err(format!("Failed to save project: {e}")),
    }
 }
 /// Read the JSON project document at `path` and parse it into a `ProjectFile`.
 #[tauri::command]
 pub fn load_project_file(path: String) -> Result<ProjectFile, String> {
    let contents = match fs::read_to_string(&path) {
        Ok(text) => text,
        Err(e) => return Err(format!("Failed to read project: {e}")),
    };
    let parsed: ProjectFile =
        serde_json::from_str(&contents).map_err(|e| format!("Failed to parse project: {e}"))?;
    Ok(parsed)
 }
--- a/src-tauri/src/commands/settings.rs
+++ b/src-tauri/src/commands/settings.rs
@@ -32,3 +32,16 @@ pub fn save_settings(settings: Value) -> Result<(), String> {
    fs::write(&path, json).map_err(|e| format!("Cannot write settings: {e}"))?;
    Ok(())
 }
 /// Open (`open == true`) or close the devtools of the webview window labelled
 /// "main". Does nothing when no such window exists.
 #[tauri::command]
 pub fn toggle_devtools(app: tauri::AppHandle, open: bool) {
    use tauri::Manager;
    let window = match app.get_webview_window("main") {
        Some(found) => found,
        None => return,
    };
    if !open {
        window.close_devtools();
        return;
    }
    window.open_devtools();
 }
--- a/src-tauri/src/commands/sidecar.rs
+++ b/src-tauri/src/commands/sidecar.rs
@@ -0,0 +1,258 @@
 use futures_util::StreamExt;
 use serde::Serialize;
 use std::io::Write;
 use tauri::{AppHandle, Emitter};
 use crate::sidecar::{SidecarManager, DATA_DIR};
 const REPO_API: &str = "https://repo.anhonesthost.net/api/v1/repos/MacroPad/voice-to-notes";
 /// Payload of the `sidecar-download-progress` event emitted by
 /// `download_sidecar` after each received chunk.
 #[derive(Serialize, Clone)]
 struct DownloadProgress {
    // Bytes received so far.
    downloaded: u64,
    // Size announced by the response's content length; 0 when unknown.
    total: u64,
    // Integer percentage of `total` received; 0 when `total` is 0.
    percent: u8,
 }
 /// Returned by `check_sidecar_update` when the latest released sidecar version
 /// differs from the installed one.
 #[derive(Serialize)]
 pub struct UpdateInfo {
    // Version read from the local `sidecar-version.txt`.
    pub current_version: String,
    // Version taken from the newest `sidecar-v*` release tag.
    pub latest_version: String,
 }
 /// Return the installed sidecar version recorded in `sidecar-version.txt`.
 /// Yields `None` when the data directory is not initialized, the file cannot
 /// be read, or it contains only whitespace.
 fn read_local_sidecar_version() -> Option<String> {
    let version_file = DATA_DIR.get()?.join("sidecar-version.txt");
    let contents = std::fs::read_to_string(version_file).ok()?;
    let version = contents.trim();
    if version.is_empty() {
        None
    } else {
        Some(version.to_string())
    }
 }
 /// Record `version` in `sidecar-version.txt` inside the data directory.
 /// Called after a successful download.
 fn write_local_sidecar_version(version: &str) -> Result<(), String> {
    let version_file = match DATA_DIR.get() {
        Some(dir) => dir.join("sidecar-version.txt"),
        None => return Err("App data directory not initialized".to_string()),
    };
    match std::fs::write(&version_file, version) {
        Ok(()) => Ok(()),
        Err(e) => Err(format!("Failed to write sidecar version file: {}", e)),
    }
 }
 /// Fetch releases from the Gitea API and find the latest sidecar release
 /// (one whose tag_name starts with "sidecar-v").
 ///
 /// The release list is shared with other releases of the repository, so the
 /// newest sidecar release may not be on the first page. Pages of 20 are
 /// requested in order until a match is found, an empty page is returned, or
 /// `MAX_PAGES` pages have been inspected.
 ///
 /// Returns the first matching release object in the order the API lists them.
 /// Errors on a transport failure, a non-success HTTP status, unparsable JSON,
 /// or when no sidecar release is found.
 async fn fetch_latest_sidecar_release(
    client: &reqwest::Client,
 ) -> Result<serde_json::Value, String> {
    const PAGE_SIZE: u32 = 20;
    // Upper bound on requests so a huge release history cannot loop for long.
    const MAX_PAGES: u32 = 10;
    // Gitea pages are 1-based.
    for page in 1..=MAX_PAGES {
        let releases_url = format!(
            "{}/releases?limit={}&page={}",
            REPO_API, PAGE_SIZE, page
        );
        let resp = client
            .get(&releases_url)
            .header("Accept", "application/json")
            .send()
            .await
            .map_err(|e| format!("Failed to fetch releases: {}", e))?;
        if !resp.status().is_success() {
            return Err(format!("Failed to fetch releases: HTTP {}", resp.status()));
        }
        let releases = resp
            .json::<Vec<serde_json::Value>>()
            .await
            .map_err(|e| format!("Failed to parse releases JSON: {}", e))?;
        // An empty page means the list is exhausted. Page length is not used
        // as a stop signal because the server may cap `limit` below PAGE_SIZE.
        if releases.is_empty() {
            break;
        }
        let found = releases.into_iter().find(|r| {
            r["tag_name"]
                .as_str()
                .map_or(false, |t| t.starts_with("sidecar-v"))
        });
        if let Some(release) = found {
            return Ok(release);
        }
    }
    Err("No sidecar release found".to_string())
 }
 /// Strip the "sidecar-v" prefix from a release tag (e.g. "sidecar-v1.0.1" ->
 /// "1.0.1"). A tag without that prefix is returned unchanged.
 fn version_from_sidecar_tag(tag: &str) -> &str {
    match tag.strip_prefix("sidecar-v") {
        Some(version) => version,
        None => tag,
    }
 }
 /// Report whether the sidecar executable for the recorded version is present,
 /// i.e. `<data dir>/sidecar-<version>/<binary name>` exists. Returns `false`
 /// when the data directory or the recorded version is unavailable.
 #[tauri::command]
 pub fn check_sidecar() -> bool {
    let (data_dir, version) = match (DATA_DIR.get(), read_local_sidecar_version()) {
        (Some(dir), Some(ver)) => (dir, ver),
        _ => return false,
    };
    let mut binary_path = data_dir.join(format!("sidecar-{}", version));
    if cfg!(target_os = "windows") {
        binary_path.push("voice-to-notes-sidecar.exe");
    } else {
        binary_path.push("voice-to-notes-sidecar");
    }
    binary_path.exists()
 }
 /// Name of the operating system as used in sidecar asset file names:
 /// "windows", "macos", or "linux" for every other target.
 fn platform_os() -> &'static str {
    match std::env::consts::OS {
        "windows" => "windows",
        "macos" => "macos",
        _ => "linux",
    }
 }
 /// Name of the CPU architecture as used in sidecar asset file names:
 /// "aarch64" on aarch64 targets, "x86_64" for everything else.
 fn platform_arch() -> &'static str {
    match std::env::consts::ARCH {
        "aarch64" => "aarch64",
        _ => "x86_64",
    }
 }
 /// Stream `response` into a new file at `dest`, emitting a
 /// `sidecar-download-progress` event after every chunk.
 ///
 /// The file handle is closed when this function returns, so the caller can
 /// safely read or delete the file afterwards.
 async fn stream_response_to_file(
    app: &AppHandle,
    response: reqwest::Response,
    dest: &std::path::Path,
 ) -> Result<(), String> {
    let total: u64 = response.content_length().unwrap_or(0);
    let mut downloaded: u64 = 0;
    let mut stream = response.bytes_stream();
    let mut file =
        std::fs::File::create(dest).map_err(|e| format!("Failed to create zip file: {}", e))?;
    while let Some(chunk) = stream.next().await {
        let chunk: bytes::Bytes = chunk.map_err(|e| format!("Download stream error: {}", e))?;
        file.write_all(&chunk)
            .map_err(|e| format!("Failed to write chunk: {}", e))?;
        downloaded += chunk.len() as u64;
        // Clamp to 100 before narrowing: if more bytes arrive than announced,
        // a plain `as u8` cast would wrap around.
        let percent = if total > 0 {
            (downloaded.saturating_mul(100) / total).min(100) as u8
        } else {
            0
        };
        // Progress events are best-effort; a failed emit must not abort the download.
        let _ = app.emit(
            "sidecar-download-progress",
            DownloadProgress {
                downloaded,
                total,
                percent,
            },
        );
    }
    Ok(())
 }
 /// Download the sidecar binary for the given variant (cpu or cuda).
 ///
 /// Looks up the latest sidecar release, downloads the asset named
 /// `sidecar-<os>-<arch>-<variant>.zip` to `<data dir>/sidecar.zip`, extracts it
 /// into `<data dir>/sidecar-<version>/`, marks the extracted top-level files
 /// executable on Unix, records the version, and removes the zip and older
 /// sidecar directories.
 ///
 /// Errors when the data directory is not initialized, the release or asset
 /// cannot be found, the download fails, or extraction fails. The partially
 /// written zip is removed on download or extraction failure.
 #[tauri::command]
 pub async fn download_sidecar(app: AppHandle, variant: String) -> Result<(), String> {
    let data_dir = DATA_DIR.get().ok_or("App data directory not initialized")?;
    let os = platform_os();
    let arch = platform_arch();
    let asset_name = format!("sidecar-{}-{}-{}.zip", os, arch, variant);
    // Fetch the latest sidecar release from Gitea API
    let client = reqwest::Client::new();
    let sidecar_release = fetch_latest_sidecar_release(&client).await?;
    let tag = sidecar_release["tag_name"]
        .as_str()
        .ok_or("No tag_name in sidecar release")?;
    let sidecar_version = version_from_sidecar_tag(tag).to_string();
    // Find the matching asset
    let assets = sidecar_release["assets"]
        .as_array()
        .ok_or("No assets found in sidecar release")?;
    let download_url = assets
        .iter()
        .find(|a| a["name"].as_str() == Some(&asset_name))
        .and_then(|a| a["browser_download_url"].as_str())
        .ok_or_else(|| {
            format!(
                "Asset '{}' not found in sidecar release {}",
                asset_name, tag
            )
        })?
        .to_string();
    // Stream download with progress events
    let response: reqwest::Response = client
        .get(&download_url)
        .send()
        .await
        .map_err(|e| format!("Failed to start download: {}", e))?;
    if !response.status().is_success() {
        return Err(format!("Download failed: HTTP {}", response.status()));
    }
    let zip_path = data_dir.join("sidecar.zip");
    if let Err(e) = stream_response_to_file(&app, response, &zip_path).await {
        // Do not leave a truncated archive behind.
        let _ = std::fs::remove_file(&zip_path);
        return Err(e);
    }
    // Extract the downloaded zip
    let extract_dir = data_dir.join(format!("sidecar-{}", sidecar_version));
    if let Err(e) = SidecarManager::extract_zip(&zip_path, &extract_dir) {
        let _ = std::fs::remove_file(&zip_path);
        return Err(e.into());
    }
    // Make all binaries executable on Unix (sidecar, ffmpeg, ffprobe, etc.)
    #[cfg(unix)]
    {
        use std::os::unix::fs::PermissionsExt;
        if let Ok(entries) = std::fs::read_dir(&extract_dir) {
            for entry in entries.flatten() {
                let path = entry.path();
                if path.is_file() {
                    if let Ok(meta) = std::fs::metadata(&path) {
                        let mut perms = meta.permissions();
                        perms.set_mode(0o755);
                        let _ = std::fs::set_permissions(&path, perms);
                    }
                }
            }
        }
    }
    // Write the sidecar version file
    write_local_sidecar_version(&sidecar_version)?;
    // Clean up the zip file and old sidecar versions
    let _ = std::fs::remove_file(&zip_path);
    SidecarManager::cleanup_old_sidecars(data_dir, &sidecar_version);
    Ok(())
 }
 /// Compare the installed sidecar version with the latest released one.
 ///
 /// Returns `Ok(None)` when no sidecar is installed yet (first launch is
 /// handled separately), when the local version cannot be read, or when the
 /// versions are equal. Otherwise returns both versions in `UpdateInfo`.
 #[tauri::command]
 pub async fn check_sidecar_update() -> Result<Option<UpdateInfo>, String> {
    if !check_sidecar() {
        return Ok(None);
    }
    let current_version = match read_local_sidecar_version() {
        Some(installed) => installed,
        None => return Ok(None),
    };
    // Ask the Gitea API for the newest sidecar release and take its version.
    let http = reqwest::Client::new();
    let release = fetch_latest_sidecar_release(&http).await?;
    let latest_tag = release["tag_name"]
        .as_str()
        .ok_or("No tag_name in sidecar release")?;
    let latest_version = version_from_sidecar_tag(latest_tag);
    if latest_version == current_version {
        return Ok(None);
    }
    let info = UpdateInfo {
        current_version,
        latest_version: latest_version.to_string(),
    };
    Ok(Some(info))
 }
--- a/src-tauri/src/commands/system.rs
+++ b/src-tauri/src/commands/system.rs
@@ -22,9 +22,7 @@ pub fn llama_start(
    threads: Option<u32>,
 ) -> Result<LlamaStatus, String> {
    let config = LlamaConfig {
-        binary_path: PathBuf::from(
+        binary_path: PathBuf::from(binary_path.unwrap_or_else(|| "llama-server".to_string())),
            binary_path.unwrap_or_else(|| "llama-server".to_string()),
        ),
        model_path: PathBuf::from(model_path),
        port: port.unwrap_or(0),
        n_gpu_layers: n_gpu_layers.unwrap_or(0),
@@ -62,3 +60,18 @@ pub fn llama_list_models() -> Value {
 pub fn get_data_dir() -> String {
    // Lossy conversion: any non-UTF-8 parts of the path are replaced.
    let dir = LlamaManager::data_dir();
    dir.to_string_lossy().into_owned()
 }
 /// Append one frontend log line to `frontend.log` in the data directory.
 /// Best-effort: failures to open or write the file are ignored.
 #[tauri::command]
 pub fn log_frontend(level: String, message: String) {
    use std::io::Write;
    let log_path = LlamaManager::data_dir().join("frontend.log");
    let opened = std::fs::OpenOptions::new()
        .create(true)
        .append(true)
        .open(&log_path);
    let mut file = match opened {
        Ok(handle) => handle,
        Err(_) => return,
    };
    let timestamp = chrono::Local::now().format("%Y-%m-%d %H:%M:%S");
    let _ = writeln!(file, "[{timestamp}] [{level}] {message}");
 }
--- a/src-tauri/src/commands/transcribe.rs
+++ b/src-tauri/src/commands/transcribe.rs
@@ -33,16 +33,47 @@ pub fn transcribe_file(
    if response.msg_type == "error" {
        return Err(format!(
            "Transcription error: {}",
-            response.payload.get("message").and_then(|v| v.as_str()).unwrap_or("unknown")
+            response
                .payload
                .get("message")
                .and_then(|v| v.as_str())
                .unwrap_or("unknown")
        ));
    }
    Ok(response.payload)
 }
 /// Ask the Python sidecar to download the diarization model.
 ///
 /// Sends a `diarize.download` request carrying `hf_token`. A sidecar error
 /// reply is reported as `{"ok": false, "error": <message>}` in an `Ok` value;
 /// any other reply yields `{"ok": true}`. Only failures to start or talk to
 /// the sidecar produce `Err`.
 #[tauri::command]
 pub fn download_diarize_model(hf_token: String) -> Result<Value, String> {
    let manager = sidecar();
    manager.ensure_running()?;
    let request_id = uuid::Uuid::new_v4().to_string();
    let payload = json!({
        "hf_token": hf_token,
    });
    let msg = IPCMessage::new(&request_id, "diarize.download", payload);
    let response = manager.send_and_receive(&msg)?;
    if response.msg_type != "error" {
        return Ok(json!({ "ok": true }));
    }
    let reason = response
        .payload
        .get("message")
        .and_then(|v| v.as_str())
        .unwrap_or("unknown");
    Ok(json!({
        "ok": false,
        "error": reason,
    }))
 }
 /// Run the full transcription + diarization pipeline via the Python sidecar.
 #[tauri::command]
-pub fn run_pipeline(
+pub async fn run_pipeline(
    app: AppHandle,
    file_path: String,
    model: Option<String>,
@@ -52,6 +83,7 @@ pub fn run_pipeline(
    min_speakers: Option<u32>,
    max_speakers: Option<u32>,
    skip_diarization: Option<bool>,
    hf_token: Option<String>,
 ) -> Result<Value, String> {
    let manager = sidecar();
    manager.ensure_running()?;
@@ -70,19 +102,38 @@ pub fn run_pipeline(
            "min_speakers": min_speakers,
            "max_speakers": max_speakers,
            "skip_diarization": skip_diarization.unwrap_or(false),
            "hf_token": hf_token,
        }),
    );
-    let response = manager.send_and_receive_with_progress(&msg, |progress| {
+    // Run the blocking sidecar I/O on a separate thread so the async runtime
-        let _ = app.emit("pipeline-progress", &progress.payload);
+    // can deliver emitted events to the webview while processing is ongoing.
    let app_handle = app.clone();
    tauri::async_runtime::spawn_blocking(move || {
        let response = manager.send_and_receive_with_progress(&msg, |msg| {
            let event_name = match msg.msg_type.as_str() {
                "pipeline.segment" => "pipeline-segment",
                "pipeline.speaker_update" => "pipeline-speaker-update",
                _ => "pipeline-progress",
            };
            if let Err(e) = app_handle.emit(event_name, &msg.payload) {
                eprintln!("[sidecar-rs] Failed to emit {event_name}: {e}");
            }
        })?;
        if response.msg_type == "error" {
            return Err(format!(
                "Pipeline error: {}",
-            response.payload.get("message").and_then(|v| v.as_str()).unwrap_or("unknown")
+                response
                    .payload
                    .get("message")
                    .and_then(|v| v.as_str())
                    .unwrap_or("unknown")
            ));
        }
        Ok(response.payload)
    })
    .await
    .map_err(|e| format!("Pipeline task failed: {e}"))?
 }
--- a/src-tauri/src/db/queries.rs
+++ b/src-tauri/src/db/queries.rs
@@ -85,6 +85,57 @@ pub fn delete_project(conn: &Connection, id: &str) -> Result<(), DatabaseError>
    Ok(())
 }
 // ── Media Files ──────────────────────────────────────────────────
 /// Insert a new row into `media_files` for `project_id` / `file_path`.
 ///
 /// A fresh UUID v4 is used as the id and the current UTC time (RFC 3339)
 /// as `created_at`. Every other column of the returned `MediaFile` is `None`.
 /// Any error from the INSERT is propagated through `?`.
 pub fn create_media_file(
    conn: &Connection,
    project_id: &str,
    file_path: &str,
 ) -> Result<MediaFile, DatabaseError> {
    // Build the record first so the values bound below are exactly the ones returned.
    let media_file = MediaFile {
        id: Uuid::new_v4().to_string(),
        project_id: project_id.to_owned(),
        file_path: file_path.to_owned(),
        file_hash: None,
        duration_ms: None,
        sample_rate: None,
        channels: None,
        format: None,
        file_size: None,
        created_at: Utc::now().to_rfc3339(),
    };
    conn.execute(
        "INSERT INTO media_files (id, project_id, file_path, created_at) VALUES (?1, ?2, ?3, ?4)",
        params![
            media_file.id,
            media_file.project_id,
            media_file.file_path,
            media_file.created_at
        ],
    )?;
    Ok(media_file)
 }
 /// Return every `media_files` row belonging to `project_id`, ordered by `created_at`.
 ///
 /// Fails with the first error raised while preparing the statement,
 /// running the query, or decoding a row.
 pub fn get_media_files_for_project(
    conn: &Connection,
    project_id: &str,
 ) -> Result<Vec<MediaFile>, DatabaseError> {
    let mut stmt = conn.prepare(
        "SELECT id, project_id, file_path, file_hash, duration_ms, sample_rate, channels, format, file_size, created_at FROM media_files WHERE project_id = ?1 ORDER BY created_at",
    )?;
    // Column indices follow the SELECT list above.
    let mapped = stmt.query_map(params![project_id], |row| {
        Ok(MediaFile {
            id: row.get(0)?,
            project_id: row.get(1)?,
            file_path: row.get(2)?,
            file_hash: row.get(3)?,
            duration_ms: row.get(4)?,
            sample_rate: row.get(5)?,
            channels: row.get(6)?,
            format: row.get(7)?,
            file_size: row.get(8)?,
            created_at: row.get(9)?,
        })
    })?;
    let mut media_files = Vec::new();
    for media_file in mapped {
        media_files.push(media_file?);
    }
    Ok(media_files)
 }
 // ── Speakers ──────────────────────────────────────────────────────
 pub fn create_speaker(
@@ -194,6 +245,39 @@ pub fn reassign_speaker(
    Ok(())
 }
 // ── Segments (create) ────────────────────────────────────────────
 /// Insert a new row into `segments` and return it as a `Segment`.
 ///
 /// The id is a fresh UUID v4. The row is stored with `is_edited = 0`, and the
 /// returned value has `original_text`, `confidence` and `edited_at` set to `None`.
 /// Any error from the INSERT is propagated through `?`.
 pub fn create_segment(
    conn: &Connection,
    project_id: &str,
    media_file_id: &str,
    speaker_id: Option<&str>,
    start_ms: i64,
    end_ms: i64,
    text: &str,
    segment_index: i32,
 ) -> Result<Segment, DatabaseError> {
    // Build the record first so the values bound below are exactly the ones returned.
    let segment = Segment {
        id: Uuid::new_v4().to_string(),
        project_id: project_id.to_owned(),
        media_file_id: media_file_id.to_owned(),
        speaker_id: speaker_id.map(str::to_owned),
        start_ms,
        end_ms,
        text: text.to_owned(),
        original_text: None,
        confidence: None,
        is_edited: false,
        edited_at: None,
        segment_index,
    };
    conn.execute(
        "INSERT INTO segments (id, project_id, media_file_id, speaker_id, start_ms, end_ms, text, is_edited, segment_index) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, 0, ?8)",
        params![
            segment.id,
            segment.project_id,
            segment.media_file_id,
            segment.speaker_id,
            segment.start_ms,
            segment.end_ms,
            segment.text,
            segment.segment_index
        ],
    )?;
    Ok(segment)
 }
 // ── Words ─────────────────────────────────────────────────────────
 pub fn get_words_for_segment(
@@ -217,6 +301,31 @@ pub fn get_words_for_segment(
    Ok(rows.collect::<Result<Vec<_>, _>>()?)
 }
 /// Insert a new row into `words` for `segment_id` and return it as a `Word`.
 ///
 /// The id is a fresh UUID v4; all other columns come straight from the arguments.
 /// Any error from the INSERT is propagated through `?`.
 pub fn create_word(
    conn: &Connection,
    segment_id: &str,
    word: &str,
    start_ms: i64,
    end_ms: i64,
    confidence: Option<f64>,
    word_index: i32,
 ) -> Result<Word, DatabaseError> {
    // Build the record first so the values bound below are exactly the ones returned.
    let record = Word {
        id: Uuid::new_v4().to_string(),
        segment_id: segment_id.to_owned(),
        word: word.to_owned(),
        start_ms,
        end_ms,
        confidence,
        word_index,
    };
    conn.execute(
        "INSERT INTO words (id, segment_id, word, start_ms, end_ms, confidence, word_index) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)",
        params![
            record.id,
            record.segment_id,
            record.word,
            record.start_ms,
            record.end_ms,
            record.confidence,
            record.word_index
        ],
    )?;
    Ok(record)
 }
 #[cfg(test)]
 mod tests {
    use super::*;
--- a/src-tauri/src/db/schema.rs
+++ b/src-tauri/src/db/schema.rs
@@ -96,11 +96,7 @@ pub fn create_tables(conn: &Connection) -> Result<(), DatabaseError> {
    )?;
    // Initialize schema version if empty
-    let count: i32 = conn.query_row(
+    let count: i32 = conn.query_row("SELECT COUNT(*) FROM schema_version", [], |row| row.get(0))?;
        "SELECT COUNT(*) FROM schema_version",
        [],
        |row| row.get(0),
    )?;
    if count == 0 {
        conn.execute(
            "INSERT INTO schema_version (version) VALUES (?1)",
--- a/src-tauri/src/lib.rs
+++ b/src-tauri/src/lib.rs
@@ -4,12 +4,22 @@ pub mod llama;
 pub mod sidecar;
 pub mod state;
 use tauri::window::Color;
 use tauri::Manager;
 use commands::ai::{ai_chat, ai_configure, ai_list_providers};
 use commands::export::export_transcript;
-use commands::project::{create_project, get_project, list_projects};
+use commands::media::{check_file_exists, copy_file, create_dir, extract_audio};
-use commands::settings::{load_settings, save_settings};
+use commands::project::{
-use commands::system::{get_data_dir, llama_list_models, llama_start, llama_status, llama_stop};
+    create_project, delete_project, get_project, list_projects, load_project_file,
-use commands::transcribe::{run_pipeline, transcribe_file};
+    load_project_transcript, save_project_file, save_project_transcript, update_segment,
 };
 use commands::settings::{load_settings, save_settings, toggle_devtools};
 use commands::sidecar::{check_sidecar, check_sidecar_update, download_sidecar};
 use commands::system::{
    get_data_dir, llama_list_models, llama_start, llama_status, llama_stop, log_frontend,
 };
 use commands::transcribe::{download_diarize_model, run_pipeline, transcribe_file};
 use state::AppState;
 #[cfg_attr(mobile, tauri::mobile_entry_point)]
@@ -20,12 +30,34 @@ pub fn run() {
        .plugin(tauri_plugin_opener::init())
        .plugin(tauri_plugin_dialog::init())
        .manage(app_state)
        .setup(|app| {
            // Tell the sidecar manager where Tauri placed bundled resources
            // and where to extract the sidecar archive
            if let (Ok(resource_dir), Ok(data_dir)) =
                (app.path().resource_dir(), app.path().app_local_data_dir())
            {
                sidecar::init_dirs(resource_dir, data_dir);
            }
            // Set the webview background to match the app's dark theme
            if let Some(window) = app.get_webview_window("main") {
                let _ = window.set_background_color(Some(Color(10, 10, 35, 255)));
            }
            Ok(())
        })
        .invoke_handler(tauri::generate_handler![
            create_project,
            get_project,
            list_projects,
            delete_project,
            save_project_transcript,
            load_project_transcript,
            update_segment,
            save_project_file,
            load_project_file,
            transcribe_file,
            run_pipeline,
            download_diarize_model,
            export_transcript,
            ai_chat,
            ai_list_providers,
@@ -37,6 +69,15 @@ pub fn run() {
            get_data_dir,
            load_settings,
            save_settings,
            check_sidecar,
            download_sidecar,
            check_sidecar_update,
            log_frontend,
            toggle_devtools,
            extract_audio,
            check_file_exists,
            copy_file,
            create_dir,
        ])
        .run(tauri::generate_context!())
        .expect("error while running tauri application");
--- a/src-tauri/src/llama/mod.rs
+++ b/src-tauri/src/llama/mod.rs
@@ -237,11 +237,7 @@ impl LlamaManager {
    /// Get the current status.
    pub fn status(&self) -> LlamaStatus {
-        let running = self
+        let running = self.process.lock().ok().map_or(false, |p| p.is_some());
            .process
            .lock()
            .ok()
            .map_or(false, |p| p.is_some());
        let port = self.port.lock().ok().map_or(0, |p| *p);
        let model = self
            .model_path
--- a/src-tauri/src/sidecar/mod.rs
+++ b/src-tauri/src/sidecar/mod.rs
@@ -2,19 +2,40 @@ pub mod ipc;
 pub mod messages;
 use std::io::{BufRead, BufReader, Write};
 use std::path::{Path, PathBuf};
 use std::process::{Child, ChildStdin, Command, Stdio};
 use std::sync::{Mutex, OnceLock};
 #[cfg(target_os = "windows")]
 use std::os::windows::process::CommandExt;
 use crate::sidecar::messages::IPCMessage;
 /// Resource directory set by the Tauri app during setup (via `init_dirs`).
 /// Searched for the bundled `sidecar.zip` archive.
 static RESOURCE_DIR: OnceLock<PathBuf> = OnceLock::new();
 /// App data directory set by the Tauri app during setup (via `init_dirs`).
 /// Holds `sidecar-version.txt`, the extracted `sidecar-{version}` directories
 /// and the `sidecar.log` file that captures the sidecar's stderr.
 pub(crate) static DATA_DIR: OnceLock<PathBuf> = OnceLock::new();
 /// Record the resource and app-data directories used for sidecar resolution.
 ///
 /// Must be called from the Tauri setup before any sidecar operations.
 /// Each directory can only be stored once: if a value is already present,
 /// the new one is silently discarded.
 pub fn init_dirs(resource_dir: PathBuf, data_dir: PathBuf) {
    // `OnceLock::set` returns Err when already initialised; the first call wins.
    let _ = RESOURCE_DIR.set(resource_dir);
    let _ = DATA_DIR.set(data_dir);
 }
 /// Return the process-wide `SidecarManager`, creating it on first use.
 pub fn sidecar() -> &'static SidecarManager {
    // Function-local static: the manager is built lazily and then shared.
    static MANAGER: OnceLock<SidecarManager> = OnceLock::new();
    MANAGER.get_or_init(SidecarManager::new)
 }
-/// Manages the Python sidecar process lifecycle.
+/// Manages the sidecar process lifecycle.
-/// Uses separated stdin/stdout ownership to avoid BufReader conflicts.
+///
 /// Supports two modes:
 /// - **Production**: spawns a frozen PyInstaller binary (no Python required)
 /// - **Dev mode**: spawns system Python with `-m voice_to_notes.main`
 ///
 /// Dev mode is active when compiled in debug mode or when the `VOICE_TO_NOTES_DEV` environment variable is set to any value (e.g. `VOICE_TO_NOTES_DEV=1`).
 pub struct SidecarManager {
    process: Mutex<Option<Child>>,
    stdin: Mutex<Option<ChildStdin>>,
@@ -30,38 +51,325 @@ impl SidecarManager {
        }
    }
    /// Whether the sidecar should be run through system Python (dev mode).
    ///
    /// True for debug builds, and for any build where the `VOICE_TO_NOTES_DEV`
    /// environment variable can be read. Its value is not inspected, so even
    /// `VOICE_TO_NOTES_DEV=0` enables dev mode.
    fn is_dev_mode() -> bool {
        if cfg!(debug_assertions) {
            return true;
        }
        std::env::var("VOICE_TO_NOTES_DEV").is_ok()
    }
    /// Read the installed sidecar version from `sidecar-version.txt` in the data dir.
    ///
    /// Returns the file contents with surrounding whitespace trimmed.
    ///
    /// # Errors
    /// - the data directory has not been initialised,
    /// - the file cannot be read (any read failure is reported as "not installed"),
    /// - the file is empty after trimming.
    fn read_sidecar_version() -> Result<String, String> {
        let data_dir = DATA_DIR.get().ok_or("App data directory not initialized")?;
        let contents = match std::fs::read_to_string(data_dir.join("sidecar-version.txt")) {
            Ok(contents) => contents,
            Err(_) => {
                return Err(
                    "Sidecar not installed: sidecar-version.txt not found. Please download the sidecar."
                        .to_string(),
                );
            }
        };
        let version = contents.trim();
        if version.is_empty() {
            return Err(
                "Sidecar version file is empty. Please re-download the sidecar.".to_string(),
            );
        }
        Ok(version.to_string())
    }
    /// Resolve the frozen sidecar binary path (production mode).
    ///
    /// The installed version is read from `sidecar-version.txt`, and the binary is
    /// expected inside `<data_dir>/sidecar-{version}/`. If it is missing there, the
    /// bundled `sidecar.zip` is located and extracted into that directory first.
    ///
    /// On every successful call (fresh extraction or not) execute permissions
    /// are re-applied on Unix and sidecar directories of other versions are removed.
    ///
    /// # Errors
    /// Returns an error if the data dir or version file is unavailable, the archive
    /// cannot be found or extracted, or the binary is still missing after extraction.
    fn resolve_sidecar_path() -> Result<PathBuf, String> {
        let binary_name = if cfg!(target_os = "windows") {
            "voice-to-notes-sidecar.exe"
        } else {
            "voice-to-notes-sidecar"
        };
        let data_dir = DATA_DIR.get().ok_or("App data directory not initialized")?;
        let current_version = Self::read_sidecar_version()?;
        let extract_dir = data_dir.join(format!("sidecar-{}", current_version));
        let binary_path = extract_dir.join(binary_name);

        // Only unpack the archive when this version has not been extracted yet.
        if !binary_path.exists() {
            let zip_path = Self::find_sidecar_zip()?;
            Self::extract_zip(&zip_path, &extract_dir)?;
            if !binary_path.exists() {
                return Err(format!(
                    "Sidecar binary not found after extraction at {}",
                    binary_path.display()
                ));
            }
        }

        // Runs for pre-existing directories too, so dirs extracted by older
        // versions without execute bits get repaired.
        #[cfg(unix)]
        Self::set_executable_permissions(&extract_dir);
        Self::cleanup_old_sidecars(data_dir, &current_version);
        Ok(binary_path)
    }
    /// Locate the bundled `sidecar.zip` archive.
    ///
    /// Candidates are checked in order: the resource directory (if initialised),
    /// then the directory containing the current executable. The first candidate
    /// that exists is returned; otherwise the error lists every path checked.
    fn find_sidecar_zip() -> Result<PathBuf, String> {
        let in_resources = RESOURCE_DIR.get().map(|dir| dir.join("sidecar.zip"));
        let beside_exe = std::env::current_exe()
            .ok()
            .and_then(|exe| exe.parent().map(|dir| dir.join("sidecar.zip")));
        let candidates: Vec<PathBuf> = in_resources.into_iter().chain(beside_exe).collect();

        if let Some(found) = candidates.iter().find(|candidate| candidate.exists()) {
            return Ok(found.clone());
        }

        let checked = candidates
            .iter()
            .map(|p| format!("  {}", p.display()))
            .collect::<Vec<_>>()
            .join("\n");
        Err(format!(
            "Sidecar archive not found. Checked:\n{}",
            checked,
        ))
    }
    /// Extract a zip archive to the given directory.
    ///
    /// `dest` is deleted and recreated first so old and new files never mix.
    /// Each entry is written beneath `dest` under its archive name; parent
    /// directories are created as needed.
    ///
    /// Entries whose names could resolve outside `dest` (absolute paths,
    /// drive/UNC prefixes, or any `..` component) are rejected and abort the
    /// extraction, so a malformed or malicious archive cannot overwrite
    /// arbitrary files ("zip slip").
    ///
    /// # Errors
    /// Returns a descriptive message on any I/O failure, an invalid archive,
    /// or an unsafe entry path.
    pub(crate) fn extract_zip(zip_path: &Path, dest: &Path) -> Result<(), String> {
        eprintln!(
            "[sidecar-rs] Extracting sidecar from {} to {}",
            zip_path.display(),
            dest.display()
        );
        // Clean destination so we don't mix old and new files
        if dest.exists() {
            std::fs::remove_dir_all(dest)
                .map_err(|e| format!("Failed to clean extraction dir: {e}"))?;
        }
        std::fs::create_dir_all(dest)
            .map_err(|e| format!("Failed to create extraction dir: {e}"))?;
        let file =
            std::fs::File::open(zip_path).map_err(|e| format!("Cannot open sidecar zip: {e}"))?;
        let mut archive =
            zip::ZipArchive::new(file).map_err(|e| format!("Invalid sidecar zip: {e}"))?;
        for i in 0..archive.len() {
            let mut entry = archive
                .by_index(i)
                .map_err(|e| format!("Zip entry error: {e}"))?;
            let name = entry.name().to_string();
            // Only plain relative components are allowed: `join` with an absolute
            // path replaces `dest`, and `..` climbs out of it.
            let stays_inside_dest = Path::new(&name).components().all(|component| {
                matches!(
                    component,
                    std::path::Component::Normal(_) | std::path::Component::CurDir
                )
            });
            if !stays_inside_dest {
                return Err(format!("Unsafe path in sidecar zip: {name}"));
            }
            let outpath = dest.join(&name);
            if entry.is_dir() {
                std::fs::create_dir_all(&outpath)
                    .map_err(|e| format!("Cannot create dir {}: {e}", outpath.display()))?;
            } else {
                if let Some(parent) = outpath.parent() {
                    std::fs::create_dir_all(parent)
                        .map_err(|e| format!("Cannot create dir {}: {e}", parent.display()))?;
                }
                let mut outfile = std::fs::File::create(&outpath)
                    .map_err(|e| format!("Cannot create {}: {e}", outpath.display()))?;
                std::io::copy(&mut entry, &mut outfile)
                    .map_err(|e| format!("Write error for {}: {e}", name))?;
            }
        }
        eprintln!("[sidecar-rs] Sidecar extracted successfully");
        Ok(())
    }
    /// Set mode `0o755` on every file directly inside `dir` (Unix only).
    ///
    /// Subdirectories are not descended into. The operation is best-effort:
    /// an unreadable directory or a failing metadata/chmod call is ignored.
    #[cfg(unix)]
    fn set_executable_permissions(dir: &Path) {
        use std::os::unix::fs::PermissionsExt;
        let Ok(listing) = std::fs::read_dir(dir) else {
            return;
        };
        let files = listing
            .flatten()
            .map(|entry| entry.path())
            .filter(|path| path.is_file());
        for file in files {
            if let Ok(meta) = std::fs::metadata(&file) {
                let mut perms = meta.permissions();
                perms.set_mode(0o755);
                let _ = std::fs::set_permissions(&file, perms);
            }
        }
    }
    /// Remove `sidecar-*` directories in `data_dir` that do not belong to
    /// `current_version`.
    ///
    /// Only directories are removed; files whose names start with `sidecar-`
    /// are left alone. Failures are logged to stderr and otherwise ignored.
    pub(crate) fn cleanup_old_sidecars(data_dir: &Path, current_version: &str) {
        let keep = format!("sidecar-{}", current_version);
        let listing = match std::fs::read_dir(data_dir) {
            Ok(listing) => listing,
            Err(e) => {
                eprintln!("[sidecar-rs] Cannot read data dir for cleanup: {e}");
                return;
            }
        };
        // Candidates: anything named like a sidecar dir except the current one.
        let stale = listing.flatten().filter(|entry| {
            let file_name = entry.file_name();
            let file_name = file_name.to_string_lossy();
            file_name.starts_with("sidecar-") && *file_name != keep
        });
        for entry in stale {
            if !entry.path().is_dir() {
                continue;
            }
            eprintln!(
                "[sidecar-rs] Removing old sidecar: {}",
                entry.path().display()
            );
            if let Err(e) = std::fs::remove_dir_all(entry.path()) {
                eprintln!(
                    "[sidecar-rs] Failed to remove {}: {e}",
                    entry.path().display()
                );
            }
        }
    }
    /// Name of the Python interpreter to launch on the current platform:
    /// `python` on Windows, `python3` everywhere else.
    ///
    /// The choice is made at compile time; nothing checks that the command exists.
    fn find_python_command() -> &'static str {
        if cfg!(target_os = "windows") {
            return "python";
        }
        "python3"
    }
    /// Resolve the Python sidecar directory for dev mode.
    ///
    /// Two locations are tried in order:
    /// 1. `<CARGO_MANIFEST_DIR>/../python` (manifest dir captured at compile time),
    /// 2. `<directory of the current executable>/../python`.
    ///
    /// The first one that can be canonicalized is returned. `canonicalize`
    /// fails for paths that do not exist, so a failure on the first candidate
    /// falls through to the second instead of aborting.
    ///
    /// # Errors
    /// Returns an error if the executable path or its parent cannot be
    /// determined, or if neither candidate exists.
    fn resolve_python_dir() -> Result<std::path::PathBuf, String> {
        let manifest_dir = env!("CARGO_MANIFEST_DIR");
        let from_manifest = std::path::Path::new(manifest_dir)
            .join("../python")
            .canonicalize();
        if let Ok(python_dir) = from_manifest {
            return Ok(python_dir);
        }
        // Fallback: relative to current exe
        let exe = std::env::current_exe().map_err(|e| e.to_string())?;
        exe.parent()
            .ok_or_else(|| "No parent dir".to_string())?
            .join("../python")
            .canonicalize()
            .map_err(|e| format!("Cannot find python directory: {e}"))
    }
    /// Ensure the sidecar is running, starting it if needed.
    pub fn ensure_running(&self) -> Result<(), String> {
        if self.is_running() {
            return Ok(());
        }
-        let python_path = std::env::current_dir()
+        if Self::is_dev_mode() {
-            .map_err(|e| e.to_string())?
+            self.start_python_dev()
-            .join("../python")
+        } else {
-            .canonicalize()
+            let path = Self::resolve_sidecar_path()?;
-            .map_err(|e| format!("Cannot find python directory: {e}"))?;
+            self.start_binary(&path)
-
+        }
        self.start(&python_path.to_string_lossy())
    }
-    /// Spawn the Python sidecar process.
+    /// Spawn the frozen sidecar binary (production mode).
-    pub fn start(&self, python_path: &str) -> Result<(), String> {
+    fn start_binary(&self, path: &std::path::Path) -> Result<(), String> {
        // Stop existing process if any
        self.stop().ok();
        eprintln!("[sidecar-rs] Starting frozen sidecar: {}", path.display());
-        let mut child = Command::new("python3")
+        // Log sidecar stderr to a file for diagnostics
        let stderr_cfg = if let Some(data_dir) = DATA_DIR.get() {
            let _ = std::fs::create_dir_all(data_dir);
            let log_path = data_dir.join("sidecar.log");
            eprintln!("[sidecar-rs] Sidecar stderr → {}", log_path.display());
            match std::fs::File::create(&log_path) {
                Ok(f) => Stdio::from(f),
                Err(e) => {
                    eprintln!("[sidecar-rs] Failed to create sidecar.log: {e}");
                    Stdio::inherit()
                }
            }
        } else {
            eprintln!("[sidecar-rs] DATA_DIR not set, sidecar stderr will not be logged");
            Stdio::inherit()
        };
        let mut cmd = Command::new(path);
        cmd.stdin(Stdio::piped())
            .stdout(Stdio::piped())
            .stderr(stderr_cfg);
        // Hide the console window on Windows (CREATE_NO_WINDOW = 0x08000000)
        #[cfg(target_os = "windows")]
        cmd.creation_flags(0x08000000);
        let child = cmd
            .spawn()
            .map_err(|e| format!("Failed to start sidecar binary: {e}"))?;
        self.attach(child)?;
        self.wait_for_ready()
    }
    /// Spawn the Python sidecar in dev mode (system Python).
    fn start_python_dev(&self) -> Result<(), String> {
        self.stop().ok();
        let python_dir = Self::resolve_python_dir()?;
        let python_cmd = Self::find_python_command();
        eprintln!(
            "[sidecar-rs] Starting dev sidecar: {} -m voice_to_notes.main ({})",
            python_cmd,
            python_dir.display()
        );
        let child = Command::new(python_cmd)
            .arg("-m")
            .arg("voice_to_notes.main")
-            .current_dir(python_path)
+            .current_dir(&python_dir)
-            .env("PYTHONPATH", python_path)
+            .env("PYTHONPATH", &python_dir)
            .stdin(Stdio::piped())
            .stdout(Stdio::piped())
            .stderr(Stdio::inherit())
            .spawn()
-            .map_err(|e| format!("Failed to start sidecar: {e}"))?;
+            .map_err(|e| format!("Failed to start Python sidecar: {e}"))?;
-        // Take ownership of stdin and stdout separately
+        self.attach(child)?;
        self.wait_for_ready()
    }
    /// Take ownership of a spawned child's stdin/stdout and store the process handle.
    fn attach(&self, mut child: Child) -> Result<(), String> {
        let stdin = child.stdin.take().ok_or("Failed to get sidecar stdin")?;
        let stdout = child.stdout.take().ok_or("Failed to get sidecar stdout")?;
        let buf_reader = BufReader::new(stdout);
@@ -78,10 +386,6 @@ impl SidecarManager {
            let mut r = self.reader.lock().map_err(|e| e.to_string())?;
            *r = Some(buf_reader);
        }
        // Wait for the "ready" message
        self.wait_for_ready()?;
        Ok(())
    }
@@ -96,7 +400,22 @@ impl SidecarManager {
                    .read_line(&mut line)
                    .map_err(|e| format!("Read error: {e}"))?;
                if bytes == 0 {
-                    return Err("Sidecar closed stdout before sending ready".to_string());
+                    // Try to get the exit code for diagnostics
                    let exit_info = {
                        let mut proc = self.process.lock().map_err(|e| e.to_string())?;
                        if let Some(ref mut child) = *proc {
                            match child.try_wait() {
                                Ok(Some(status)) => format!(" (exit status: {status})"),
                                _ => String::new(),
                            }
                        } else {
                            String::new()
                        }
                    };
                    return Err(format!(
                        "Sidecar closed stdout before sending ready{exit_info}. \
                         The Python sidecar may have crashed on startup — check app logs for details."
                    ));
                }
                let trimmed = line.trim();
                if trimmed.is_empty() {
@@ -107,8 +426,12 @@ impl SidecarManager {
                        return Ok(());
                    }
                }
-                // Non-ready message: something is wrong
+                // Non-JSON or non-ready line — skip and keep waiting
-                break;
+                eprintln!(
                    "[sidecar-rs] Skipping pre-ready line: {}",
                    &trimmed[..trimmed.len().min(200)]
                );
                continue;
            }
        }
        Err("Sidecar did not send ready message".to_string())
@@ -120,12 +443,51 @@ impl SidecarManager {
        self.send_and_receive_with_progress(msg, |_| {})
    }
-    /// Send a message and read the response, calling on_progress for each progress message.
+    /// Send a message and receive the response, calling a callback for intermediate messages.
-    pub fn send_and_receive_with_progress(
+    /// Intermediate messages include progress, pipeline.segment, and pipeline.speaker_update.
    ///
    /// If the sidecar has crashed (broken pipe), automatically restarts it and retries once.
    pub fn send_and_receive_with_progress<F>(
        &self,
        msg: &IPCMessage,
-        on_progress: impl Fn(&IPCMessage),
+        on_intermediate: F,
-    ) -> Result<IPCMessage, String> {
+    ) -> Result<IPCMessage, String>
    where
        F: Fn(&IPCMessage),
    {
        match self.send_and_receive_inner(msg, &on_intermediate) {
            Ok(response) => Ok(response),
            Err(e)
                if e.contains("Write error")
                    || e.contains("closed stdout")
                    || e.contains("not available") =>
            {
                eprintln!("[sidecar-rs] Sidecar communication failed ({e}), restarting...");
                self.cleanup_handles();
                // Stop any zombie process
                {
                    let mut proc = self.process.lock().map_err(|e| e.to_string())?;
                    if let Some(ref mut child) = proc.take() {
                        let _ = child.kill();
                        let _ = child.wait();
                    }
                }
                self.ensure_running()?;
                self.send_and_receive_inner(msg, &on_intermediate)
            }
            Err(e) => Err(e),
        }
    }
    /// Inner implementation of send_and_receive.
    fn send_and_receive_inner<F>(
        &self,
        msg: &IPCMessage,
        on_intermediate: &F,
    ) -> Result<IPCMessage, String>
    where
        F: Fn(&IPCMessage),
    {
        // Write to stdin
        {
            let mut stdin_guard = self.stdin.lock().map_err(|e| e.to_string())?;
@@ -160,15 +522,20 @@ impl SidecarManager {
                    if trimmed.is_empty() {
                        continue;
                    }
-                    let response: IPCMessage = serde_json::from_str(trimmed)
+                    let response: IPCMessage =
-                        .map_err(|e| format!("Parse error: {e}"))?;
+                        serde_json::from_str(trimmed).map_err(|e| format!("Parse error: {e}"))?;
-                    if response.msg_type == "progress" {
+                    // Forward intermediate messages via callback, return the final result/error
-                        on_progress(&response);
+                    let is_intermediate = matches!(
-                        continue;
+                        response.msg_type.as_str(),
-                    }
+                        "progress" | "pipeline.segment" | "pipeline.speaker_update"
                    );
                    if is_intermediate {
                        on_intermediate(&response);
                    } else {
                        return Ok(response);
                    }
                }
            } else {
                Err("Sidecar stdout not available".to_string())
            }
@@ -203,8 +570,39 @@ impl SidecarManager {
    }
    pub fn is_running(&self) -> bool {
-        let proc = self.process.lock().ok();
+        let mut proc = match self.process.lock() {
-        proc.map_or(false, |p| p.is_some())
+            Ok(p) => p,
            Err(_) => return false,
        };
        if let Some(ref mut child) = *proc {
            // Check if the process has exited
            match child.try_wait() {
                Ok(Some(_status)) => {
                    // Process has exited — clean up handles
                    eprintln!("[sidecar-rs] Sidecar process has exited");
                    drop(proc);
                    let _ = self.cleanup_handles();
                    false
                }
                Ok(None) => true, // Still running
                Err(_) => false,
            }
        } else {
            false
        }
    }
    /// Drop the stored stdin, stdout reader and process handles, in that order.
    ///
    /// Each slot is reset to `None` under its own mutex; a slot whose mutex
    /// cannot be locked is skipped.
    fn cleanup_handles(&self) {
        if let Ok(mut stdin) = self.stdin.lock() {
            drop(stdin.take());
        }
        if let Ok(mut reader) = self.reader.lock() {
            drop(reader.take());
        }
        if let Ok(mut process) = self.process.lock() {
            drop(process.take());
        }
    }
 }
--- a/src-tauri/src/state.rs
+++ b/src-tauri/src/state.rs
@@ -15,12 +15,10 @@ pub struct AppState {
 impl AppState {
    pub fn new() -> Result<Self, String> {
        let data_dir = LlamaManager::data_dir();
-        std::fs::create_dir_all(&data_dir)
+        std::fs::create_dir_all(&data_dir).map_err(|e| format!("Cannot create data dir: {e}"))?;
            .map_err(|e| format!("Cannot create data dir: {e}"))?;
        let db_path = data_dir.join("voice_to_notes.db");
-        let conn = db::open_database(&db_path)
+        let conn = db::open_database(&db_path).map_err(|e| format!("Cannot open database: {e}"))?;
            .map_err(|e| format!("Cannot open database: {e}"))?;
        Ok(Self {
            db: Mutex::new(conn),
--- a/src-tauri/tauri.conf.json
+++ b/src-tauri/tauri.conf.json
@@ -1,7 +1,7 @@
 {
  "$schema": "https://schema.tauri.app/config/2",
  "productName": "Voice to Notes",
-  "version": "0.1.0",
+  "version": "0.2.44",
  "identifier": "com.voicetonotes.app",
  "build": {
    "beforeDevCommand": "npm run dev",
@@ -16,11 +16,13 @@
        "width": 1200,
        "height": 800,
        "minWidth": 800,
-        "minHeight": 600
+        "minHeight": 600,
        "decorations": true,
        "transparent": false
      }
    ],
    "security": {
-      "csp": "default-src 'self'; img-src 'self' asset: https://asset.localhost; media-src 'self' asset: https://asset.localhost; style-src 'self' 'unsafe-inline'",
+      "csp": "default-src 'self' http://tauri.localhost; connect-src ipc: http://ipc.localhost http://asset.localhost https://asset.localhost blob:; img-src 'self' asset: http://asset.localhost https://asset.localhost blob:; media-src 'self' asset: http://asset.localhost https://asset.localhost blob:; style-src 'self' 'unsafe-inline'",
      "assetProtocol": {
        "enable": true,
        "scope": ["**"]
@@ -29,7 +31,7 @@
  },
  "bundle": {
    "active": true,
-    "targets": "all",
+    "targets": ["deb", "rpm", "nsis", "msi", "dmg"],
    "icon": [
      "icons/32x32.png",
      "icons/128x128.png",
@@ -40,17 +42,18 @@
    "category": "Utility",
    "shortDescription": "Transcribe audio/video with speaker identification",
    "longDescription": "Voice to Notes is a desktop application that transcribes audio and video recordings with speaker identification, synchronized playback, and AI-powered analysis. Export to SRT, WebVTT, ASS captions, or plain text.",
    "resources": [],
    "copyright": "Voice to Notes Contributors",
    "license": "MIT",
    "linux": {
      "deb": {
-        "depends": ["python3", "python3-pip"]
+        "depends": []
      },
      "appimage": {
        "bundleMediaFramework": true
      }
    },
    "windows": {
      "nsis": {
        "installerHooks": "nsis-hooks.nsh"
      },
      "wix": {
        "language": "en-US"
      }
--- a/src/app.html
+++ b/src/app.html
@@ -1,5 +1,5 @@
 <!doctype html>
-<html lang="en">
+<html lang="en" style="margin:0;padding:0;background:#0a0a23;height:100%;">
  <head>
    <meta charset="utf-8" />
    <link rel="icon" href="%sveltekit.assets%/favicon.png" />
@@ -7,7 +7,7 @@
    <title>Voice to Notes</title>
    %sveltekit.head%
  </head>
-  <body data-sveltekit-preload-data="hover">
+  <body data-sveltekit-preload-data="hover" style="margin:0;padding:0;background:#0a0a23;overflow:hidden;">
    <div style="display: contents">%sveltekit.body%</div>
  </body>
 </html>
--- a/src/lib/components/AIChatPanel.svelte
+++ b/src/lib/components/AIChatPanel.svelte
@@ -1,6 +1,7 @@
 <script lang="ts">
  import { invoke } from '@tauri-apps/api/core';
  import { segments, speakers } from '$lib/stores/transcript';
  import { settings, configureAIProvider } from '$lib/stores/settings';
  interface ChatMessage {
    role: 'user' | 'assistant';
@@ -43,9 +44,13 @@
        content: m.content,
      }));
      // Ensure the provider is configured with current credentials before chatting
      await configureAIProvider($settings);
      const result = await invoke<{ response: string }>('ai_chat', {
        messages: chatMessages,
        transcriptContext: getTranscriptContext(),
        provider: $settings.ai_provider,
      });
      messages = [...messages, { role: 'assistant', content: result.response }];
@@ -73,6 +78,88 @@
    messages = [];
  }
  // Convert a small Markdown subset into an HTML string.
  //
  // The text is handled one line at a time:
  //   "# ", "## ", "### "  -> <h2>, <h3>, <h4>
  //   "- " or "* "         -> <li> inside a <ul>
  //   "1. " etc.           -> <li> inside an <ol>
  //   blank line           -> <br>
  //   anything else        -> the line itself
  // Inline markup on each line is delegated to applyInlineFormatting().
  //
  // The kind of list currently open is tracked explicitly, so a list is always
  // closed with its own tag (an <ol> followed by a header is closed with </ol>,
  // not </ul>) and switching between bullet and numbered items starts a new list.
  //
  // Returns the generated HTML fragments joined with newlines.
  function formatMarkdown(text: string): string {
    const result: string[] = [];
    // Tag name of the list that is currently open, or null outside a list.
    let openList: 'ul' | 'ol' | null = null;

    // Close the open list, if there is one, with its matching tag.
    const closeList = () => {
      if (openList !== null) {
        result.push(`</${openList}>`);
        openList = null;
      }
    };

    // Emit one list item, opening a list of the given kind first when needed.
    const pushItem = (kind: 'ul' | 'ol', rawItem: string) => {
      if (openList !== kind) {
        closeList();
        result.push(`<${kind}>`);
        openList = kind;
      }
      result.push(`<li>${applyInlineFormatting(rawItem)}</li>`);
    };

    // Header prefixes and the tags they map to.
    const headers: Array<[string, string]> = [
      ['### ', 'h4'],
      ['## ', 'h3'],
      ['# ', 'h2'],
    ];

    for (const line of text.split('\n')) {
      // Headers
      const header = headers.find(([prefix]) => line.startsWith(prefix));
      if (header) {
        const [prefix, tag] = header;
        closeList();
        result.push(`<${tag}>${applyInlineFormatting(line.slice(prefix.length))}</${tag}>`);
        continue;
      }

      // List items (- or *)
      if (/^[\-\*] /.test(line)) {
        pushItem('ul', line.slice(2));
        continue;
      }

      // Numbered list items
      if (/^\d+\.\s/.test(line)) {
        pushItem('ol', line.replace(/^\d+\.\s/, ''));
        continue;
      }

      // Non-list line: close any open list
      closeList();

      // Empty line = paragraph break
      if (line.trim() === '') {
        result.push('<br>');
        continue;
      }

      // Regular text line
      result.push(applyInlineFormatting(line));
    }

    // Close any trailing open list
    closeList();

    return result.join('\n');
  }
  // Apply inline Markdown formatting (`code`, **bold**, *italic*) to one line
  // of text and return it as an HTML string.
  //
  // The result is injected into the page as raw HTML, so the input is
  // HTML-escaped before any tags are added: markup contained in the text is
  // displayed literally instead of being interpreted by the browser.
  function applyInlineFormatting(text: string): string {
    // Escape HTML-significant characters. '&' must go first so the entities
    // produced by the later replacements are not escaped a second time.
    text = text
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#39;');
    // Code blocks (backtick) — process first to avoid conflicts
    text = text.replace(/`([^`]+)`/g, '<code>$1</code>');
    // Bold (**text**)
    text = text.replace(/\*\*([^*]+)\*\*/g, '<strong>$1</strong>');
    // Italic (*text*) — only single asterisks not already consumed by bold
    text = text.replace(/\*([^*]+)\*/g, '<em>$1</em>');
    return text;
  }
  // Quick action buttons
  async function summarize() {
    inputText = 'Please summarize this transcript in bullet points.';
@@ -107,7 +194,11 @@
    {:else}
      {#each messages as msg}
        <div class="message {msg.role}">
          {#if msg.role === 'assistant'}
            <div class="message-content">{@html formatMarkdown(msg.content)}</div>
          {:else}
            <div class="message-content">{msg.content}</div>
          {/if}
        </div>
      {/each}
      {#if isLoading}
@@ -177,47 +268,101 @@
  }
  .empty-state {
    text-align: center;
-    color: #666;
+    color: #888;
-    font-size: 0.8rem;
+    font-size: 0.85rem;
-    padding: 1rem 0;
+    padding: 2rem 1rem;
  }
  .empty-state p {
    margin-bottom: 1rem;
  }
  .quick-actions {
    display: flex;
-    gap: 0.5rem;
+    gap: 0.75rem;
    justify-content: center;
-    margin-top: 0.5rem;
+    margin-top: 1rem;
  }
  .quick-btn {
    background: rgba(233, 69, 96, 0.15);
    border: 1px solid rgba(233, 69, 96, 0.3);
    color: #e94560;
-    padding: 0.3rem 0.6rem;
+    padding: 0.45rem 0.85rem;
-    border-radius: 4px;
+    border-radius: 6px;
    cursor: pointer;
-    font-size: 0.75rem;
+    font-size: 0.8rem;
    transition: background 0.15s;
  }
  .quick-btn:hover {
    background: rgba(233, 69, 96, 0.25);
  }
  .message {
-    margin-bottom: 0.5rem;
+    margin-bottom: 0.75rem;
-    padding: 0.5rem 0.75rem;
+    padding: 0.75rem 1rem;
-    border-radius: 6px;
+    border-radius: 8px;
    font-size: 0.8rem;
-    line-height: 1.4;
+    line-height: 1.55;
  }
  .message.user {
    background: rgba(233, 69, 96, 0.15);
-    margin-left: 1rem;
+    border-left: 3px solid rgba(233, 69, 96, 0.4);
  }
  .message.assistant {
    background: rgba(255, 255, 255, 0.05);
-    margin-right: 1rem;
+    border-left: 3px solid rgba(255, 255, 255, 0.1);
  }
  .message.loading {
    opacity: 0.6;
    font-style: italic;
  }
  /* Markdown styles inside assistant messages */
  .message.assistant :global(h2) {
    font-size: 1rem;
    font-weight: 600;
    margin: 0.6rem 0 0.3rem;
    color: #f0f0f0;
  }
  .message.assistant :global(h3) {
    font-size: 0.9rem;
    font-weight: 600;
    margin: 0.5rem 0 0.25rem;
    color: #e8e8e8;
  }
  .message.assistant :global(h4) {
    font-size: 0.85rem;
    font-weight: 600;
    margin: 0.4rem 0 0.2rem;
    color: #e0e0e0;
  }
  .message.assistant :global(strong) {
    color: #f0f0f0;
    font-weight: 600;
  }
  .message.assistant :global(em) {
    color: #ccc;
    font-style: italic;
  }
  .message.assistant :global(code) {
    background: rgba(0, 0, 0, 0.3);
    color: #e94560;
    padding: 0.1rem 0.35rem;
    border-radius: 3px;
    font-size: 0.75rem;
    font-family: 'Fira Code', 'Cascadia Code', 'Consolas', monospace;
  }
  .message.assistant :global(ul),
  .message.assistant :global(ol) {
    margin: 0.35rem 0;
    padding-left: 1.3rem;
  }
  .message.assistant :global(li) {
    margin-bottom: 0.25rem;
    line-height: 1.5;
  }
  .message.assistant :global(br) {
    display: block;
    content: '';
    margin-top: 0.35rem;
  }
  .chat-input {
    display: flex;
    gap: 0.5rem;
--- a/src/lib/components/ProgressOverlay.svelte
+++ b/src/lib/components/ProgressOverlay.svelte
@@ -4,19 +4,121 @@
    percent?: number;
    stage?: string;
    message?: string;
    onCancel?: () => void;
  }
-  let { visible = false, percent = 0, stage = '', message = '' }: Props = $props();
+  let { visible = false, percent = 0, stage = '', message = '', onCancel }: Props = $props();
  let showConfirm = $state(false);
  // Cancel button handler: show the confirmation box instead of cancelling right away.
  function handleCancelClick() {
    showConfirm = true;
  }
  // "Cancel Processing" handler: hide the confirmation box, then call the
  // onCancel callback if the parent supplied one.
  function confirmCancel() {
    showConfirm = false;
    onCancel?.();
  }
  // "Continue Processing" handler: hide the confirmation box and leave processing running.
  function dismissCancel() {
    showConfirm = false;
  }
  // Pipeline steps in order
  const pipelineSteps = [
    { key: 'loading_model', label: 'Load transcription model' },
    { key: 'transcribing', label: 'Transcribe audio' },
    { key: 'loading_diarization', label: 'Load speaker detection model' },
    { key: 'diarizing', label: 'Identify speakers' },
    { key: 'merging', label: 'Merge results' },
  ];
  const stepOrder = pipelineSteps.map(s => s.key);
  // Track the highest step index we've reached (never goes backward)
  let highestStepIdx = $state(-1);
  // Resolve a stage key to its position in the pipeline step list.
  // Returns -1 for stages that are not pipeline steps.
  function stageToStepIdx(s: string): number {
    const position = stepOrder.indexOf(s);
    // Stages outside the step list (e.g. 'pipeline') yield -1, which can never
    // exceed the high-water mark and therefore leaves it unchanged.
    return position < 0 ? -1 : position;
  }
  // Keep per-run UI state in sync with the overlay's visibility and stage.
  $effect(() => {
    if (!visible) {
      // Overlay hidden: forget the progress of the finished run, and also drop
      // a cancel confirmation that was still open. Otherwise the next time the
      // overlay appears it would start with the confirmation box showing.
      highestStepIdx = -1;
      showConfirm = false;
      return;
    }
    // Only ever move the high-water mark forward.
    const idx = stageToStepIdx(stage);
    if (idx > highestStepIdx) {
      highestStepIdx = idx;
    }
  });
  // Classify a pipeline step relative to the furthest step reached so far:
  // the furthest step is 'active', earlier ones are 'done', the rest 'pending'.
  function getStepStatus(stepIdx: number): 'pending' | 'active' | 'done' {
    const reached = highestStepIdx;
    if (stepIdx === reached) {
      return 'active';
    }
    return stepIdx < reached ? 'done' : 'pending';
  }
  // User-friendly display of current stage
  const stageLabels: Record<string, string> = {
    'pipeline': 'Initializing...',
    'loading_model': 'Loading Model',
    'transcribing': 'Transcribing',
    'loading_diarization': 'Loading Diarization',
    'diarizing': 'Speaker Detection',
    'merging': 'Merging Results',
    'done': 'Complete',
  };
  let displayStage = $derived(stageLabels[stage] || stage || 'Processing...');
 </script>
 {#if visible}
  <div class="overlay">
    <div class="progress-card">
-      <h3>{stage}</h3>
+      <div class="spinner-row">
-      <div class="bar-track">
+        <div class="spinner"></div>
-        <div class="bar-fill" style="width: {percent}%"></div>
+        <h3>{displayStage}</h3>
      </div>
-      <p>{percent}% — {message}</p>
+
      <div class="steps">
        {#each pipelineSteps as step, idx}
          {@const status = getStepStatus(idx)}
          <div class="step" class:step-done={status === 'done'} class:step-active={status === 'active'}>
            <span class="step-icon">
              {#if status === 'done'}
                ✓
              {:else if status === 'active'}
                ⟳
              {:else}
                ·
              {/if}
            </span>
            <span class="step-label">{step.label}</span>
          </div>
        {/each}
      </div>
      <p class="status-text">{message || 'Please wait...'}</p>
      <p class="hint-text">This may take several minutes for large files</p>
      {#if onCancel && !showConfirm}
        <button class="cancel-btn" onclick={handleCancelClick}>Cancel</button>
      {/if}
      {#if showConfirm}
        <div class="confirm-box">
          <p class="confirm-text">Processing is incomplete. If you cancel now, the transcription will need to be started over.</p>
          <div class="confirm-actions">
            <button class="confirm-keep" onclick={dismissCancel}>Continue Processing</button>
            <button class="confirm-cancel" onclick={confirmCancel}>Cancel Processing</button>
          </div>
        </div>
      {/if}
    </div>
  </div>
 {/if}
@@ -25,34 +127,139 @@
  .overlay {
    position: fixed;
    inset: 0;
-    background: rgba(0, 0, 0, 0.7);
+    background: rgba(0, 0, 0, 0.8);
    display: flex;
    align-items: center;
    justify-content: center;
-    z-index: 1000;
+    z-index: 9999;
  }
  .progress-card {
    background: #16213e;
-    padding: 2rem;
+    padding: 2rem 2.5rem;
    border-radius: 12px;
-    min-width: 400px;
+    min-width: 380px;
    max-width: 440px;
    color: #e0e0e0;
    border: 1px solid #2a3a5e;
    box-shadow: 0 8px 32px rgba(0, 0, 0, 0.5);
  }
-  h3 { margin: 0 0 1rem; text-transform: capitalize; }
+  .spinner-row {
-  .bar-track {
+    display: flex;
-    height: 8px;
+    align-items: center;
-    background: #0f3460;
+    gap: 0.75rem;
-    border-radius: 4px;
+    margin-bottom: 1.25rem;
    overflow: hidden;
  }
-  .bar-fill {
+  .spinner {
-    height: 100%;
+    width: 20px;
-    background: #e94560;
+    height: 20px;
-    transition: width 0.3s;
+    border: 3px solid #2a3a5e;
    border-top-color: #e94560;
    border-radius: 50%;
    animation: spin 0.8s linear infinite;
    flex-shrink: 0;
  }
-  p {
+  @keyframes spin {
    to { transform: rotate(360deg); }
  }
  h3 {
    margin: 0;
    font-size: 1.1rem;
  }
  .steps {
    display: flex;
    flex-direction: column;
    gap: 0.4rem;
    margin-bottom: 1rem;
  }
  .step {
    display: flex;
    align-items: center;
    gap: 0.5rem;
    font-size: 0.85rem;
    color: #555;
  }
  .step-done {
    color: #4ecdc4;
  }
  .step-active {
    color: #e0e0e0;
    font-weight: 500;
  }
  .step-icon {
    width: 1.2rem;
    text-align: center;
    flex-shrink: 0;
  }
  .step-active .step-icon {
    animation: spin 1.5s linear infinite;
    display: inline-block;
  }
  .status-text {
    margin: 0.75rem 0 0;
    font-size: 0.85rem;
    color: #b0b0b0;
  }
  .hint-text {
    margin: 0.5rem 0 0;
-    font-size: 0.875rem;
+    font-size: 0.75rem;
    color: #555;
  }
  .cancel-btn {
    margin-top: 1.25rem;
    width: 100%;
    padding: 0.5rem;
    background: none;
    border: 1px solid #4a5568;
    color: #999;
    border-radius: 6px;
    cursor: pointer;
    font-size: 0.85rem;
  }
  .cancel-btn:hover {
    color: #e0e0e0;
    border-color: #e94560;
  }
  .confirm-box {
    margin-top: 1.25rem;
    padding: 0.75rem;
    background: rgba(233, 69, 96, 0.08);
    border: 1px solid #e94560;
    border-radius: 6px;
  }
  .confirm-text {
    margin: 0 0 0.75rem;
    font-size: 0.8rem;
    color: #e0e0e0;
    line-height: 1.4;
  }
  .confirm-actions {
    display: flex;
    gap: 0.5rem;
  }
  .confirm-keep {
    flex: 1;
    padding: 0.4rem;
    background: #0f3460;
    border: 1px solid #4a5568;
    color: #e0e0e0;
    border-radius: 4px;
    cursor: pointer;
    font-size: 0.8rem;
  }
  .confirm-keep:hover {
    background: #1a4a7a;
  }
  .confirm-cancel {
    flex: 1;
    padding: 0.4rem;
    background: #e94560;
    border: none;
    color: white;
    border-radius: 4px;
    cursor: pointer;
    font-size: 0.8rem;
  }
  .confirm-cancel:hover {
    background: #d63851;
  }
 </style>
--- a/src/lib/components/SettingsModal.svelte
+++ b/src/lib/components/SettingsModal.svelte
@@ -1,4 +1,6 @@
 <script lang="ts">
  import { invoke } from '@tauri-apps/api/core';
  import { openUrl } from '@tauri-apps/plugin-opener';
  import { settings, saveSettings, type AppSettings } from '$lib/stores/settings';
  interface Props {
@@ -9,7 +11,34 @@
  let { visible, onClose }: Props = $props();
  let localSettings = $state<AppSettings>({ ...$settings });
-  let activeTab = $state<'transcription' | 'ai' | 'local'>('transcription');
+  let activeTab = $state<'transcription' | 'speakers' | 'ai' | 'debug'>('transcription');
  let modelStatus = $state<'idle' | 'downloading' | 'success' | 'error'>('idle');
  let modelError = $state('');
  let revealedFields = $state<Set<string>>(new Set());
  // Validate that a HuggingFace token is present, then invoke the
  // 'download_diarize_model' command with it. The outcome is reported through
  // modelStatus ('downloading' -> 'success' | 'error') and modelError.
  async function testAndDownloadModel() {
    // Switch to the error state with the given message.
    const fail = (reason: string) => {
      modelStatus = 'error';
      modelError = reason;
    };

    const token = localSettings.hf_token;
    if (!token) {
      fail('Please enter a HuggingFace token first.');
      return;
    }

    modelStatus = 'downloading';
    modelError = '';

    try {
      const outcome = await invoke<{ ok: boolean; error?: string }>('download_diarize_model', {
        hfToken: token,
      });
      if (!outcome.ok) {
        // The command completed but reported a failure of its own.
        fail(outcome.error || 'Unknown error');
        return;
      }
      modelStatus = 'success';
    } catch (err) {
      // The invoke call itself rejected.
      fail(String(err));
    }
  }
  // Sync when settings store changes
  $effect(() => {
@@ -46,11 +75,14 @@
        <button class="tab" class:active={activeTab === 'transcription'} onclick={() => activeTab = 'transcription'}>
          Transcription
        </button>
        <button class="tab" class:active={activeTab === 'speakers'} onclick={() => activeTab = 'speakers'}>
          Speakers
        </button>
        <button class="tab" class:active={activeTab === 'ai'} onclick={() => activeTab = 'ai'}>
          AI Provider
        </button>
-        <button class="tab" class:active={activeTab === 'local'} onclick={() => activeTab = 'local'}>
+        <button class="tab" class:active={activeTab === 'debug'} onclick={() => activeTab = 'debug'}>
-          Local AI
+          Debug
        </button>
      </div>
@@ -77,27 +109,105 @@
            <label for="stt-lang">Language (blank = auto-detect)</label>
            <input id="stt-lang" type="text" bind:value={localSettings.transcription_language} placeholder="e.g., en, es, fr" />
          </div>
-          <div class="field checkbox">
+        {:else if activeTab === 'speakers'}
          <div class="field">
            <label for="hf-token">HuggingFace Token</label>
            <div class="input-reveal">
              <input id="hf-token" type={revealedFields.has('hf-token') ? 'text' : 'password'} bind:value={localSettings.hf_token} placeholder="hf_..." />
              <button type="button" class="reveal-btn" onclick={() => { const s = new Set(revealedFields); s.has('hf-token') ? s.delete('hf-token') : s.add('hf-token'); revealedFields = s; }}>{revealedFields.has('hf-token') ? 'Hide' : 'Show'}</button>
            </div>
          </div>
          <div class="info-box">
            <p class="info-title">Setup (one-time)</p>
            <p>Speaker detection uses <strong>pyannote.audio</strong> models hosted on HuggingFace. You must accept the license for each model:</p>
            <ol>
              <li>Create a free account at <!-- svelte-ignore a11y_no_static_element_interactions --><a class="ext-link" onclick={() => openUrl('https://huggingface.co/join')}>huggingface.co</a></li>
              <li>Accept the license on <strong>all three</strong> of these pages:
                <ul>
                  <!-- svelte-ignore a11y_no_static_element_interactions -->
                  <li><a class="ext-link" onclick={() => openUrl('https://huggingface.co/pyannote/speaker-diarization-3.1')}>pyannote/speaker-diarization-3.1</a></li>
                  <!-- svelte-ignore a11y_no_static_element_interactions -->
                  <li><a class="ext-link" onclick={() => openUrl('https://huggingface.co/pyannote/segmentation-3.0')}>pyannote/segmentation-3.0</a></li>
                  <!-- svelte-ignore a11y_no_static_element_interactions -->
                  <li><a class="ext-link" onclick={() => openUrl('https://huggingface.co/pyannote/speaker-diarization-community-1')}>pyannote/speaker-diarization-community-1</a></li>
                </ul>
              </li>
              <!-- svelte-ignore a11y_no_static_element_interactions -->
              <li>Create a token at <a class="ext-link" onclick={() => openUrl('https://huggingface.co/settings/tokens')}>huggingface.co/settings/tokens</a> (read access)</li>
              <li>Paste the token above and click <strong>Test & Download</strong></li>
            </ol>
          </div>
          <button
            class="btn-download"
            onclick={testAndDownloadModel}
            disabled={modelStatus === 'downloading'}
          >
            {#if modelStatus === 'downloading'}
              Downloading model...
            {:else}
              Test & Download Model
            {/if}
          </button>
          {#if modelStatus === 'success'}
            <p class="status-success">Model downloaded successfully. Speaker detection is ready.</p>
          {/if}
          {#if modelStatus === 'error'}
            <p class="status-error">{modelError}</p>
          {/if}
          <div class="field" style="margin-top: 1rem;">
            <label for="num-speakers">Number of speakers</label>
            <select
              id="num-speakers"
              value={localSettings.num_speakers === null || localSettings.num_speakers === 0 ? '0' : String(localSettings.num_speakers)}
              onchange={(e) => {
                const v = parseInt((e.target as HTMLSelectElement).value, 10);
                localSettings.num_speakers = v === 0 ? null : v;
              }}
            >
              <option value="0">Auto-detect</option>
              {#each Array.from({ length: 20 }, (_, i) => i + 1) as n}
                <option value={String(n)}>{n}</option>
              {/each}
            </select>
            <p class="hint">Hint the expected number of speakers to speed up diarization clustering.</p>
          </div>
          <div class="field checkbox" style="margin-top: 1rem;">
            <label>
              <input type="checkbox" bind:checked={localSettings.skip_diarization} />
-              Skip speaker diarization (faster, no speaker labels)
+              Skip speaker detection (faster, no speaker labels)
            </label>
          </div>
        {:else if activeTab === 'ai'}
          <div class="field">
            <label for="ai-provider">AI Provider</label>
            <select id="ai-provider" bind:value={localSettings.ai_provider}>
-              <option value="local">Local (llama-server)</option>
+              <option value="local">Ollama</option>
              <option value="openai">OpenAI</option>
              <option value="anthropic">Anthropic</option>
-              <option value="litellm">LiteLLM</option>
+              <option value="litellm">OpenAI Compatible</option>
            </select>
          </div>
-          {#if localSettings.ai_provider === 'openai'}
+          {#if localSettings.ai_provider === 'local'}
            <div class="field">
              <label for="ollama-url">Ollama URL</label>
              <input id="ollama-url" type="text" bind:value={localSettings.ollama_url} placeholder="http://localhost:11434" />
            </div>
            <div class="field">
              <label for="ollama-model">Model</label>
              <input id="ollama-model" type="text" bind:value={localSettings.ollama_model} placeholder="llama3.2" />
            </div>
            <p class="hint">
              Install Ollama from ollama.com, then pull a model with <code>ollama pull llama3.2</code>.
              The app connects via Ollama's OpenAI-compatible API.
            </p>
          {:else if localSettings.ai_provider === 'openai'}
            <div class="field">
              <label for="openai-key">OpenAI API Key</label>
-              <input id="openai-key" type="password" bind:value={localSettings.openai_api_key} placeholder="sk-..." />
+              <div class="input-reveal">
                <input id="openai-key" type={revealedFields.has('openai-key') ? 'text' : 'password'} bind:value={localSettings.openai_api_key} placeholder="sk-..." />
                <button type="button" class="reveal-btn" onclick={() => { const s = new Set(revealedFields); s.has('openai-key') ? s.delete('openai-key') : s.add('openai-key'); revealedFields = s; }}>{revealedFields.has('openai-key') ? 'Hide' : 'Show'}</button>
              </div>
            </div>
            <div class="field">
              <label for="openai-model">Model</label>
@@ -106,31 +216,47 @@
          {:else if localSettings.ai_provider === 'anthropic'}
            <div class="field">
              <label for="anthropic-key">Anthropic API Key</label>
-              <input id="anthropic-key" type="password" bind:value={localSettings.anthropic_api_key} placeholder="sk-ant-..." />
+              <div class="input-reveal">
                <input id="anthropic-key" type={revealedFields.has('anthropic-key') ? 'text' : 'password'} bind:value={localSettings.anthropic_api_key} placeholder="sk-ant-..." />
                <button type="button" class="reveal-btn" onclick={() => { const s = new Set(revealedFields); s.has('anthropic-key') ? s.delete('anthropic-key') : s.add('anthropic-key'); revealedFields = s; }}>{revealedFields.has('anthropic-key') ? 'Hide' : 'Show'}</button>
              </div>
            </div>
            <div class="field">
              <label for="anthropic-model">Model</label>
              <input id="anthropic-model" type="text" bind:value={localSettings.anthropic_model} />
            </div>
          {:else if localSettings.ai_provider === 'litellm'}
            <div class="field">
              <label for="litellm-base">API Base URL</label>
              <input id="litellm-base" type="text" bind:value={localSettings.litellm_api_base} placeholder="https://your-litellm-proxy.example.com" />
            </div>
            <div class="field">
              <label for="litellm-key">API Key</label>
              <div class="input-reveal">
                <input id="litellm-key" type={revealedFields.has('litellm-key') ? 'text' : 'password'} bind:value={localSettings.litellm_api_key} placeholder="sk-..." />
                <button type="button" class="reveal-btn" onclick={() => { const s = new Set(revealedFields); s.has('litellm-key') ? s.delete('litellm-key') : s.add('litellm-key'); revealedFields = s; }}>{revealedFields.has('litellm-key') ? 'Hide' : 'Show'}</button>
              </div>
            </div>
            <div class="field">
              <label for="litellm-model">Model</label>
              <input id="litellm-model" type="text" bind:value={localSettings.litellm_model} placeholder="provider/model-name" />
            </div>
          {/if}
-        {:else}
+        {:else if activeTab === 'debug'}
-          <div class="field">
+          <div class="field checkbox">
-            <label for="llama-binary">llama-server Binary Path</label>
+            <label>
-            <input id="llama-binary" type="text" bind:value={localSettings.local_binary_path} placeholder="llama-server" />
+              <input
                type="checkbox"
                checked={localSettings.devtools_enabled}
                onchange={async (e) => {
                  localSettings.devtools_enabled = (e.target as HTMLInputElement).checked;
                  await invoke('toggle_devtools', { open: localSettings.devtools_enabled });
                }}
              />
              Enable Developer Tools
            </label>
            <p class="hint">Opens the browser inspector for debugging. Changes take effect immediately.</p>
          </div>
          <div class="field">
            <label for="llama-model">GGUF Model Path</label>
            <input id="llama-model" type="text" bind:value={localSettings.local_model_path} placeholder="~/.voicetonotes/models/model.gguf" />
          </div>
          <p class="hint">
            Place GGUF model files in ~/.voicetonotes/models/ for auto-detection.
            The local AI server uses the OpenAI-compatible API from llama.cpp.
          </p>
        {/if}
      </div>
@@ -220,11 +346,36 @@
    color: #aaa;
    margin-bottom: 0.3rem;
  }
  .input-reveal {
    display: flex;
    gap: 0;
  }
  .input-reveal input {
    flex: 1;
    border-top-right-radius: 0;
    border-bottom-right-radius: 0;
  }
  .reveal-btn {
    background: #0f3460;
    border: 1px solid #4a5568;
    border-left: none;
    color: #aaa;
    padding: 0.5rem 0.6rem;
    border-radius: 0 4px 4px 0;
    cursor: pointer;
    font-size: 0.75rem;
    white-space: nowrap;
  }
  .reveal-btn:hover {
    color: #e0e0e0;
    background: #1a4a7a;
  }
  .field input,
  .field select {
    width: 100%;
    background: #1a1a2e;
    color: #e0e0e0;
    color-scheme: dark;
    border: 1px solid #4a5568;
    border-radius: 4px;
    padding: 0.5rem;
@@ -252,6 +403,79 @@
    color: #666;
    line-height: 1.4;
  }
  .info-box {
    background: rgba(233, 69, 96, 0.05);
    border: 1px solid #2a3a5e;
    border-radius: 6px;
    padding: 0.75rem 1rem;
    margin-bottom: 1rem;
    font-size: 0.8rem;
    color: #b0b0b0;
    line-height: 1.5;
  }
  .info-box p {
    margin: 0 0 0.5rem;
  }
  .info-box p:last-child {
    margin-bottom: 0;
  }
  .info-box .info-title {
    color: #e0e0e0;
    font-weight: 600;
    font-size: 0.8rem;
  }
  .info-box ol {
    margin: 0.25rem 0 0.5rem;
    padding-left: 1.25rem;
  }
  .info-box li {
    margin-bottom: 0.25rem;
  }
  .info-box strong {
    color: #e0e0e0;
  }
  .ext-link {
    color: #e94560;
    cursor: pointer;
    text-decoration: underline;
  }
  .ext-link:hover {
    color: #ff6b81;
  }
  .info-box ul {
    margin: 0.25rem 0;
    padding-left: 1.25rem;
  }
  .btn-download {
    background: #0f3460;
    border: 1px solid #4a5568;
    color: #e0e0e0;
    padding: 0.5rem 1rem;
    border-radius: 6px;
    cursor: pointer;
    font-size: 0.85rem;
    width: 100%;
    margin-bottom: 0.5rem;
  }
  .btn-download:hover:not(:disabled) {
    background: #1a4a7a;
    border-color: #e94560;
  }
  .btn-download:disabled {
    opacity: 0.6;
    cursor: not-allowed;
  }
  .status-success {
    color: #4ecdc4;
    font-size: 0.8rem;
    margin: 0.25rem 0;
  }
  .status-error {
    color: #e94560;
    font-size: 0.8rem;
    margin: 0.25rem 0;
    word-break: break-word;
  }
  .modal-footer {
    display: flex;
    justify-content: flex-end;
--- a/src/lib/components/SidecarSetup.svelte
+++ b/src/lib/components/SidecarSetup.svelte
@@ -0,0 +1,320 @@
 <script lang="ts">
  import { invoke } from '@tauri-apps/api/core';
  import { listen } from '@tauri-apps/api/event';
  import type { UnlistenFn } from '@tauri-apps/api/event';
  import { onMount } from 'svelte';
  interface Props {
    onComplete: () => void;
  }
  let { onComplete }: Props = $props();
  let variant = $state<'cpu' | 'cuda'>('cpu');
  let downloading = $state(false);
  let downloadProgress = $state({ downloaded: 0, total: 0, percent: 0 });
  let error = $state('');
  let success = $state(false);
  let unlisten: UnlistenFn | null = null;
  // Nothing to do on mount itself; only register a teardown callback.
  onMount(() => {
    return () => {
      // Detach the progress listener if one is still registered when the
      // component goes away (e.g. unmounted while a download is running).
      unlisten?.();
    };
  });
  /**
   * Render a byte count as a short human-readable size.
   *
   * Below 1 MiB the value is shown in whole KB, below 1 GiB in whole MB,
   * and anything else in GB with one decimal place.
   */
  function formatBytes(bytes: number): string {
    const KIB = 1024;
    const MIB = KIB * 1024;
    const GIB = MIB * 1024;

    let amount: string;
    let unit: string;
    if (bytes < MIB) {
      amount = (bytes / KIB).toFixed(0);
      unit = 'KB';
    } else if (bytes < GIB) {
      amount = (bytes / MIB).toFixed(0);
      unit = 'MB';
    } else {
      amount = (bytes / GIB).toFixed(1);
      unit = 'GB';
    }
    return `${amount} ${unit}`;
  }
  /**
   * Download and install the selected sidecar variant.
   *
   * Subscribes to `sidecar-download-progress` events for the duration of the
   * download, invokes the `download_sidecar` command, and on success calls
   * `onComplete` after a short delay. Any failure (including a failure to
   * subscribe to progress events) is surfaced through `error` so the retry
   * UI is shown; the listener is always released afterwards.
   */
  async function startDownload() {
    downloading = true;
    error = '';
    success = false;
    // Start from zero so a retry does not show the previous attempt's progress
    // until the first new progress event arrives.
    downloadProgress = { downloaded: 0, total: 0, percent: 0 };

    try {
      // Subscribe inside the try: if this rejects, the catch/finally below
      // still run, instead of leaving the UI stuck in the "downloading" state.
      unlisten = await listen<{ downloaded: number; total: number; percent: number }>(
        'sidecar-download-progress',
        (event) => {
          downloadProgress = event.payload;
        }
      );

      await invoke('download_sidecar', { variant });
      success = true;
      // Brief pause so the user sees "Setup complete!" before the screen goes away
      setTimeout(() => {
        onComplete();
      }, 800);
    } catch (err) {
      error = String(err);
    } finally {
      downloading = false;
      unlisten?.();
      unlisten = null;
    }
  }
 </script>
 <div class="setup-overlay">
  <div class="setup-card">
    <h1 class="app-title">Voice to Notes</h1>
    <h2 class="setup-heading">First-Time Setup</h2>
    <p class="setup-description">
      Voice to Notes needs to download its AI engine to transcribe audio.
    </p>
    {#if !downloading && !success}
      <div class="variant-options">
        <label class="variant-option" class:selected={variant === 'cpu'}>
          <input type="radio" name="variant" value="cpu" bind:group={variant} />
          <div class="variant-info">
            <span class="variant-label">Standard (CPU)</span>
            <span class="variant-desc">Works on all computers (~500 MB download)</span>
          </div>
        </label>
        <label class="variant-option" class:selected={variant === 'cuda'}>
          <input type="radio" name="variant" value="cuda" bind:group={variant} />
          <div class="variant-info">
            <span class="variant-label">GPU Accelerated (CUDA)</span>
            <span class="variant-desc">Faster transcription with NVIDIA GPU (~2 GB download)</span>
          </div>
        </label>
      </div>
      {#if error}
        <div class="error-box">
          <p class="error-text">{error}</p>
          <button class="btn-retry" onclick={startDownload}>Retry</button>
        </div>
      {:else}
        <button class="btn-download" onclick={startDownload}>
          Download &amp; Install
        </button>
      {/if}
    {:else if downloading}
      <div class="progress-section">
        <div class="progress-bar-track">
          <div class="progress-bar-fill" style="width: {downloadProgress.percent}%"></div>
        </div>
        <p class="progress-text">
          {downloadProgress.percent}% — {formatBytes(downloadProgress.downloaded)} / {formatBytes(downloadProgress.total)}
        </p>
        <p class="progress-hint">Downloading {variant === 'cuda' ? 'GPU' : 'CPU'} engine...</p>
      </div>
    {:else if success}
      <div class="success-section">
        <div class="success-icon">&#10003;</div>
        <p class="success-text">Setup complete!</p>
      </div>
    {/if}
  </div>
 </div>
 <style>
  .setup-overlay {
    position: fixed;
    inset: 0;
    background: #0a0a23;
    display: flex;
    align-items: center;
    justify-content: center;
    z-index: 10000;
  }
  .setup-card {
    background: #16213e;
    border: 1px solid #2a3a5e;
    border-radius: 12px;
    padding: 2.5rem 3rem;
    max-width: 480px;
    width: 90vw;
    color: #e0e0e0;
    box-shadow: 0 8px 32px rgba(0, 0, 0, 0.5);
    text-align: center;
  }
  .app-title {
    font-size: 1.8rem;
    margin: 0 0 0.25rem;
    color: #e94560;
    font-weight: 700;
  }
  .setup-heading {
    font-size: 1.1rem;
    margin: 0 0 0.75rem;
    color: #e0e0e0;
    font-weight: 500;
  }
  .setup-description {
    font-size: 0.9rem;
    color: #b0b0b0;
    margin: 0 0 1.5rem;
    line-height: 1.5;
  }
  .variant-options {
    display: flex;
    flex-direction: column;
    gap: 0.75rem;
    margin-bottom: 1.5rem;
    text-align: left;
  }
  .variant-option {
    display: flex;
    align-items: flex-start;
    gap: 0.75rem;
    padding: 0.85rem 1rem;
    border: 1px solid #2a3a5e;
    border-radius: 8px;
    cursor: pointer;
    transition: border-color 0.15s, background 0.15s;
  }
  .variant-option:hover {
    border-color: #4a5568;
    background: rgba(255, 255, 255, 0.02);
  }
  .variant-option.selected {
    border-color: #e94560;
    background: rgba(233, 69, 96, 0.08);
  }
  .variant-option input[type='radio'] {
    margin-top: 0.2rem;
    accent-color: #e94560;
    flex-shrink: 0;
  }
  .variant-info {
    display: flex;
    flex-direction: column;
    gap: 0.2rem;
  }
  .variant-label {
    font-size: 0.9rem;
    font-weight: 500;
    color: #e0e0e0;
  }
  .variant-desc {
    font-size: 0.78rem;
    color: #888;
  }
  .btn-download {
    background: #e94560;
    border: none;
    color: white;
    padding: 0.7rem 1.5rem;
    border-radius: 6px;
    cursor: pointer;
    font-size: 0.9rem;
    font-weight: 500;
    width: 100%;
    transition: background 0.15s;
  }
  .btn-download:hover {
    background: #d63851;
  }
  .progress-section {
    margin-top: 0.5rem;
  }
  .progress-bar-track {
    width: 100%;
    height: 8px;
    background: #1a1a2e;
    border-radius: 4px;
    overflow: hidden;
    border: 1px solid #2a3a5e;
  }
  .progress-bar-fill {
    height: 100%;
    background: #e94560;
    border-radius: 4px;
    transition: width 0.3s ease;
  }
  .progress-text {
    margin: 0.75rem 0 0;
    font-size: 0.85rem;
    color: #e0e0e0;
    font-variant-numeric: tabular-nums;
  }
  .progress-hint {
    margin: 0.35rem 0 0;
    font-size: 0.78rem;
    color: #888;
  }
  .error-box {
    background: rgba(233, 69, 96, 0.1);
    border: 1px solid rgba(233, 69, 96, 0.3);
    border-radius: 8px;
    padding: 1rem;
  }
  .error-text {
    color: #e94560;
    font-size: 0.85rem;
    margin: 0 0 0.75rem;
    word-break: break-word;
    line-height: 1.4;
  }
  .btn-retry {
    background: #e94560;
    border: none;
    color: white;
    padding: 0.5rem 1.25rem;
    border-radius: 6px;
    cursor: pointer;
    font-size: 0.85rem;
    font-weight: 500;
  }
  .btn-retry:hover {
    background: #d63851;
  }
  .success-section {
    display: flex;
    flex-direction: column;
    align-items: center;
    gap: 0.5rem;
    padding: 1rem 0;
  }
  .success-icon {
    width: 48px;
    height: 48px;
    border-radius: 50%;
    background: rgba(78, 205, 196, 0.15);
    color: #4ecdc4;
    display: flex;
    align-items: center;
    justify-content: center;
    font-size: 1.5rem;
    font-weight: 700;
  }
  .success-text {
    color: #4ecdc4;
    font-size: 1rem;
    margin: 0;
    font-weight: 500;
  }
 </style>
--- a/src/lib/components/SpeakerManager.svelte
+++ b/src/lib/components/SpeakerManager.svelte
@@ -1,5 +1,6 @@
 <script lang="ts">
  import { speakers } from '$lib/stores/transcript';
  import { settings } from '$lib/stores/settings';
  import type { Speaker } from '$lib/types/transcript';
  let editingSpeakerId = $state<string | null>(null);
@@ -34,7 +35,14 @@
 <div class="speaker-manager">
  <h3>Speakers</h3>
  {#if $speakers.length === 0}
-    <p class="empty-hint">No speakers detected yet</p>
+    <p class="empty-hint">No speakers detected</p>
    {#if $settings.skip_diarization}
      <p class="setup-hint">Speaker detection is disabled. Enable it in Settings &gt; Speakers.</p>
    {:else if !$settings.hf_token}
      <p class="setup-hint">Speaker detection requires a HuggingFace token. Configure it in Settings &gt; Speakers.</p>
    {:else}
      <p class="setup-hint">Speaker detection ran but found no distinct speakers, or the model may need to be downloaded. Check Settings &gt; Speakers.</p>
    {/if}
  {:else}
    <ul class="speaker-list">
      {#each $speakers as speaker (speaker.id)}
@@ -78,6 +86,19 @@
  .empty-hint {
    color: #666;
    font-size: 0.875rem;
    margin-bottom: 0.25rem;
  }
  .setup-hint {
    color: #555;
    font-size: 0.75rem;
    line-height: 1.4;
  }
  .setup-hint code {
    background: rgba(233, 69, 96, 0.15);
    color: #e94560;
    padding: 0.1rem 0.3rem;
    border-radius: 3px;
    font-size: 0.7rem;
  }
  .speaker-list {
    list-style: none;
--- a/src/lib/components/TranscriptEditor.svelte
+++ b/src/lib/components/TranscriptEditor.svelte
@@ -60,12 +60,14 @@
  function finishEditing(segmentId: string) {
    const trimmed = editText.trim();
    if (trimmed) {
      // Update the segment text in the store
      segments.update(segs => segs.map(s => {
        if (s.id !== segmentId) return s;
        const newWordTexts = trimmed.split(/\s+/);
        const newWords = redistributeWords(s, newWordTexts);
        return {
          ...s,
          text: trimmed,
          words: newWords,
          original_text: s.original_text ?? s.text,
          is_edited: true,
          edited_at: new Date().toISOString(),
@@ -76,6 +78,106 @@
    editingSegmentId = null;
  }
  /**
   * Redistribute word timing after an edit.
   *
   * When the word count is unchanged the edit is treated as in-place spelling
   * fixes and every word keeps its timing. Otherwise the old and new word
   * lists are aligned on their longest common subsequence (case-insensitive):
   * - Words common to both lists keep their original timing
   * - Words between two common words replace whatever old words sat there:
   *   - same count → one-to-one, each keeps the old word's timing
   *   - different count (split / merge / rewrite) → the old words' combined
   *     time range is divided evenly among the new words
   *   - no old words in between (pure insertion) → the gap between the
   *     neighbouring words (or the segment bounds) is divided evenly
   * - Deleted old words simply drop out
   *
   * @param segment      Segment being edited; its `words` supply the old timing.
   * @param newWordTexts The edited text split into words.
   * @returns One `Word` per entry of `newWordTexts`, in order, with
   *          `word_index` equal to its position (fast path excepted, where the
   *          existing indices are kept as-is).
   */
  function redistributeWords(segment: Segment, newWordTexts: string[]): Word[] {
    const oldWords = segment.words;

    // Same word count — preserve per-word timing (spelling fixes)
    if (newWordTexts.length === oldWords.length) {
      return oldWords.map((w, i) => ({ ...w, word: newWordTexts[i] }));
    }

    const oldTexts = oldWords.map(w => w.word.toLowerCase());
    const newTexts = newWordTexts.map(w => w.toLowerCase());
    const oldLen = oldTexts.length;
    const newLen = newTexts.length;

    // lcs[i][j] = length of the longest common subsequence of
    // oldTexts[i..] and newTexts[j..]. Segments are short, so the
    // quadratic table is cheap.
    const lcs: number[][] = [];
    for (let i = 0; i <= oldLen; i++) {
      lcs.push(new Array<number>(newLen + 1).fill(0));
    }
    for (let i = oldLen - 1; i >= 0; i--) {
      for (let j = newLen - 1; j >= 0; j--) {
        lcs[i][j] = oldTexts[i] === newTexts[j]
          ? lcs[i + 1][j + 1] + 1
          : Math.max(lcs[i + 1][j], lcs[i][j + 1]);
      }
    }

    const result: Word[] = [];

    // Generated ids must not clash with ids carried over from old words
    // (which may themselves have been generated by an earlier edit).
    const usedIds = new Set(oldWords.map(w => w.id));
    const freshId = (index: number): string => {
      const base = `${segment.id}-word-${index}`;
      let candidate = base;
      for (let n = 1; usedIds.has(candidate); n++) {
        candidate = `${base}-${n}`;
      }
      usedIds.add(candidate);
      return candidate;
    };

    // Emit new words [newFrom, newTo), which stand in for old words [oldFrom, oldTo).
    const emitGap = (oldFrom: number, oldTo: number, newFrom: number, newTo: number) => {
      const count = newTo - newFrom;
      if (count <= 0) return; // nothing new here — old words (if any) were deleted

      const replaced = oldWords.slice(oldFrom, oldTo);

      if (replaced.length === count) {
        // One-to-one substitution: keep each old word's identity and timing.
        replaced.forEach((ow, k) => {
          result.push({ ...ow, word: newWordTexts[newFrom + k], word_index: newFrom + k });
        });
        return;
      }

      let rangeStart: number;
      let rangeEnd: number;
      let confidence: Word['confidence'] = 1.0;
      if (replaced.length > 0) {
        // Split / merge / rewrite: reuse the time range of the replaced words.
        rangeStart = replaced[0].start_ms;
        rangeEnd = replaced[replaced.length - 1].end_ms;
        confidence = replaced[0].confidence;
      } else {
        // Pure insertion: use the gap between the neighbouring words.
        rangeStart = result.length > 0 ? result[result.length - 1].end_ms : segment.start_ms;
        rangeEnd = oldTo < oldLen ? oldWords[oldTo].start_ms : segment.end_ms;
      }

      // Guard against overlapping neighbours producing a negative span.
      const step = Math.max(0, rangeEnd - rangeStart) / count;
      for (let k = 0; k < count; k++) {
        result.push({
          id: freshId(newFrom + k),
          segment_id: segment.id,
          word: newWordTexts[newFrom + k],
          start_ms: Math.round(rangeStart + step * k),
          end_ms: Math.round(rangeStart + step * (k + 1)),
          confidence,
          word_index: newFrom + k,
        });
      }
    };

    // Walk the LCS table; every matched pair closes the gap that precedes it.
    let oldIdx = 0;
    let newIdx = 0;
    let gapOldStart = 0;
    let gapNewStart = 0;
    while (oldIdx < oldLen && newIdx < newLen) {
      if (oldTexts[oldIdx] === newTexts[newIdx]) {
        emitGap(gapOldStart, oldIdx, gapNewStart, newIdx);
        // Exact match — keep original timing
        result.push({ ...oldWords[oldIdx], word: newWordTexts[newIdx], word_index: newIdx });
        oldIdx++;
        newIdx++;
        gapOldStart = oldIdx;
        gapNewStart = newIdx;
      } else if (lcs[oldIdx + 1][newIdx] >= lcs[oldIdx][newIdx + 1]) {
        oldIdx++;
      } else {
        newIdx++;
      }
    }
    // Whatever is left on either side forms the final gap.
    emitGap(gapOldStart, oldLen, gapNewStart, newLen);

    return result;
  }
  function handleEditKeydown(e: KeyboardEvent, segmentId: string) {
    if (e.key === 'Escape') {
      editingSegmentId = null;
@@ -170,7 +272,9 @@
 <style>
  .transcript-editor {
    flex: 1;
    min-width: 0;
    overflow-y: auto;
    overflow-x: hidden;
    padding: 1rem;
    background: #16213e;
    border-radius: 8px;
@@ -217,6 +321,9 @@
  .segment-text {
    line-height: 1.6;
    padding-left: 0.75rem;
    white-space: pre-wrap;
    word-wrap: break-word;
    overflow-wrap: break-word;
  }
  .word {
    cursor: pointer;
--- a/src/lib/components/WaveformPlayer.svelte
+++ b/src/lib/components/WaveformPlayer.svelte
@@ -12,6 +12,8 @@
  let container: HTMLDivElement;
  let wavesurfer: WaveSurfer | null = $state(null);
  let isReady = $state(false);
  let isLoading = $state(false);
  let currentTime = $state('0:00');
  let totalTime = $state('0:00');
@@ -31,6 +33,7 @@
      barWidth: 2,
      barGap: 1,
      barRadius: 2,
      backend: 'WebAudio',
    });
    wavesurfer.on('timeupdate', (time: number) => {
@@ -39,6 +42,8 @@
    });
    wavesurfer.on('ready', () => {
      isReady = true;
      isLoading = false;
      const dur = wavesurfer!.getDuration();
      durationMs.set(Math.round(dur * 1000));
      totalTime = formatTime(dur);
@@ -48,8 +53,18 @@
    wavesurfer.on('pause', () => isPlaying.set(false));
    wavesurfer.on('finish', () => isPlaying.set(false));
    wavesurfer.on('loading', () => {
      isReady = false;
    });
    wavesurfer.on('error', (err: Error) => {
      console.error('[voice-to-notes] WaveSurfer error:', err);
      isLoading = false;
      loadError = 'Failed to load audio';
    });
    if (audioUrl) {
-      wavesurfer.load(audioUrl);
+      loadAudio(audioUrl);
    }
  });
@@ -57,20 +72,21 @@
    wavesurfer?.destroy();
  });
-  /** Toggle play/pause. Exposed for keyboard shortcuts. */
+  /** Toggle play/pause from current position. Exposed for keyboard shortcuts. */
  export function togglePlayPause() {
-    wavesurfer?.playPause();
+    if (!wavesurfer || !isReady) return;
    wavesurfer.playPause();
  }
  function skipBack() {
-    if (wavesurfer) {
+    if (wavesurfer && isReady) {
      const time = Math.max(0, wavesurfer.getCurrentTime() - 5);
      wavesurfer.setTime(time);
    }
  }
  function skipForward() {
-    if (wavesurfer) {
+    if (wavesurfer && isReady) {
      const time = Math.min(wavesurfer.getDuration(), wavesurfer.getCurrentTime() + 5);
      wavesurfer.setTime(time);
    }
@@ -78,16 +94,17 @@
  /** Seek to a specific time in milliseconds. Called from transcript click-to-seek. */
  export function seekTo(timeMs: number) {
-    if (wavesurfer) {
+    if (!wavesurfer || !isReady) {
      console.warn('[voice-to-notes] seekTo ignored — audio not ready yet');
      return;
    }
    wavesurfer.setTime(timeMs / 1000);
      if (!wavesurfer.isPlaying()) {
        wavesurfer.play();
      }
    }
  }
  /**
   * Start loading a different audio source into the player.
   * Marks the player as loading / not ready before handing the URL to
   * WaveSurfer (when an instance exists).
   */
  export function loadAudio(url: string) {
    isLoading = true;
    isReady = false;
    if (wavesurfer) {
      wavesurfer.load(url);
    }
  }
 </script>
@@ -95,11 +112,17 @@
 <div class="waveform-player">
  <div class="waveform-container" bind:this={container}></div>
  <div class="controls">
-    <button class="control-btn" onclick={skipBack} title="Back 5s">⏪</button>
+    <button class="control-btn" onclick={skipBack} title="Back 5s" disabled={!isReady}>⏪</button>
-    <button class="control-btn play-btn" onclick={togglePlayPause} title="Play/Pause">
+    <button class="control-btn play-btn" onclick={togglePlayPause} title="Play/Pause" disabled={!isReady}>
-      {#if $isPlaying}⏸{:else}▶{/if}
+      {#if !isReady}
        ⏳
      {:else if $isPlaying}
        ⏸
      {:else}
        ▶
      {/if}
    </button>
-    <button class="control-btn" onclick={skipForward} title="Forward 5s">⏩</button>
+    <button class="control-btn" onclick={skipForward} title="Forward 5s" disabled={!isReady}>⏩</button>
    <span class="time">{currentTime} / {totalTime}</span>
  </div>
 </div>
@@ -129,9 +152,13 @@
    cursor: pointer;
    font-size: 1rem;
  }
-  .control-btn:hover {
+  .control-btn:hover:not(:disabled) {
    background: #1a4a7a;
  }
  .control-btn:disabled {
    opacity: 0.4;
    cursor: not-allowed;
  }
  .play-btn {
    padding: 0.4rem 1rem;
    font-size: 1.2rem;
--- a/src/lib/stores/settings.ts
+++ b/src/lib/stores/settings.ts
@@ -8,12 +8,17 @@ export interface AppSettings {
  openai_model: string;
  anthropic_model: string;
  litellm_model: string;
-  local_model_path: string;
+  litellm_api_key: string;
-  local_binary_path: string;
+  litellm_api_base: string;
  ollama_url: string;
  ollama_model: string;
  transcription_model: string;
  transcription_device: string;
  transcription_language: string;
  skip_diarization: boolean;
  hf_token: string;
  num_speakers: number | null;
  devtools_enabled: boolean;
 }
 const defaults: AppSettings = {
@@ -23,12 +28,17 @@ const defaults: AppSettings = {
  openai_model: 'gpt-4o-mini',
  anthropic_model: 'claude-sonnet-4-6',
  litellm_model: 'gpt-4o-mini',
-  local_model_path: '',
+  litellm_api_key: '',
-  local_binary_path: 'llama-server',
+  litellm_api_base: '',
  ollama_url: 'http://localhost:11434',
  ollama_model: 'llama3.2',
  transcription_model: 'base',
  transcription_device: 'cpu',
  transcription_language: '',
  skip_diarization: false,
  hf_token: '',
  num_speakers: null,
  devtools_enabled: false,
 };
 export const settings = writable<AppSettings>({ ...defaults });
@@ -42,7 +52,27 @@ export async function loadSettings(): Promise<void> {
  }
 }
 /**
  * Send the currently selected AI provider's connection settings to the
  * backend via the `ai_configure` command.
  *
  * Providers without an entry in the table below are ignored, and a failing
  * `ai_configure` call is swallowed on purpose.
  */
 export async function configureAIProvider(s: AppSettings): Promise<void> {
  const provider = s.ai_provider;
  // Trailing slashes are trimmed before appending the `/v1` path.
  const localBaseUrl = s.ollama_url.replace(/\/+$/, '') + '/v1';

  const providerConfigs: Record<string, Record<string, string>> = {
    openai: { api_key: s.openai_api_key, model: s.openai_model },
    anthropic: { api_key: s.anthropic_api_key, model: s.anthropic_model },
    litellm: { api_key: s.litellm_api_key, api_base: s.litellm_api_base, model: s.litellm_model },
    local: { model: s.ollama_model, base_url: localBaseUrl },
  };

  const selected = providerConfigs[provider];
  if (!selected) {
    return;
  }

  try {
    await invoke('ai_configure', { provider, config: selected });
  } catch {
    // Sidecar may not be running yet
  }
 }
 /**
  * Apply and persist a new settings object.
  *
  * Updates the in-memory `settings` store first, then persists through the
  * `save_settings` command, and finally re-configures the AI provider.
  * A rejection from `save_settings` propagates to the caller and skips the
  * provider configuration step.
  */
 export async function saveSettings(s: AppSettings): Promise<void> {
  settings.set(s);
  await invoke('save_settings', { settings: s });
  // Configure the AI provider in the Python sidecar
  await configureAIProvider(s);
 }
--- a/src/routes/+page.svelte
+++ b/src/routes/+page.svelte
@@ -8,17 +8,76 @@
  import AIChatPanel from '$lib/components/AIChatPanel.svelte';
  import ProgressOverlay from '$lib/components/ProgressOverlay.svelte';
  import SettingsModal from '$lib/components/SettingsModal.svelte';
  import SidecarSetup from '$lib/components/SidecarSetup.svelte';
  import { segments, speakers } from '$lib/stores/transcript';
-  import { settings, loadSettings } from '$lib/stores/settings';
+  import { settings, loadSettings, configureAIProvider } from '$lib/stores/settings';
  import type { Segment, Speaker } from '$lib/types/transcript';
-  import { onMount } from 'svelte';
+  import { onMount, tick } from 'svelte';
  let appReady = $state(false);
  let waveformPlayer: WaveformPlayer;
  let audioUrl = $state('');
  let showSettings = $state(false);
  // Sidecar state
  let sidecarReady = $state(false);
  let sidecarChecked = $state(false);
  // Sidecar update state
  let sidecarUpdate = $state<{ current_version: string; latest_version: string } | null>(null);
  let showUpdateDownload = $state(false);
  let updateDismissed = $state(false);
  // Project management state
  let currentProjectPath = $state<string | null>(null);
  let currentProjectName = $state('');
  let projectIsV2 = $state(false);
  let audioFilePath = $state('');
  let audioWavPath = $state('');
  /**
   * Ask the backend (`check_sidecar`) whether the sidecar is available.
   * A failed call counts as "not ready". `sidecarChecked` is set afterwards
   * in either case.
   */
  async function checkSidecar() {
    let isAvailable: boolean;
    try {
      isAvailable = await invoke<boolean>('check_sidecar');
    } catch {
      isAvailable = false;
    }
    sidecarReady = isAvailable;
    sidecarChecked = true;
  }
  /**
   * Query `check_sidecar_update` and store the result in `sidecarUpdate`
   * (`null` is stored when the command returns null). Errors leave the
   * previous value untouched.
   */
  async function checkSidecarUpdate() {
    type SidecarUpdateInfo = { current_version: string; latest_version: string };
    try {
      sidecarUpdate = await invoke<SidecarUpdateInfo | null>('check_sidecar_update');
    } catch {
      // Silently ignore update check failures
    }
  }
  /**
   * Callback for the first-time setup screen once the sidecar is installed.
   * Marks the sidecar as ready, then kicks off provider configuration and an
   * update check without awaiting either (both handle their own errors).
   */
  function handleSidecarSetupComplete() {
    sidecarReady = true;
    configureAIProvider($settings);
    checkSidecarUpdate();
  }
  /** Reset the update state after an update finishes: hide the download UI and clear the pending update info. */
  function handleUpdateComplete() {
    showUpdateDownload = false;
    sidecarUpdate = null;
  }
  onMount(() => {
-    loadSettings();
+    loadSettings().then(() => {
      // Restore devtools state from settings
      if ($settings.devtools_enabled) {
        invoke('toggle_devtools', { open: true });
      }
    });
    checkSidecar().then(() => {
      if (sidecarReady) {
        configureAIProvider($settings);
        checkSidecarUpdate();
      }
    });
    // Global keyboard shortcuts
    function handleKeyDown(e: KeyboardEvent) {
@@ -43,8 +102,8 @@
    // Close export dropdown on outside click
    function handleClickOutside(e: MouseEvent) {
      if (showExportMenu) {
      const target = e.target as HTMLElement;
      if (showExportMenu) {
        if (!target.closest('.export-dropdown')) {
          showExportMenu = false;
        }
@@ -54,23 +113,343 @@
    document.addEventListener('keydown', handleKeyDown);
    document.addEventListener('click', handleClickOutside);
    appReady = true;
    return () => {
      document.removeEventListener('keydown', handleKeyDown);
      document.removeEventListener('click', handleClickOutside);
    };
  });
  let isTranscribing = $state(false);
  let transcriptionCancelled = $state(false);
  let transcriptionProgress = $state(0);
  let transcriptionStage = $state('');
  let transcriptionMessage = $state('');
  let extractingAudio = $state(false);
  /**
   * Cancel the running pipeline as far as the UI is concerned: raise the
   * cancelled flag, reset every progress indicator, and discard any partial
   * transcript and speaker data.
   *
   * NOTE(review): nothing here tells the backend to stop — presumably the
   * code awaiting the pipeline checks `transcriptionCancelled`; confirm.
   */
  function handleCancelProcessing() {
    transcriptionCancelled = true;
    isTranscribing = false;
    transcriptionProgress = 0;
    transcriptionStage = '';
    transcriptionMessage = '';
    // Clear any partial results
    segments.set([]);
    speakers.set([]);
  }
  // Speaker color palette for auto-assignment
  const speakerColors = ['#e94560', '#4ecdc4', '#ffe66d', '#a8e6cf', '#ff8b94', '#c7ceea', '#ffd93d', '#6bcb77'];
  /**
   * Snapshot the current transcript as a version-2 project document.
   *
   * Segments reference their speaker by label (or `null` when the segment's
   * speaker id is not in the speaker list); speakers without a colour fall
   * back to '#e94560'; a missing word confidence is written as 0.
   */
  function buildProjectData(projectName: string) {
    const createdAt = new Date().toISOString();

    const segmentEntries = $segments.map(seg => ({
      text: seg.text,
      start_ms: seg.start_ms,
      end_ms: seg.end_ms,
      speaker: $speakers.find(sp => sp.id === seg.speaker_id)?.label ?? null,
      is_edited: seg.is_edited,
      words: seg.words.map(({ word, start_ms, end_ms, confidence }) => ({
        word,
        start_ms,
        end_ms,
        confidence: confidence ?? 0,
      })),
    }));

    const speakerEntries = $speakers.map(({ label, display_name, color }) => ({
      label,
      display_name,
      color: color || '#e94560',
    }));

    return {
      version: 2,
      name: projectName,
      source_file: audioFilePath,
      audio_wav: 'audio.wav',
      created_at: createdAt,
      segments: segmentEntries,
      speakers: speakerEntries,
    };
  }
  /**
   * Save the project into `folderPath`, producing `<folder name>.vtn` and
   * `audio.wav` inside it.
   *
   * The project name is the last path component of `folderPath`, falling back
   * to the current project name and then 'Untitled'. The working WAV is copied
   * in only when one exists and is not already the target file.
   *
   * @returns true on success; false after logging and alerting the failure.
   */
  async function saveToFolder(folderPath: string): Promise<boolean> {
    const folderName = folderPath.split(/[\\/]/).pop();
    const projectName = folderName || currentProjectName || 'Untitled';
    const targetVtn = `${folderPath}/${projectName}.vtn`;
    const targetWav = `${folderPath}/audio.wav`;
    const payload = buildProjectData(projectName);

    try {
      await invoke('create_dir', { path: folderPath });

      const needsAudioCopy = Boolean(audioWavPath) && audioWavPath !== targetWav;
      if (needsAudioCopy) {
        await invoke('copy_file', { src: audioWavPath, dst: targetWav });
        audioWavPath = targetWav;
      }

      await invoke('save_project_file', { path: targetVtn, project: payload });
    } catch (err) {
      console.error('Failed to save project:', err);
      alert(`Failed to save: ${err}`);
      return false;
    }

    // Only reached when every step above succeeded.
    currentProjectPath = targetVtn;
    currentProjectName = projectName;
    projectIsV2 = true;
    return true;
  }
  /**
   * Save the current project.
   *
   * - Never saved: delegate to `saveProjectAs` so the user picks a location.
   * - Already a v2 (folder) project: save in place, into the folder that
   *   contains the current .vtn file.
   * - Legacy v1 project: migrate by saving into a new folder named after the
   *   .vtn file, created next to it. The old .vtn file is left untouched;
   *   the user can delete it manually.
   */
  async function saveProject() {
    if (!currentProjectPath) {
      // Never saved — pick a folder
      await saveProjectAs();
      return;
    }

    const containingDir = currentProjectPath.replace(/[\\/][^\\/]+$/, '');

    if (projectIsV2) {
      // Already saved as v2 folder — save in place
      await saveToFolder(containingDir);
      return;
    }

    // V1 project opened — migrate to folder structure
    const projectName = currentProjectPath.split(/[\\/]/).pop()?.replace(/\.vtn$/i, '') || 'Untitled';
    await saveToFolder(`${containingDir}/${projectName}`);
  }
  /**
   * Prompt for a project name/location with a save dialog and save there.
   *
   * The chosen path is used as the project folder (created if needed) after
   * removing any trailing file extension the user typed, e.g. ".vtn".
   * Does nothing when the dialog is cancelled.
   */
  async function saveProjectAs() {
    const chosenPath = await save({
      defaultPath: currentProjectName || 'Untitled',
      title: 'Save Project — enter a project name',
    });

    if (chosenPath) {
      const projectFolder = chosenPath.replace(/\.[^.\\/]+$/, '');
      await saveToFolder(projectFolder);
    }
  }
  /** Media extensions offered when the user re-links a project's missing audio. */
  const RELINK_MEDIA_EXTENSIONS = ['mp3', 'wav', 'flac', 'ogg', 'm4a', 'aac', 'wma',
                                   'mp4', 'mkv', 'avi', 'mov', 'webm'];

  /**
   * Extract `sourcePath` to a WAV at `outputPath` and load it into the player.
   *
   * Shows the "extracting audio" state while running. Extraction failures are
   * logged with `logPrefix` and alerted with `alertPrefix`; they are not
   * rethrown, so the caller continues opening the project without audio.
   */
  async function extractAndLoadProjectAudio(
    sourcePath: string,
    outputPath: string,
    logPrefix: string,
    alertPrefix: string,
  ): Promise<void> {
    extractingAudio = true;
    // Flush the DOM so the extracting indicator renders before the invoke.
    await tick();
    try {
      const wavPath = await invoke<string>('extract_audio', { filePath: sourcePath, outputPath });
      audioWavPath = wavPath;
      audioUrl = convertFileSrc(wavPath);
      waveformPlayer?.loadAudio(audioUrl);
    } catch (err) {
      console.error(logPrefix, err);
      alert(`${alertPrefix}: ${err}`);
    } finally {
      extractingAudio = false;
    }
  }

  /**
   * Ask the user whether to locate a project's missing audio and, if they
   * pick a file, record it as the source file and extract it to `outputPath`.
   *
   * @param originLabel Label shown before the last known path in the prompt.
   * @param knownPath   Last known source path ('' when unknown).
   * @param outputPath  Where the extracted WAV should be written.
   */
  async function relinkProjectAudio(originLabel: string, knownPath: string, outputPath: string): Promise<void> {
    const shouldRelink = confirm(
      'The audio file for this project could not be found.\n\n' +
      `${originLabel}: ${knownPath || '(unknown)'}\n\n` +
      'Would you like to locate the file?'
    );
    if (!shouldRelink) return;

    const newPath = await open({
      multiple: false,
      filters: [{
        name: 'Audio/Video',
        extensions: RELINK_MEDIA_EXTENSIONS,
      }],
    });
    if (!newPath) return;

    audioFilePath = newPath;
    await extractAndLoadProjectAudio(
      newPath,
      outputPath,
      'Failed to extract audio from re-linked file:',
      'Failed to extract audio',
    );
  }

  /**
   * Let the user pick a .vtn project file and load it.
   *
   * Rebuilds the speaker and segment stores from the file, then resolves audio
   * for playback:
   * - v2 projects: use the WAV stored next to the .vtn; if it is missing,
   *   re-extract it from the original source file; if that is missing too,
   *   offer to re-link.
   * - v1 (legacy) projects: extract a WAV next to the .vtn from the recorded
   *   source file, or offer to re-link when that file is missing.
   *
   * Audio problems never prevent the transcript from loading. Any other
   * failure is logged and alerted.
   */
  async function openProject() {
    const filePath = await open({
      filters: [{ name: 'Voice to Notes Project', extensions: ['vtn'] }],
      multiple: false,
    });
    if (!filePath) return;

    try {
      const project = await invoke<{
        version?: number;
        name: string;
        audio_file?: string;
        source_file?: string;
        audio_wav?: string;
        segments: Array<{
          text: string;
          start_ms: number;
          end_ms: number;
          speaker: string | null;
          is_edited: boolean;
          words: Array<{ word: string; start_ms: number; end_ms: number; confidence: number }>;
        }>;
        speakers: Array<{ label: string; display_name: string | null; color: string }>;
      }>('load_project_file', { path: filePath });

      // Rebuild speakers
      const newSpeakers: Speaker[] = project.speakers.map((s, idx) => ({
        id: `speaker-${idx}`,
        project_id: '',
        label: s.label,
        display_name: s.display_name,
        color: s.color,
      }));
      speakers.set(newSpeakers);

      // Segments store the speaker's label; map it back to the generated id.
      const speakerLookup = new Map(newSpeakers.map(s => [s.label, s.id]));

      // Rebuild segments
      const newSegments: Segment[] = project.segments.map((seg, idx) => ({
        id: `seg-${idx}`,
        project_id: '',
        media_file_id: '',
        speaker_id: seg.speaker ? (speakerLookup.get(seg.speaker) ?? null) : null,
        start_ms: seg.start_ms,
        end_ms: seg.end_ms,
        text: seg.text,
        original_text: null,
        confidence: null,
        is_edited: seg.is_edited,
        edited_at: null,
        segment_index: idx,
        words: seg.words.map((w, widx) => ({
          id: `word-${idx}-${widx}`,
          segment_id: `seg-${idx}`,
          word: w.word,
          start_ms: w.start_ms,
          end_ms: w.end_ms,
          confidence: w.confidence,
          word_index: widx,
        })),
      }));
      segments.set(newSegments);

      // Determine the directory the .vtn file is in
      const vtnDir = (filePath as string).replace(/[\\/][^\\/]+$/, '');
      const version = project.version ?? 1;
      projectIsV2 = version >= 2;

      // Forget the previously opened project's WAV. If this project's audio
      // cannot be resolved below, a later save must not copy the old project's
      // audio into this project's folder.
      audioWavPath = '';

      // Resolve audio for wavesurfer playback
      if (version >= 2) {
        // Version 2: audio_wav is relative to the .vtn directory, source_file is the original import path
        audioFilePath = project.source_file || '';
        const wavRelative = project.audio_wav || 'audio.wav';
        const resolvedWav = `${vtnDir}/${wavRelative}`;

        const wavExists = await invoke<boolean>('check_file_exists', { path: resolvedWav });
        if (wavExists) {
          audioWavPath = resolvedWav;
          audioUrl = convertFileSrc(resolvedWav);
          waveformPlayer?.loadAudio(audioUrl);
        } else {
          // WAV missing — try re-extracting from the original source file
          const sourceExists = audioFilePath ? await invoke<boolean>('check_file_exists', { path: audioFilePath }) : false;
          if (sourceExists) {
            await extractAndLoadProjectAudio(
              audioFilePath,
              resolvedWav,
              'Failed to re-extract audio:',
              'Failed to re-extract audio',
            );
          } else {
            // Both missing — ask user to locate the file
            await relinkProjectAudio('Original source', audioFilePath, resolvedWav);
          }
        }
      } else {
        // Version 1 (legacy): audio_file is the source path
        const sourceFile = project.audio_file || '';
        audioFilePath = sourceFile;
        // Extracted WAV goes next to the .vtn file for playback
        const outputPath = `${vtnDir}/audio.wav`;

        const sourceExists = sourceFile ? await invoke<boolean>('check_file_exists', { path: sourceFile }) : false;
        if (sourceExists) {
          await extractAndLoadProjectAudio(
            sourceFile,
            outputPath,
            'Failed to extract audio:',
            'Failed to extract audio',
          );
        } else {
          // Source missing — ask user to locate the file
          await relinkProjectAudio('Original path', sourceFile, outputPath);
        }
      }

      currentProjectPath = filePath as string;
      currentProjectName = project.name;
    } catch (err) {
      console.error('Failed to load project:', err);
      alert(`Failed to load project: ${err}`);
    }
  }
  /**
   * Click-to-seek handler for transcript words: logs the request and forwards
   * the timestamp (milliseconds) to the waveform player, if one is bound.
   */
  function handleWordClick(timeMs: number) {
    console.log('[voice-to-notes] Word clicked, seeking to', timeMs, 'ms');
    waveformPlayer?.seekTo(timeMs);
  }
  /**
   * Text-edit callback. Intentionally a no-op: both parameters are unused
   * because there is nothing left to do at this level.
   */
  function handleTextEdit(segmentId: string, newText: string) {
    // In-memory store is already updated by TranscriptEditor.
    // Changes persist when user saves the project file.
  }
  async function handleFileImport() {
    const filePath = await open({
      multiple: false,
@@ -82,28 +461,126 @@
    });
    if (!filePath) return;
-    // Convert file path to asset URL for wavesurfer
+    // Always extract audio to WAV for wavesurfer playback
-    audioUrl = convertFileSrc(filePath);
+    extractingAudio = true;
    await tick();
    try {
      const wavPath = await invoke<string>('extract_audio', { filePath });
      audioWavPath = wavPath;
    } catch (err) {
      console.error('[voice-to-notes] Failed to extract audio:', err);
      const msg = String(err);
      if (msg.includes('ffmpeg not found')) {
        alert(
          'FFmpeg is required to extract audio.\n\n' +
          'Install FFmpeg:\n' +
          '  Windows: winget install ffmpeg\n' +
          '  macOS: brew install ffmpeg\n' +
          '  Linux: sudo apt install ffmpeg\n\n' +
          'Then restart Voice to Notes and try again.'
        );
      } else {
        alert(`Failed to extract audio: ${msg}`);
      }
      return;
    } finally {
      extractingAudio = false;
    }
    // Track the original file path for the sidecar (it does its own conversion)
    audioFilePath = filePath;
    audioUrl = convertFileSrc(audioWavPath);
    waveformPlayer?.loadAudio(audioUrl);
    // Clear previous results
    segments.set([]);
    speakers.set([]);
    // Start pipeline (transcription + diarization)
    isTranscribing = true;
    transcriptionCancelled = false;
    transcriptionProgress = 0;
    transcriptionStage = 'Starting...';
    transcriptionMessage = 'Initializing pipeline...';
    // Flush DOM so the progress overlay renders before the blocking invoke
    await tick();
    // Subscribe to 'pipeline-progress' events and mirror them into the
    // progress-overlay state. `unlisten` removes the subscription.
    const unlisten = await listen<{
      percent: number;
      stage: string;
      message: string;
    }>('pipeline-progress', ({ payload }) => {
      console.log('[voice-to-notes] Progress event:', payload);
      // Each field is validated on its own, so a payload that carries only
      // some of the fields updates just those and leaves the rest untouched.
      if (typeof payload.percent === 'number') transcriptionProgress = payload.percent;
      if (typeof payload.stage === 'string') transcriptionStage = payload.stage;
      if (typeof payload.message === 'string') transcriptionMessage = payload.message;
    });
    // Subscribe to 'pipeline-segment' events: every event is converted into a
    // Segment record and appended to the `segments` store.
    const unlistenSegment = await listen<{
      index: number;
      text: string;
      start_ms: number;
      end_ms: number;
      words: Array<{ word: string; start_ms: number; end_ms: number; confidence: number }>;
    }>('pipeline-segment', ({ payload }) => {
      const { index, text, start_ms, end_ms, words } = payload;
      // Ids are derived from the pipeline's segment index; words reference the
      // same id through `segment_id`.
      const segmentId = `seg-${index}`;
      const incoming: Segment = {
        id: segmentId,
        project_id: '',
        media_file_id: '',
        speaker_id: null, // filled in later by the speaker-update listener
        start_ms,
        end_ms,
        text,
        original_text: null,
        confidence: null,
        is_edited: false,
        edited_at: null,
        segment_index: index,
        words: words.map((entry, position) => ({
          id: `word-${index}-${position}`,
          segment_id: segmentId,
          word: entry.word,
          start_ms: entry.start_ms,
          end_ms: entry.end_ms,
          confidence: entry.confidence,
          word_index: position,
        })),
      };
      segments.update((existing) => [...existing, incoming]);
    });
    // Subscribe to 'pipeline-speaker-update' events: each event rebuilds the
    // speaker list from the labels it carries and tags the segments already in
    // the store with the matching speaker id.
    const unlistenSpeaker = await listen<{
      updates: Array<{ index: number; speaker: string }>;
    }>('pipeline-speaker-update', (event) => {
      const { updates } = event.payload;
      // Build speakers from unique labels (sorted, so ids and colors depend
      // only on the set of labels, not on the order they arrive in).
      const uniqueLabels = [...new Set(updates.map(u => u.speaker))].sort();
      const newSpeakers: Speaker[] = uniqueLabels.map((label, idx) => ({
        id: `speaker-${idx}`,
        project_id: '',
        label,
        display_name: null,
        color: speakerColors[idx % speakerColors.length],
      }));
      speakers.set(newSpeakers);
      // Update existing segments with speaker assignments
      const speakerLookup = new Map(newSpeakers.map(s => [s.label, s.id]));
      // Index the updates once so every segment is a constant-time lookup
      // rather than a linear scan of `updates`. When an index appears more
      // than once the first entry wins, exactly as Array.prototype.find did.
      const speakerIdByPosition = new Map<number, string | null>();
      for (const { index, speaker } of updates) {
        if (!speakerIdByPosition.has(index)) {
          speakerIdByPosition.set(index, speakerLookup.get(speaker) ?? null);
        }
      }
      segments.update(segs =>
        // `index` is matched against the segment's position in the store.
        segs.map((seg, position) =>
          speakerIdByPosition.has(position)
            ? { ...seg, speaker_id: speakerIdByPosition.get(position) ?? null }
            : seg
        )
      );
    });
    try {
      const result = await invoke<{
        segments: Array<{
@@ -128,8 +605,13 @@
        device: $settings.transcription_device || undefined,
        language: $settings.transcription_language || undefined,
        skipDiarization: $settings.skip_diarization || undefined,
        hfToken: $settings.hf_token || undefined,
        numSpeakers: $settings.num_speakers && $settings.num_speakers > 0 ? $settings.num_speakers : undefined,
      });
      // If cancelled while processing, discard results
      if (transcriptionCancelled) return;
      // Create speaker entries from pipeline result
      const newSpeakers: Speaker[] = (result.speakers || []).map((label, idx) => ({
        id: `speaker-${idx}`,
@@ -169,11 +651,18 @@
      }));
      segments.set(newSegments);
      // Set project name from audio file name (user can save explicitly)
      const fileName = filePath.split(/[\\/]/).pop() || 'Untitled';
      currentProjectName = fileName.replace(/\.[^.]+$/, '');
      currentProjectPath = null;
    } catch (err) {
      console.error('Pipeline failed:', err);
      alert(`Pipeline failed: ${err}`);
    } finally {
      unlisten();
      unlistenSegment();
      unlistenSpeaker();
      isTranscribing = false;
    }
  }
@@ -229,11 +718,50 @@
  }
 </script>
 {#if !appReady || !sidecarChecked}
  <div class="splash-screen">
    <h1 class="splash-title">Voice to Notes</h1>
    <p class="splash-subtitle">Loading...</p>
    <div class="splash-spinner"></div>
  </div>
 {:else if sidecarChecked && !sidecarReady && !showUpdateDownload}
  <SidecarSetup onComplete={handleSidecarSetupComplete} />
 {:else if showUpdateDownload}
  <SidecarSetup onComplete={handleUpdateComplete} />
 {:else}
  <div class="app-shell">
  {#if sidecarUpdate && !updateDismissed}
    <div class="update-banner">
      <span class="update-text">
        Sidecar update available (v{sidecarUpdate.current_version} &rarr; v{sidecarUpdate.latest_version})
      </span>
      <button class="update-btn" onclick={() => showUpdateDownload = true}>
        Update
      </button>
      <button class="update-dismiss" onclick={() => updateDismissed = true} title="Dismiss">
        &times;
      </button>
    </div>
  {/if}
  <div class="app-header">
  <h1>Voice to Notes</h1>
    <div class="header-actions">
-    <button class="import-btn" onclick={handleFileImport}>
+      <button class="settings-btn" onclick={openProject} disabled={isTranscribing}>
        Open Project
      </button>
      {#if $segments.length > 0}
        <button class="settings-btn" onclick={saveProject}>
          Save
        </button>
        <button class="settings-btn" onclick={saveProjectAs}>
          Save As
        </button>
      {/if}
      <button class="import-btn" onclick={handleFileImport} disabled={isTranscribing}>
        {#if isTranscribing}
          Processing...
        {:else}
          Import Audio/Video
        {/if}
      </button>
      <button class="settings-btn" onclick={() => showSettings = true} title="Settings">
        Settings
@@ -260,25 +788,37 @@
  <div class="workspace">
    <div class="main-content">
      <WaveformPlayer bind:this={waveformPlayer} {audioUrl} />
-    <TranscriptEditor onWordClick={handleWordClick} />
+      <TranscriptEditor onWordClick={handleWordClick} onTextEdit={handleTextEdit} />
    </div>
    <div class="sidebar-right">
      <SpeakerManager />
      <AIChatPanel />
    </div>
  </div>
  </div>
  <ProgressOverlay
    visible={isTranscribing}
    percent={transcriptionProgress}
    stage={transcriptionStage}
    message={transcriptionMessage}
    onCancel={handleCancelProcessing}
  />
  {#if extractingAudio}
    <div class="extraction-overlay">
      <div class="extraction-card">
        <div class="extraction-spinner"></div>
        <p>Extracting audio...</p>
      </div>
    </div>
  {/if}
  <SettingsModal
    visible={showSettings}
    onClose={() => showSettings = false}
  />
 {/if}
 <style>
  .app-header {
@@ -289,10 +829,6 @@
    background: #0f3460;
    color: #e0e0e0;
  }
  h1 {
    font-size: 1.25rem;
    margin: 0;
  }
  .import-btn {
    background: #e94560;
    border: none;
@@ -303,9 +839,18 @@
    font-size: 0.875rem;
    font-weight: 500;
  }
-  .import-btn:hover {
+  .import-btn:hover:not(:disabled) {
    background: #d63851;
  }
  /* The import button is disabled only while isTranscribing is true (see the
     button markup), so the pulsing opacity doubles as a busy indicator. */
  .import-btn:disabled {
    opacity: 0.7;
    cursor: not-allowed;
    animation: pulse 1.5s ease-in-out infinite;
  }
  /* Opacity oscillation used by the disabled import button; starts and ends at
     the same 0.7 opacity the rule above sets. */
  @keyframes pulse {
    0%, 100% { opacity: 0.7; }
    50% { opacity: 1; }
  }
  .header-actions {
    display: flex;
    gap: 0.5rem;
@@ -320,10 +865,14 @@
    cursor: pointer;
    font-size: 0.875rem;
  }
-  .settings-btn:hover {
+  .settings-btn:hover:not(:disabled) {
    background: rgba(255,255,255,0.05);
    border-color: #e94560;
  }
  .settings-btn:disabled {
    opacity: 0.5;
    cursor: not-allowed;
  }
  .export-dropdown {
    position: relative;
  }
@@ -366,11 +915,19 @@
  .export-option:hover {
    background: rgba(233, 69, 96, 0.2);
  }
  .app-shell {
    display: flex;
    flex-direction: column;
    height: 100vh;
    overflow: hidden;
  }
  .workspace {
    display: flex;
    gap: 1rem;
    padding: 1rem;
-    height: calc(100vh - 3.5rem);
+    flex: 1;
    min-height: 0;
    overflow: hidden;
    background: #0a0a23;
  }
  .main-content {
@@ -379,6 +936,8 @@
    flex-direction: column;
    gap: 1rem;
    min-width: 0;
    min-height: 0;
    overflow-y: auto;
  }
  .sidebar-right {
    width: 300px;
@@ -386,5 +945,114 @@
    flex-direction: column;
    gap: 1rem;
    flex-shrink: 0;
    min-height: 0;
    overflow-y: auto;
  }
  /* Splash screen (rendered while !appReady || !sidecarChecked) */
  .splash-screen {
    display: flex;
    flex-direction: column;
    align-items: center;
    justify-content: center;
    height: 100vh;
    background: #0a0a23;
    color: #e0e0e0;
    gap: 1rem;
  }
  .splash-title {
    font-size: 2rem;
    margin: 0;
    color: #e94560;
  }
  .splash-subtitle {
    font-size: 1rem;
    color: #888;
    margin: 0;
  }
  .splash-spinner {
    width: 32px;
    height: 32px;
    border: 3px solid #2a3a5e;
    border-top-color: #e94560;
    border-radius: 50%;
    animation: spin 0.8s linear infinite;
  }
  /* Full-turn rotation; referenced by both .splash-spinner and .extraction-spinner */
  @keyframes spin {
    to { transform: rotate(360deg); }
  }
  /* Sidecar update banner */
  .update-banner {
    display: flex;
    align-items: center;
    gap: 0.75rem;
    padding: 0.5rem 1rem;
    background: rgba(78, 205, 196, 0.1);
    border-bottom: 1px solid rgba(78, 205, 196, 0.25);
    color: #e0e0e0;
    font-size: 0.85rem;
  }
  .update-text {
    flex: 1;
    color: #b0b0b0;
  }
  .update-btn {
    background: #4ecdc4;
    border: none;
    color: #0a0a23;
    padding: 0.3rem 0.85rem;
    border-radius: 4px;
    cursor: pointer;
    font-size: 0.8rem;
    font-weight: 600;
  }
  .update-btn:hover {
    background: #3dbdb5;
  }
  .update-dismiss {
    background: none;
    border: none;
    color: #888;
    font-size: 1.1rem;
    cursor: pointer;
    padding: 0.1rem 0.3rem;
    line-height: 1;
  }
  .update-dismiss:hover {
    color: #e0e0e0;
  }
  /* Audio extraction overlay */
  .extraction-overlay {
    position: fixed;
    inset: 0;
    background: rgba(0, 0, 0, 0.8);
    display: flex;
    align-items: center;
    justify-content: center;
    z-index: 9999;
  }
  .extraction-card {
    background: #16213e;
    padding: 2rem 2.5rem;
    border-radius: 12px;
    color: #e0e0e0;
    border: 1px solid #2a3a5e;
    box-shadow: 0 8px 32px rgba(0, 0, 0, 0.5);
    display: flex;
    flex-direction: column;
    align-items: center;
    gap: 1rem;
  }
  .extraction-card p {
    margin: 0;
    font-size: 1rem;
  }
  .extraction-spinner {
    width: 32px;
    height: 32px;
    border: 3px solid #2a3a5e;
    border-top-color: #e94560;
    border-radius: 50%;
    animation: spin 0.8s linear infinite;
  }
 </style>