chore: bump sidecar version to 1.0.10 [skip ci]

Fix diarization tensor mismatch + fix sidecar build triggers
Diarization: Audio.crop patch now pads short segments with zeros to match the expected duration. pyannote batches embeddings with vstack which requires uniform tensor sizes — the last segment of a file can be shorter than the 10s window. CI: Reordered sidecar workflow to check for python/ changes FIRST, before bumping version or configuring git. All subsequent steps are gated on has_changes. This prevents unnecessary version bumps and build runs when only app code changes. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-23 01:30:51 +00:00 · 2026-03-22 18:30:43 -07:00 · 2026-03-23 01:05:15 +00:00 · 2026-03-22 18:05:08 -07:00 · 2026-03-23 00:42:00 +00:00 · 2026-03-23 00:41:56 +00:00
65 changed files with 5517 additions and 386 deletions
--- a/.claude/settings.local.json
+++ b/.claude/settings.local.json
@@ -0,0 +1,21 @@
+{
+  "permissions": {
+    "allow": [
+      "Bash(git init:*)",
+      "Bash(git:*)",
+      "WebSearch",
+      "Bash(npm create:*)",
+      "Bash(cp:*)",
+      "Bash(npm install:*)",
+      "Bash(/home/jknapp/.cargo/bin/cargo test:*)",
+      "Bash(ruff:*)",
+      "Bash(npm run:*)",
+      "Bash(npx svelte-check:*)",
+      "Bash(pip install:*)",
+      "Bash(python3:*)",
+      "Bash(/home/jknapp/.cargo/bin/cargo check:*)",
+      "Bash(cargo check:*)",
+      "Bash(npm ls:*)"
+    ]
+  }
+}
--- a/.claude/worktrees/agent-a0bd87d1
+++ b/.claude/worktrees/agent-a0bd87d1
--- a/.claude/worktrees/agent-a198b5f8
+++ b/.claude/worktrees/agent-a198b5f8
--- a/.claude/worktrees/agent-ad3d6fca
+++ b/.claude/worktrees/agent-ad3d6fca
--- a/.claude/worktrees/agent-aefe2597
+++ b/.claude/worktrees/agent-aefe2597
--- a/.gitea/workflows/build-sidecar.yml
+++ b/.gitea/workflows/build-sidecar.yml
@@ -0,0 +1,402 @@
+name: Build Sidecars
+
+on:
+  push:
+    branches: [main]
+    paths: ['python/**']
+  workflow_dispatch:
+
+jobs:
+  bump-sidecar-version:
+    name: Bump sidecar version and tag
+    if: "!contains(github.event.head_commit.message, '[skip ci]')"
+    runs-on: ubuntu-latest
+    outputs:
+      version: ${{ steps.bump.outputs.version }}
+      tag: ${{ steps.bump.outputs.tag }}
+      has_changes: ${{ steps.check_changes.outputs.has_changes }}
+    steps:
+      # Full history is fetched so the commit that main pointed at before the
+      # push is available locally for the diff in the next step.
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      # Decides whether the sidecar needs rebuilding. Sets the step output
+      # `has_changes` to 'true' or 'false'; every later step and job is gated
+      # on it.
+      - name: Check for python changes
+        id: check_changes
+        env:
+          BEFORE_SHA: ${{ github.event.before }}
+        run: |
+          # If triggered by workflow_dispatch, always build
+          if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
+            echo "has_changes=true" >> "$GITHUB_OUTPUT"
+            exit 0
+          fi
+
+          # Diff against the pre-push tip so python/ changes in ANY pushed
+          # commit are seen, not only those in the last commit. Fall back to
+          # the parent commit when the previous tip is missing or unreachable
+          # (first push, all-zero SHA, force push).
+          BASE="HEAD~1"
+          if [ -n "${BEFORE_SHA}" ] && git cat-file -e "${BEFORE_SHA}^{commit}" 2>/dev/null; then
+            BASE="${BEFORE_SHA}"
+          fi
+
+          # Check if any python/ files changed in the pushed commits
+          CHANGED=$(git diff --name-only "${BASE}" HEAD -- python/ 2>/dev/null || echo "")
+          if [ -n "$CHANGED" ]; then
+            echo "has_changes=true" >> "$GITHUB_OUTPUT"
+            echo "Python changes detected: $CHANGED"
+          else
+            echo "has_changes=false" >> "$GITHUB_OUTPUT"
+            echo "No python/ changes detected, skipping sidecar build"
+          fi
+
+      - name: Configure git
+        if: steps.check_changes.outputs.has_changes == 'true'
+        run: |
+          git config user.name "Gitea Actions"
+          git config user.email "actions@gitea.local"
+
+      # Reads the sidecar version from python/pyproject.toml, increments the
+      # patch component, writes it back, and exposes `version` and `tag`
+      # (sidecar-v<version>) as step outputs.
+      - name: Bump sidecar patch version
+        if: steps.check_changes.outputs.has_changes == 'true'
+        id: bump
+        run: |
+          # Read current version from python/pyproject.toml
+          CURRENT=$(grep '^version = ' python/pyproject.toml | head -1 | sed 's/version = "\(.*\)"/\1/')
+          echo "Current sidecar version: ${CURRENT}"
+
+          # Refuse to continue unless the version is a plain MAJOR.MINOR.PATCH;
+          # otherwise the arithmetic below would produce a bogus version
+          # (e.g. "..1") that then gets committed and tagged.
+          if ! echo "${CURRENT}" | grep -Eq '^(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)$'; then
+            echo "ERROR: Could not parse a MAJOR.MINOR.PATCH version from python/pyproject.toml (got '${CURRENT}')"
+            exit 1
+          fi
+
+          # Increment patch number
+          MAJOR=$(echo "${CURRENT}" | cut -d. -f1)
+          MINOR=$(echo "${CURRENT}" | cut -d. -f2)
+          PATCH=$(echo "${CURRENT}" | cut -d. -f3)
+          NEW_PATCH=$((PATCH + 1))
+          NEW_VERSION="${MAJOR}.${MINOR}.${NEW_PATCH}"
+          echo "New sidecar version: ${NEW_VERSION}"
+
+          # Update python/pyproject.toml (dots escaped so they match literally)
+          CURRENT_RE=$(printf '%s' "${CURRENT}" | sed 's/\./\\./g')
+          sed -i "s/^version = \"${CURRENT_RE}\"/version = \"${NEW_VERSION}\"/" python/pyproject.toml
+
+          echo "version=${NEW_VERSION}" >> "$GITHUB_OUTPUT"
+          echo "tag=sidecar-v${NEW_VERSION}" >> "$GITHUB_OUTPUT"
+
+      # Commits the bumped pyproject.toml, rebases onto the latest main, then
+      # tags and pushes both the commit and the tag using BUILD_TOKEN.
+      - name: Commit and tag
+        if: steps.check_changes.outputs.has_changes == 'true'
+        env:
+          BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
+        run: |
+          NEW_VERSION="${{ steps.bump.outputs.version }}"
+          TAG="${{ steps.bump.outputs.tag }}"
+          git add python/pyproject.toml
+          git commit -m "chore: bump sidecar version to ${NEW_VERSION} [skip ci]"
+
+          # Push using token for authentication (rebase in case another workflow pushed first)
+          REMOTE_URL=$(git remote get-url origin | sed "s|://|://gitea-actions:${BUILD_TOKEN}@|")
+          # A failed rebase must stop the step: pushing from a half-finished
+          # rebase would publish the wrong commit.
+          git pull --rebase "${REMOTE_URL}" main
+
+          # Tag only after the rebase. Rebasing rewrites the bump commit, so a
+          # tag created earlier would point at a commit that never reaches main.
+          git tag "${TAG}"
+          git push "${REMOTE_URL}" HEAD:main
+          git push "${REMOTE_URL}" "${TAG}"
+
+      # Creates the Gitea release for the freshly pushed sidecar tag. The
+      # platform build jobs later look this release up by tag to attach assets.
+      - name: Create Gitea release
+        if: steps.check_changes.outputs.has_changes == 'true'
+        env:
+          BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
+        run: |
+          REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
+          TAG="${{ steps.bump.outputs.tag }}"
+          VERSION="${{ steps.bump.outputs.version }}"
+          RELEASE_NAME="Sidecar v${VERSION}"
+
+          # --fail turns an HTTP error (bad token, duplicate tag, ...) into a
+          # non-zero exit so the step cannot report success for a release
+          # that was never created.
+          if ! curl --fail -sS -X POST \
+            -H "Authorization: token ${BUILD_TOKEN}" \
+            -H "Content-Type: application/json" \
+            -d "{\"tag_name\": \"${TAG}\", \"name\": \"${RELEASE_NAME}\", \"body\": \"Automated sidecar build.\", \"draft\": false, \"prerelease\": false}" \
+            "${REPO_API}/releases"; then
+            echo "ERROR: Failed to create release: ${RELEASE_NAME}"
+            exit 1
+          fi
+          echo "Created release: ${RELEASE_NAME}"
+
+  build-sidecar-linux:
+    name: Build Sidecar (Linux)
+    needs: bump-sidecar-version
+    if: needs.bump-sidecar-version.outputs.has_changes == 'true'
+    runs-on: ubuntu-latest
+    env:
+      PYTHON_VERSION: "3.11"
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          ref: ${{ needs.bump-sidecar-version.outputs.tag }}
+
+      - name: Install uv
+        run: |
+          if command -v uv &> /dev/null; then
+            echo "uv already installed: $(uv --version)"
+          else
+            curl -LsSf https://astral.sh/uv/install.sh | sh
+            echo "$HOME/.local/bin" >> $GITHUB_PATH
+          fi
+
+      - name: Install ffmpeg
+        run: sudo apt-get update && sudo apt-get install -y ffmpeg
+
+      - name: Set up Python
+        run: uv python install ${{ env.PYTHON_VERSION }}
+
+      - name: Build sidecar (CUDA)
+        working-directory: python
+        run: uv run --python ${{ env.PYTHON_VERSION }} python build_sidecar.py --with-cuda
+
+      - name: Package sidecar (CUDA)
+        run: |
+          cd python/dist/voice-to-notes-sidecar && zip -r ../../../sidecar-linux-x86_64-cuda.zip .
+
+      - name: Build sidecar (CPU)
+        working-directory: python
+        run: |
+          rm -rf dist/voice-to-notes-sidecar
+          uv run --python ${{ env.PYTHON_VERSION }} python build_sidecar.py --cpu-only
+
+      - name: Package sidecar (CPU)
+        run: |
+          cd python/dist/voice-to-notes-sidecar && zip -r ../../../sidecar-linux-x86_64-cpu.zip .
+
+      # Waits for the sidecar release to exist, then uploads every
+      # sidecar-*.zip in the workspace root as a release asset, replacing any
+      # asset of the same name. Fails the job if any upload is rejected.
+      - name: Upload to sidecar release
+        env:
+          BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
+        run: |
+          sudo apt-get install -y jq
+          REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
+          TAG="${{ needs.bump-sidecar-version.outputs.tag }}"
+
+          # Find the sidecar release by tag (retry up to 30 times with 10s delay)
+          echo "Waiting for sidecar release ${TAG} to be available..."
+          for i in $(seq 1 30); do
+            RELEASE_JSON=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
+              "${REPO_API}/releases/tags/${TAG}")
+            RELEASE_ID=$(echo "$RELEASE_JSON" | jq -r '.id // empty')
+
+            if [ -n "${RELEASE_ID}" ] && [ "${RELEASE_ID}" != "null" ]; then
+              echo "Found sidecar release: ${TAG} (ID: ${RELEASE_ID})"
+              break
+            fi
+
+            echo "Attempt ${i}/30: Release not ready yet, retrying in 10s..."
+            sleep 10
+          done
+
+          if [ -z "${RELEASE_ID}" ] || [ "${RELEASE_ID}" = "null" ]; then
+            echo "ERROR: Failed to find sidecar release for tag ${TAG} after 30 attempts."
+            exit 1
+          fi
+
+          UPLOAD_FAILED=0
+          for file in sidecar-*.zip; do
+            # An unmatched glob is left as the literal pattern; skip it
+            [ -e "$file" ] || continue
+
+            filename=$(basename "$file")
+            encoded_name=$(echo "$filename" | sed 's/ /%20/g')
+            echo "Uploading ${filename} ($(du -h "$file" | cut -f1))..."
+
+            # Remove an existing asset with the same name so re-runs replace it
+            ASSET_ID=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
+              "${REPO_API}/releases/${RELEASE_ID}/assets" | jq -r ".[] | select(.name == \"${filename}\") | .id // empty")
+            if [ -n "${ASSET_ID}" ]; then
+              curl -s -X DELETE -H "Authorization: token ${BUILD_TOKEN}" \
+                "${REPO_API}/releases/${RELEASE_ID}/assets/${ASSET_ID}"
+            fi
+
+            # curl exits 0 on HTTP errors here, so the status code is checked
+            # explicitly; "|| true" keeps a transport failure (code 000) on
+            # the same reporting path.
+            HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X POST \
+              -H "Authorization: token ${BUILD_TOKEN}" \
+              -H "Content-Type: application/octet-stream" \
+              -T "$file" \
+              "${REPO_API}/releases/${RELEASE_ID}/assets?name=${encoded_name}") || true
+            echo "Upload response: HTTP ${HTTP_CODE}"
+            case "${HTTP_CODE}" in
+              2*) ;;
+              *)
+                echo "ERROR: Upload failed for ${filename} (HTTP ${HTTP_CODE})"
+                UPLOAD_FAILED=1
+                ;;
+            esac
+          done
+
+          # Try every file first, then fail the job if any upload was rejected
+          if [ "${UPLOAD_FAILED}" -ne 0 ]; then
+            exit 1
+          fi
+
+  build-sidecar-windows:
+    name: Build Sidecar (Windows)
+    needs: bump-sidecar-version
+    if: needs.bump-sidecar-version.outputs.has_changes == 'true'
+    runs-on: windows-latest
+    env:
+      PYTHON_VERSION: "3.11"
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          ref: ${{ needs.bump-sidecar-version.outputs.tag }}
+
+      - name: Install uv
+        shell: powershell
+        run: |
+          if (Get-Command uv -ErrorAction SilentlyContinue) {
+            Write-Host "uv already installed: $(uv --version)"
+          } else {
+            irm https://astral.sh/uv/install.ps1 | iex
+            echo "$env:USERPROFILE\.local\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
+          }
+
+      - name: Install ffmpeg
+        shell: powershell
+        run: choco install ffmpeg -y
+
+      - name: Set up Python
+        shell: powershell
+        run: uv python install ${{ env.PYTHON_VERSION }}
+
+      - name: Install 7-Zip
+        shell: powershell
+        run: |
+          if (-not (Get-Command 7z -ErrorAction SilentlyContinue)) {
+            choco install 7zip -y
+          }
+
+      - name: Build sidecar (CUDA)
+        shell: powershell
+        working-directory: python
+        run: uv run --python ${{ env.PYTHON_VERSION }} python build_sidecar.py --with-cuda
+
+      - name: Package sidecar (CUDA)
+        shell: powershell
+        run: |
+          7z a -tzip -mx=5 sidecar-windows-x86_64-cuda.zip .\python\dist\voice-to-notes-sidecar\*
+
+      - name: Build sidecar (CPU)
+        shell: powershell
+        working-directory: python
+        run: |
+          Remove-Item -Recurse -Force dist\voice-to-notes-sidecar
+          uv run --python ${{ env.PYTHON_VERSION }} python build_sidecar.py --cpu-only
+
+      - name: Package sidecar (CPU)
+        shell: powershell
+        run: |
+          7z a -tzip -mx=5 sidecar-windows-x86_64-cpu.zip .\python\dist\voice-to-notes-sidecar\*
+
+      - name: Upload to sidecar release
+        shell: powershell
+        env:
+          BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
+        run: |
+          $REPO_API = "${{ github.server_url }}/api/v1/repos/${{ github.repository }}"
+          $Headers = @{ "Authorization" = "token $env:BUILD_TOKEN" }
+          $TAG = "${{ needs.bump-sidecar-version.outputs.tag }}"
+
+          # Find the sidecar release by tag (retry up to 30 times with 10s delay)
+          Write-Host "Waiting for sidecar release ${TAG} to be available..."
+          $RELEASE_ID = $null
+
+          for ($i = 1; $i -le 30; $i++) {
+            try {
+              $release = Invoke-RestMethod -Uri "${REPO_API}/releases/tags/${TAG}" -Headers $Headers -ErrorAction Stop
+              $RELEASE_ID = $release.id
+
+              if ($RELEASE_ID) {
+                Write-Host "Found sidecar release: ${TAG} (ID: ${RELEASE_ID})"
+                break
+              }
+            } catch {
+              # Release not ready yet
+            }
+
+            Write-Host "Attempt ${i}/30: Release not ready yet, retrying in 10s..."
+            Start-Sleep -Seconds 10
+          }
+
+          if (-not $RELEASE_ID) {
+            Write-Host "ERROR: Failed to find sidecar release for tag ${TAG} after 30 attempts."
+            exit 1
+          }
+
+          Get-ChildItem -Path . -Filter "sidecar-*.zip" | ForEach-Object {
+            $filename = $_.Name
+            $encodedName = [System.Uri]::EscapeDataString($filename)
+            $size = [math]::Round($_.Length / 1MB, 1)
+            Write-Host "Uploading ${filename} (${size} MB)..."
+
+            try {
+              $assets = Invoke-RestMethod -Uri "${REPO_API}/releases/${RELEASE_ID}/assets" -Headers $Headers
+              $existing = $assets | Where-Object { $_.name -eq $filename }
+              if ($existing) {
+                Invoke-RestMethod -Uri "${REPO_API}/releases/${RELEASE_ID}/assets/$($existing.id)" -Method Delete -Headers $Headers
+              }
+            } catch {}
+
+            $uploadUrl = "${REPO_API}/releases/${RELEASE_ID}/assets?name=${encodedName}"
+            $result = curl.exe --fail --silent --show-error `
+              -X POST `
+              -H "Authorization: token $env:BUILD_TOKEN" `
+              -H "Content-Type: application/octet-stream" `
+              -T "$($_.FullName)" `
+              "$uploadUrl" 2>&1
+            if ($LASTEXITCODE -eq 0) {
+              Write-Host "Upload successful: ${filename}"
+            } else {
+              Write-Host "WARNING: Upload failed for ${filename}: ${result}"
+            }
+          }
+
+  build-sidecar-macos:
+    name: Build Sidecar (macOS)
+    needs: bump-sidecar-version
+    if: needs.bump-sidecar-version.outputs.has_changes == 'true'
+    runs-on: macos-latest
+    env:
+      PYTHON_VERSION: "3.11"
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          ref: ${{ needs.bump-sidecar-version.outputs.tag }}
+
+      - name: Install uv
+        run: |
+          if command -v uv &> /dev/null; then
+            echo "uv already installed: $(uv --version)"
+          else
+            curl -LsSf https://astral.sh/uv/install.sh | sh
+            echo "$HOME/.local/bin" >> $GITHUB_PATH
+          fi
+
+      - name: Install ffmpeg
+        run: brew install ffmpeg
+
+      - name: Set up Python
+        run: uv python install ${{ env.PYTHON_VERSION }}
+
+      - name: Build sidecar (CPU)
+        working-directory: python
+        run: uv run --python ${{ env.PYTHON_VERSION }} python build_sidecar.py --cpu-only
+
+      - name: Package sidecar (CPU)
+        run: |
+          cd python/dist/voice-to-notes-sidecar && zip -r ../../../sidecar-macos-aarch64-cpu.zip .
+
+      # Waits for the sidecar release to exist, then uploads every
+      # sidecar-*.zip in the workspace root as a release asset, replacing any
+      # asset of the same name. Fails the job if any upload is rejected.
+      - name: Upload to sidecar release
+        env:
+          BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
+        run: |
+          which jq || brew install jq
+
+          REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
+          TAG="${{ needs.bump-sidecar-version.outputs.tag }}"
+
+          # Find the sidecar release by tag (retry up to 30 times with 10s delay)
+          echo "Waiting for sidecar release ${TAG} to be available..."
+          for i in $(seq 1 30); do
+            RELEASE_JSON=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
+              "${REPO_API}/releases/tags/${TAG}")
+            RELEASE_ID=$(echo "$RELEASE_JSON" | jq -r '.id // empty')
+
+            if [ -n "${RELEASE_ID}" ] && [ "${RELEASE_ID}" != "null" ]; then
+              echo "Found sidecar release: ${TAG} (ID: ${RELEASE_ID})"
+              break
+            fi
+
+            echo "Attempt ${i}/30: Release not ready yet, retrying in 10s..."
+            sleep 10
+          done
+
+          if [ -z "${RELEASE_ID}" ] || [ "${RELEASE_ID}" = "null" ]; then
+            echo "ERROR: Failed to find sidecar release for tag ${TAG} after 30 attempts."
+            exit 1
+          fi
+
+          UPLOAD_FAILED=0
+          for file in sidecar-*.zip; do
+            # An unmatched glob is left as the literal pattern; skip it
+            [ -e "$file" ] || continue
+
+            filename=$(basename "$file")
+            encoded_name=$(echo "$filename" | sed 's/ /%20/g')
+            echo "Uploading ${filename} ($(du -h "$file" | cut -f1))..."
+
+            # Remove an existing asset with the same name so re-runs replace it
+            ASSET_ID=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
+              "${REPO_API}/releases/${RELEASE_ID}/assets" | jq -r ".[] | select(.name == \"${filename}\") | .id // empty")
+            if [ -n "${ASSET_ID}" ]; then
+              curl -s -X DELETE -H "Authorization: token ${BUILD_TOKEN}" \
+                "${REPO_API}/releases/${RELEASE_ID}/assets/${ASSET_ID}"
+            fi
+
+            # curl exits 0 on HTTP errors here, so the status code is checked
+            # explicitly; "|| true" keeps a transport failure (code 000) on
+            # the same reporting path.
+            HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X POST \
+              -H "Authorization: token ${BUILD_TOKEN}" \
+              -H "Content-Type: application/octet-stream" \
+              -T "$file" \
+              "${REPO_API}/releases/${RELEASE_ID}/assets?name=${encoded_name}") || true
+            echo "Upload response: HTTP ${HTTP_CODE}"
+            case "${HTTP_CODE}" in
+              2*) ;;
+              *)
+                echo "ERROR: Upload failed for ${filename} (HTTP ${HTTP_CODE})"
+                UPLOAD_FAILED=1
+                ;;
+            esac
+          done
+
+          # Try every file first, then fail the job if any upload was rejected
+          if [ "${UPLOAD_FAILED}" -ne 0 ]; then
+            exit 1
+          fi
--- a/.gitea/workflows/cleanup-releases.yml
+++ b/.gitea/workflows/cleanup-releases.yml
@@ -0,0 +1,65 @@
+name: Cleanup Old Releases
+
+on:
+  # Runs on a daily schedule (and on demand); it is not chained to the
+  # release or sidecar workflows.
+  schedule:
+    - cron: '0 6 * * *'  # Daily at 6am UTC
+  workflow_dispatch:
+
+jobs:
+  cleanup:
+    name: Remove old releases
+    runs-on: ubuntu-latest
+    env:
+      KEEP_COUNT: 5
+    steps:
+      # Keeps the newest KEEP_COUNT app releases (v*) and the newest
+      # KEEP_COUNT sidecar releases (sidecar-v*); deletes older releases
+      # together with their tags.
+      - name: Cleanup old app releases
+        env:
+          BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
+        run: |
+          # The other workflows install jq explicitly, so do not assume the
+          # runner image ships it.
+          command -v jq > /dev/null || { sudo apt-get update && sudo apt-get install -y jq; }
+
+          REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
+
+          # Get all releases, sorted newest first (API default). Only the
+          # first page (50 entries) is inspected per run. --fail aborts on an
+          # HTTP error instead of treating an error body as "no releases".
+          if ! RELEASES=$(curl --fail -sS -H "Authorization: token ${BUILD_TOKEN}" \
+            "${REPO_API}/releases?limit=50"); then
+            echo "ERROR: Failed to list releases."
+            exit 1
+          fi
+
+          # Separate app releases (v*) and sidecar releases (sidecar-v*)
+          APP_IDS=$(echo "$RELEASES" | jq -r '[.[] | select(.tag_name | startswith("v") and (startswith("sidecar") | not)) | .id] | .[]')
+          SIDECAR_IDS=$(echo "$RELEASES" | jq -r '[.[] | select(.tag_name | startswith("sidecar-v")) | .id] | .[]')
+
+          # delete_old LABEL ID...
+          # Skips the first KEEP_COUNT ids (the newest releases) and deletes
+          # each remaining release and its tag.
+          delete_old() {
+            LABEL="$1"
+            shift
+            COUNT=0
+            for ID in "$@"; do
+              COUNT=$((COUNT + 1))
+              if [ $COUNT -le ${{ env.KEEP_COUNT }} ]; then
+                continue
+              fi
+              TAG=$(echo "$RELEASES" | jq -r ".[] | select(.id == $ID) | .tag_name")
+              echo "Deleting ${LABEL} release $ID ($TAG)..."
+              curl -s -o /dev/null -w "HTTP %{http_code}\n" -X DELETE \
+                -H "Authorization: token ${BUILD_TOKEN}" \
+                "${REPO_API}/releases/$ID"
+              # Also delete the tag
+              curl -s -o /dev/null -X DELETE \
+                -H "Authorization: token ${BUILD_TOKEN}" \
+                "${REPO_API}/tags/$TAG"
+            done
+          }
+
+          # The id lists are left unquoted on purpose so that each id becomes
+          # a separate argument.
+          delete_old app $APP_IDS
+          delete_old sidecar $SIDECAR_IDS
+
+          echo "Cleanup complete. Kept latest ${{ env.KEEP_COUNT }} of each type."
--- a/.gitea/workflows/release.yml
+++ b/.gitea/workflows/release.yml
@@ -0,0 +1,305 @@
+name: Release
+
+on:
+  push:
+    branches: [main]
+
+jobs:
+  bump-version:
+    name: Bump version and tag
+    # Skip if this is a version-bump commit (avoid infinite loop)
+    if: "!contains(github.event.head_commit.message, '[skip ci]')"
+    runs-on: ubuntu-latest
+    outputs:
+      new_version: ${{ steps.bump.outputs.new_version }}
+      tag: ${{ steps.bump.outputs.tag }}
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Configure git
+        run: |
+          git config user.name "Gitea Actions"
+          git config user.email "actions@gitea.local"
+
+      # Reads the app version from package.json, increments the patch
+      # component, writes it to package.json, tauri.conf.json and Cargo.toml,
+      # and exposes `new_version` and `tag` (v<version>) as step outputs.
+      - name: Bump patch version
+        id: bump
+        run: |
+          # Read current version from package.json
+          CURRENT=$(grep '"version"' package.json | head -1 | sed 's/.*"version": *"\([^"]*\)".*/\1/')
+          echo "Current version: ${CURRENT}"
+
+          # Refuse to continue unless the version is a plain MAJOR.MINOR.PATCH;
+          # otherwise the arithmetic below would produce a bogus version
+          # (e.g. "..1") that then gets committed and tagged.
+          if ! echo "${CURRENT}" | grep -Eq '^(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)$'; then
+            echo "ERROR: Could not parse a MAJOR.MINOR.PATCH version from package.json (got '${CURRENT}')"
+            exit 1
+          fi
+
+          # Increment patch number
+          MAJOR=$(echo "${CURRENT}" | cut -d. -f1)
+          MINOR=$(echo "${CURRENT}" | cut -d. -f2)
+          PATCH=$(echo "${CURRENT}" | cut -d. -f3)
+          NEW_PATCH=$((PATCH + 1))
+          NEW_VERSION="${MAJOR}.${MINOR}.${NEW_PATCH}"
+          echo "New version: ${NEW_VERSION}"
+
+          # Escape the dots so they match literally in the sed patterns below
+          CURRENT_RE=$(printf '%s' "${CURRENT}" | sed 's/\./\\./g')
+
+          # Update package.json
+          sed -i "s/\"version\": \"${CURRENT_RE}\"/\"version\": \"${NEW_VERSION}\"/" package.json
+
+          # Update src-tauri/tauri.conf.json
+          sed -i "s/\"version\": \"${CURRENT_RE}\"/\"version\": \"${NEW_VERSION}\"/" src-tauri/tauri.conf.json
+
+          # Update src-tauri/Cargo.toml (match version = "x.y.z" in [package] section)
+          sed -i "s/^version = \"${CURRENT_RE}\"/version = \"${NEW_VERSION}\"/" src-tauri/Cargo.toml
+
+          echo "new_version=${NEW_VERSION}" >> "$GITHUB_OUTPUT"
+          echo "tag=v${NEW_VERSION}" >> "$GITHUB_OUTPUT"
+
+      # Commits the bumped version files, rebases onto the latest main, then
+      # tags and pushes both the commit and the tag using BUILD_TOKEN.
+      - name: Commit and tag
+        env:
+          BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
+        run: |
+          NEW_VERSION="${{ steps.bump.outputs.new_version }}"
+          git add package.json src-tauri/tauri.conf.json src-tauri/Cargo.toml
+          git commit -m "chore: bump version to ${NEW_VERSION} [skip ci]"
+
+          # Push using token for authentication (rebase in case another workflow pushed first)
+          REMOTE_URL=$(git remote get-url origin | sed "s|://|://gitea-actions:${BUILD_TOKEN}@|")
+          # A failed rebase must stop the step: pushing from a half-finished
+          # rebase would publish the wrong commit.
+          git pull --rebase "${REMOTE_URL}" main
+
+          # Tag only after the rebase. Rebasing rewrites the bump commit, so a
+          # tag created earlier would point at a commit that never reaches main.
+          git tag "v${NEW_VERSION}"
+          git push "${REMOTE_URL}" HEAD:main
+          git push "${REMOTE_URL}" "v${NEW_VERSION}"
+
+      # Creates the Gitea release for the freshly pushed app tag. The platform
+      # build jobs later look this release up by tag to attach installers.
+      - name: Create Gitea release
+        env:
+          BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
+        run: |
+          REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
+          TAG="${{ steps.bump.outputs.tag }}"
+          RELEASE_NAME="Voice to Notes ${TAG}"
+
+          # --fail turns an HTTP error (bad token, duplicate tag, ...) into a
+          # non-zero exit so the step cannot report success for a release
+          # that was never created.
+          if ! curl --fail -sS -X POST \
+            -H "Authorization: token ${BUILD_TOKEN}" \
+            -H "Content-Type: application/json" \
+            -d "{\"tag_name\": \"${TAG}\", \"name\": \"${RELEASE_NAME}\", \"body\": \"Automated build.\", \"draft\": false, \"prerelease\": false}" \
+            "${REPO_API}/releases"; then
+            echo "ERROR: Failed to create release: ${RELEASE_NAME}"
+            exit 1
+          fi
+          echo "Created release: ${RELEASE_NAME}"
+
+  # ── Platform builds (run after version bump) ──
+
+  build-linux:
+    name: Build App (Linux)
+    needs: bump-version
+    runs-on: ubuntu-latest
+    env:
+      NODE_VERSION: "20"
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          ref: ${{ needs.bump-version.outputs.tag }}
+
+      - name: Set up Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: ${{ env.NODE_VERSION }}
+
+      - name: Install Rust stable
+        run: |
+          curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable
+          echo "$HOME/.cargo/bin" >> $GITHUB_PATH
+
+      - name: Install system dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y libgtk-3-dev libwebkit2gtk-4.1-dev libappindicator3-dev librsvg2-dev patchelf xdg-utils rpm
+
+      - name: Install npm dependencies
+        run: npm ci
+
+      - name: Build Tauri app
+        run: npm run tauri build
+
+      # Looks up the release for this build's tag and uploads every .deb and
+      # .rpm bundle as a release asset, replacing any asset of the same name.
+      # Fails the job if any upload is rejected.
+      - name: Upload to release
+        env:
+          BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
+        run: |
+          sudo apt-get install -y jq
+          REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
+
+          TAG="${{ needs.bump-version.outputs.tag }}"
+          echo "Release tag: ${TAG}"
+
+          RELEASE_ID=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
+            "${REPO_API}/releases/tags/${TAG}" | jq -r '.id // empty')
+
+          if [ -z "${RELEASE_ID}" ] || [ "${RELEASE_ID}" = "null" ]; then
+            echo "ERROR: Failed to find release for tag ${TAG}."
+            exit 1
+          fi
+
+          echo "Release ID: ${RELEASE_ID}"
+
+          # The loop runs in the pipeline's subshell, so the failure flag is
+          # evaluated inside the same group and surfaced through its exit
+          # status.
+          find src-tauri/target/release/bundle -type f \( -name "*.deb" -o -name "*.rpm" \) | {
+            UPLOAD_FAILED=0
+            while IFS= read -r file; do
+              filename=$(basename "$file")
+              encoded_name=$(echo "$filename" | sed 's/ /%20/g')
+              echo "Uploading ${filename} ($(du -h "$file" | cut -f1))..."
+
+              # Remove an existing asset with the same name so re-runs replace it
+              ASSET_ID=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
+                "${REPO_API}/releases/${RELEASE_ID}/assets" | jq -r ".[] | select(.name == \"${filename}\") | .id // empty")
+              if [ -n "${ASSET_ID}" ]; then
+                curl -s -X DELETE -H "Authorization: token ${BUILD_TOKEN}" \
+                  "${REPO_API}/releases/${RELEASE_ID}/assets/${ASSET_ID}"
+              fi
+
+              # curl exits 0 on HTTP errors here, so the status code is
+              # checked explicitly; "|| true" keeps a transport failure
+              # (code 000) on the same reporting path.
+              HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X POST \
+                -H "Authorization: token ${BUILD_TOKEN}" \
+                -H "Content-Type: application/octet-stream" \
+                -T "$file" \
+                "${REPO_API}/releases/${RELEASE_ID}/assets?name=${encoded_name}") || true
+              echo "Upload response: HTTP ${HTTP_CODE}"
+              case "${HTTP_CODE}" in
+                2*) ;;
+                *)
+                  echo "ERROR: Upload failed for ${filename} (HTTP ${HTTP_CODE})"
+                  UPLOAD_FAILED=1
+                  ;;
+              esac
+            done
+            [ "${UPLOAD_FAILED}" -eq 0 ]
+          } || exit 1
+
+  build-windows:
+    name: Build App (Windows)
+    needs: bump-version
+    runs-on: windows-latest
+    env:
+      NODE_VERSION: "20"
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          ref: ${{ needs.bump-version.outputs.tag }}
+
+      - name: Set up Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: ${{ env.NODE_VERSION }}
+
+      - name: Install Rust stable
+        shell: powershell
+        run: |
+          if (Get-Command rustup -ErrorAction SilentlyContinue) {
+            rustup default stable
+          } else {
+            Invoke-WebRequest -Uri https://win.rustup.rs/x86_64 -OutFile rustup-init.exe
+            .\rustup-init.exe -y --default-toolchain stable
+            echo "$env:USERPROFILE\.cargo\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
+          }
+
+      - name: Install npm dependencies
+        shell: powershell
+        run: npm ci
+
+      - name: Build Tauri app
+        shell: powershell
+        run: npm run tauri build
+
+      - name: Upload to release
+        shell: powershell
+        env:
+          BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
+        run: |
+          $REPO_API = "${{ github.server_url }}/api/v1/repos/${{ github.repository }}"
+          $Headers = @{ "Authorization" = "token $env:BUILD_TOKEN" }
+
+          $TAG = "${{ needs.bump-version.outputs.tag }}"
+          Write-Host "Release tag: ${TAG}"
+
+          $release = Invoke-RestMethod -Uri "${REPO_API}/releases/tags/${TAG}" -Headers $Headers -ErrorAction Stop
+          $RELEASE_ID = $release.id
+          Write-Host "Release ID: ${RELEASE_ID}"
+
+          Get-ChildItem -Path src-tauri\target\release\bundle -Recurse -Include *.msi,*-setup.exe | ForEach-Object {
+            $filename = $_.Name
+            $encodedName = [System.Uri]::EscapeDataString($filename)
+            $size = [math]::Round($_.Length / 1MB, 1)
+            Write-Host "Uploading ${filename} (${size} MB)..."
+
+            try {
+              $assets = Invoke-RestMethod -Uri "${REPO_API}/releases/${RELEASE_ID}/assets" -Headers $Headers
+              $existing = $assets | Where-Object { $_.name -eq $filename }
+              if ($existing) {
+                Invoke-RestMethod -Uri "${REPO_API}/releases/${RELEASE_ID}/assets/$($existing.id)" -Method Delete -Headers $Headers
+              }
+            } catch {}
+
+            # Use curl for streaming upload (Invoke-RestMethod fails on large files)
+            $uploadUrl = "${REPO_API}/releases/${RELEASE_ID}/assets?name=${encodedName}"
+            $result = curl.exe --fail --silent --show-error `
+              -X POST `
+              -H "Authorization: token $env:BUILD_TOKEN" `
+              -H "Content-Type: application/octet-stream" `
+              -T "$($_.FullName)" `
+              "$uploadUrl" 2>&1
+            if ($LASTEXITCODE -eq 0) {
+              Write-Host "Upload successful: ${filename}"
+            } else {
+              Write-Host "WARNING: Upload failed for ${filename}: ${result}"
+            }
+          }
+
+  build-macos:
+    name: Build App (macOS)
+    needs: bump-version
+    runs-on: macos-latest
+    env:
+      NODE_VERSION: "20"
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          ref: ${{ needs.bump-version.outputs.tag }}
+
+      - name: Set up Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: ${{ env.NODE_VERSION }}
+
+      - name: Install Rust stable
+        run: |
+          curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable
+          echo "$HOME/.cargo/bin" >> $GITHUB_PATH
+
+      - name: Install system dependencies
+        run: brew install --quiet create-dmg || true
+
+      - name: Install npm dependencies
+        run: npm ci
+
+      - name: Build Tauri app
+        run: npm run tauri build
+
+      # Looks up the release for this build's tag and uploads every .dmg
+      # bundle as a release asset, replacing any asset of the same name.
+      # Fails the job if any upload is rejected.
+      - name: Upload to release
+        env:
+          BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
+        run: |
+          which jq || brew install jq
+
+          REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
+
+          TAG="${{ needs.bump-version.outputs.tag }}"
+          echo "Release tag: ${TAG}"
+
+          RELEASE_ID=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
+            "${REPO_API}/releases/tags/${TAG}" | jq -r '.id // empty')
+
+          if [ -z "${RELEASE_ID}" ] || [ "${RELEASE_ID}" = "null" ]; then
+            echo "ERROR: Failed to find release for tag ${TAG}."
+            exit 1
+          fi
+
+          echo "Release ID: ${RELEASE_ID}"
+
+          # The loop runs in the pipeline's subshell, so the failure flag is
+          # evaluated inside the same group and surfaced through its exit
+          # status.
+          find src-tauri/target/release/bundle -type f -name "*.dmg" | {
+            UPLOAD_FAILED=0
+            while IFS= read -r file; do
+              filename=$(basename "$file")
+              encoded_name=$(echo "$filename" | sed 's/ /%20/g')
+              echo "Uploading ${filename} ($(du -h "$file" | cut -f1))..."
+
+              # Remove an existing asset with the same name so re-runs replace it
+              ASSET_ID=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
+                "${REPO_API}/releases/${RELEASE_ID}/assets" | jq -r ".[] | select(.name == \"${filename}\") | .id // empty")
+              if [ -n "${ASSET_ID}" ]; then
+                curl -s -X DELETE -H "Authorization: token ${BUILD_TOKEN}" \
+                  "${REPO_API}/releases/${RELEASE_ID}/assets/${ASSET_ID}"
+              fi
+
+              # curl exits 0 on HTTP errors here, so the status code is
+              # checked explicitly; "|| true" keeps a transport failure
+              # (code 000) on the same reporting path.
+              HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X POST \
+                -H "Authorization: token ${BUILD_TOKEN}" \
+                -H "Content-Type: application/octet-stream" \
+                -T "$file" \
+                "${REPO_API}/releases/${RELEASE_ID}/assets?name=${encoded_name}") || true
+              echo "Upload response: HTTP ${HTTP_CODE}"
+              case "${HTTP_CODE}" in
+                2*) ;;
+                *)
+                  echo "ERROR: Upload failed for ${filename} (HTTP ${HTTP_CODE})"
+                  UPLOAD_FAILED=1
+                  ;;
+              esac
+            done
+            [ "${UPLOAD_FAILED}" -eq 0 ]
+          } || exit 1
--- a/.gitignore
+++ b/.gitignore
@@ -46,3 +46,10 @@ Thumbs.db
 *.ogg
 *.flac
 !test/fixtures/*
+
+# Sidecar build artifacts
+src-tauri/binaries/*
+!src-tauri/binaries/.gitkeep
+src-tauri/sidecar.zip
+python/dist/
+python/build/
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -8,7 +8,7 @@ Desktop app for transcribing audio/video with speaker identification. Runs local
 - **ML pipeline:** Python sidecar process (faster-whisper, pyannote.audio, wav2vec2)
 - **Database:** SQLite (via rusqlite in Rust)
 - **Local AI:** Bundled llama-server (llama.cpp) — default, no install needed
- **Cloud AI providers:** LiteLLM, OpenAI, Anthropic (optional, user-configured)
+- **Cloud AI providers:** OpenAI, Anthropic, OpenAI-compatible endpoints (optional, user-configured)
 - **Caption export:** pysubs2 (Python)
 - **Audio UI:** wavesurfer.js
 - **Transcript editor:** TipTap (ProseMirror)
@@ -40,7 +40,13 @@ docs/                   # Architecture and design documents
 - Database: UUIDs as primary keys (TEXT type in SQLite)
 - All timestamps in milliseconds (integer) relative to media file start

+## Distribution
+- Python sidecar is frozen via PyInstaller into a standalone binary for distribution
+- Tauri bundles the sidecar via `externalBin` — no Python required for end users
+- CI/CD builds on Gitea Actions (Linux, Windows, macOS ARM)
+- Dev mode uses system Python (`VOICE_TO_NOTES_DEV=1` or debug builds)
+
 ## Platform Targets
- Linux (primary development target)
- Windows (must work, tested before release)
- macOS (future, not yet targeted)
+- Linux x86_64 (primary development target)
+- Windows x86_64
+- macOS aarch64 (Apple Silicon)
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -0,0 +1,140 @@
+# Contributing to Voice to Notes
+
+Thank you for your interest in contributing! This guide covers how to set up the project for development and submit changes.
+
+## Development Setup
+
+### Prerequisites
+
+- **Node.js 20+** and npm
+- **Rust** (stable toolchain)
+- **Python 3.11+** with [uv](https://docs.astral.sh/uv/) (recommended) or pip
+- **System libraries (Linux only):**
+  ```bash
+  sudo apt install libgtk-3-dev libwebkit2gtk-4.1-dev libappindicator3-dev librsvg2-dev patchelf xdg-utils
+  ```
+
+### Clone and Install
+
+```bash
+git clone https://repo.anhonesthost.net/MacroPad/voice-to-notes.git
+cd voice-to-notes
+
+# Frontend
+npm install
+
+# Python sidecar
+cd python && pip install -e ".[dev]" && cd ..
+```
+
+### Running in Dev Mode
+
+```bash
+npm run tauri:dev
+```
+
+This runs the Svelte dev server + Tauri with hot-reload. The Python sidecar runs from your system Python (no PyInstaller needed in dev mode).
+
+### Building
+
+```bash
+# Build the Python sidecar (frozen binary)
+cd python && python build_sidecar.py --cpu-only && cd ..
+
+# Build the full app
+npm run tauri build
+```
+
+## Project Structure
+
+```
+src/                        # Svelte 5 frontend
+  lib/components/           # Reusable UI components
+  lib/stores/               # Svelte stores (app state)
+  routes/                   # SvelteKit pages
+src-tauri/                  # Rust backend (Tauri v2)
+  src/sidecar/              # Python sidecar lifecycle (download, extract, IPC)
+  src/commands/             # Tauri command handlers
+  src/db/                   # SQLite database layer
+python/                     # Python ML sidecar
+  voice_to_notes/           # Main package
+    services/               # Transcription, diarization, AI, export
+    ipc/                    # JSON-line IPC protocol
+    hardware/               # GPU/CPU detection
+.gitea/workflows/           # CI/CD pipelines
+docs/                       # Documentation
+```
+
+## How It Works
+
+The app has three layers:
+
+1. **Frontend (Svelte)** — UI, audio playback (wavesurfer.js), transcript editing (TipTap)
+2. **Backend (Rust/Tauri)** — Desktop integration, file access, SQLite, sidecar process management
+3. **Sidecar (Python)** — ML inference (faster-whisper, pyannote.audio), AI chat, export
+
+Rust and Python communicate via **JSON-line IPC** over stdin/stdout pipes. Each request has an `id`, `type`, and `payload`. The Python sidecar runs as a child process managed by `SidecarManager` in Rust.
+
+## Conventions
+
+### Rust
+- Follow standard Rust conventions
+- Run `cargo fmt` and `cargo clippy` before committing
+- Tauri commands go in `src-tauri/src/commands/`
+
+### Python
+- Python 3.11+, type hints everywhere
+- Use `ruff` for linting: `ruff check python/`
+- Tests with pytest: `cd python && pytest`
+- IPC messages: JSON-line format with `id`, `type`, `payload` fields
+
+### TypeScript / Svelte
+- Svelte 5 runes (`$state`, `$derived`, `$effect`)
+- Strict TypeScript
+- Components in `src/lib/components/`
+- State in `src/lib/stores/`
+
+### General
+- All timestamps in milliseconds (integer)
+- UUIDs as primary keys in the database
+- Don't bundle API keys or secrets — those are user-configured
+
+## Submitting Changes
+
+1. Fork the repository
+2. Create a feature branch: `git checkout -b my-feature`
+3. Make your changes
+4. Test locally with `npm run tauri:dev`
+5. Run linters: `cargo fmt && cargo clippy`, `ruff check python/`
+6. Commit with a clear message describing the change
+7. Open a Pull Request against `main`
+
+## CI/CD
+
+Pushes to `main` automatically:
+- Bump the app version and create a release (`release.yml`)
+- Build app installers for all platforms
+
+Changes to `python/` also trigger sidecar builds (`build-sidecar.yml`).
+
+## Areas for Contribution
+
+- UI/UX improvements
+- New export formats
+- Additional AI provider integrations
+- Performance optimizations
+- Accessibility improvements
+- Documentation and translations
+- Bug reports and testing on different platforms
+
+## Reporting Issues
+
+Open an issue on the [repository](https://repo.anhonesthost.net/MacroPad/voice-to-notes/issues) with:
+- Steps to reproduce
+- Expected vs actual behavior
+- Platform and version info
+- Sidecar logs (`%LOCALAPPDATA%\com.voicetonotes.app\sidecar.log` on Windows)
+
+## License
+
+By contributing, you agree that your contributions will be licensed under the [MIT License](LICENSE).
--- a/README.md
+++ b/README.md
@@ -1,30 +1,129 @@
 # Voice to Notes

-A desktop application that transcribes audio/video recordings with speaker identification, producing editable transcriptions with synchronized audio playback.
+A desktop application that transcribes audio and video recordings with speaker identification, synchronized playback, and AI-powered analysis. Export to SRT, WebVTT, ASS captions, plain text, or Markdown.

-## Goals
+## Features

- **Speech-to-Text Transcription** — Accurately convert spoken audio from recordings into text
- **Speaker Identification (Diarization)** — Detect and distinguish between different speakers in a conversation
- **Speaker Naming** — Assign and persist speaker names/IDs across the transcription
- **Synchronized Playback** — Click any transcribed text segment to play back the corresponding audio for review and correction
- **Export Formats**
-  - Closed captioning files (SRT, VTT) for video
-  - Plain text documents with speaker labels
- **AI Integration** — Connect to AI providers to ask questions about the conversation and generate condensed notes/summaries
+- **Speech-to-Text** — Accurate transcription via faster-whisper with word-level timestamps. Supports 99 languages.
+- **Speaker Identification** — Detect and label speakers using pyannote.audio. Rename speakers for clean exports.
+- **GPU Acceleration** — CUDA support for NVIDIA GPUs (Windows/Linux). Falls back to CPU automatically.
+- **Synchronized Playback** — Click any word to seek. Waveform visualization via wavesurfer.js.
+- **AI Chat** — Ask questions about your transcript. Works with Ollama (local), OpenAI, Anthropic, or any OpenAI-compatible API.
+- **Export** — SRT, WebVTT, ASS, plain text, Markdown — all with speaker labels.
+- **Cross-Platform** — Linux, Windows, macOS (Apple Silicon).
+
+## Quick Start
+
+1. Download the installer from [Releases](https://repo.anhonesthost.net/MacroPad/voice-to-notes/releases)
+2. On first launch, choose **CPU** or **CUDA** sidecar (the AI engine downloads separately, ~500MB–2GB)
+3. Import an audio/video file and click **Transcribe**
+
+See the full [User Guide](docs/USER_GUIDE.md) for detailed setup and usage instructions.

 ## Platform Support

-| Platform | Status |
-|----------|--------|
-| Linux    | Planned (initial target) |
-| Windows  | Planned (initial target) |
-| macOS    | Future (pending hardware) |
+| Platform | Architecture | Installers |
+|----------|-------------|------------|
+| Linux    | x86_64      | .deb, .rpm |
+| Windows  | x86_64      | .msi, .exe (NSIS) |
+| macOS    | ARM (Apple Silicon) | .dmg |

-## Project Status
+## Architecture

-**Early planning phase** — Architecture and technology decisions in progress.
+The app is split into two independently versioned components:
+
+- **App** (v0.2.x) — Tauri desktop shell with Svelte frontend. Small installer (~50MB).
+- **Sidecar** (v1.x) — Python ML engine (faster-whisper, pyannote.audio). Downloaded on first launch. CPU (~500MB) or CUDA (~2GB) variants.
+
+This separation means app UI updates don't require re-downloading the sidecar, and sidecar updates don't require reinstalling the app.
+
+## Tech Stack
+
+| Component | Technology |
+|-----------|-----------|
+| Desktop shell | Tauri v2 (Rust + Svelte 5 / TypeScript) |
+| Transcription | faster-whisper (CTranslate2) |
+| Speaker ID | pyannote.audio 3.1 |
+| Audio UI | wavesurfer.js |
+| Transcript editor | TipTap (ProseMirror) |
+| AI (local) | Ollama (any model) |
+| AI (cloud) | OpenAI, Anthropic, OpenAI-compatible |
+| Caption export | pysubs2 |
+| Database | SQLite (rusqlite) |
+
+## Development
+
+### Prerequisites
+
+- Node.js 20+
+- Rust (stable)
+- Python 3.11+ with uv or pip
+- Linux: `libgtk-3-dev`, `libwebkit2gtk-4.1-dev`, `libappindicator3-dev`, `librsvg2-dev`
+
+### Getting Started
+
+```bash
+# Install frontend dependencies
+npm install
+
+# Install Python sidecar dependencies
+cd python && pip install -e ".[dev]" && cd ..
+
+# Run in dev mode (uses system Python for the sidecar)
+npm run tauri:dev
+```
+
+### Building
+
+```bash
+# Build the frozen Python sidecar (CPU-only)
+cd python && python build_sidecar.py --cpu-only && cd ..
+
+# Build with CUDA support
+cd python && python build_sidecar.py --with-cuda && cd ..
+
+# Build the Tauri app
+npm run tauri build
+```
+
+### CI/CD
+
+Two Gitea Actions workflows in `.gitea/workflows/`:
+
+**`release.yml`** — Triggers on push to main:
+1. Bumps app version (patch), creates git tag and Gitea release
+2. Builds lightweight app installers for all platforms (no sidecar bundled)
+
+**`build-sidecar.yml`** — Triggers on changes to `python/` or manual dispatch:
+1. Bumps sidecar version, creates `sidecar-v*` tag and release
+2. Builds CPU + CUDA variants for Linux/Windows, CPU for macOS
+3. Uploads as separate release assets
+
+#### Required Secrets
+
+| Secret | Purpose |
+|--------|---------|
+| `BUILD_TOKEN` | Gitea API token for creating releases and pushing tags |
+
+### Project Structure
+
+```
+src/                        # Svelte 5 frontend
+  lib/components/           # UI components (waveform, transcript editor, settings, etc.)
+  lib/stores/               # Svelte stores (settings, transcript state)
+  routes/                   # SvelteKit pages
+src-tauri/                  # Rust backend
+  src/sidecar/              # Sidecar process manager (download, extract, IPC)
+  src/commands/             # Tauri command handlers
+  nsis-hooks.nsh            # Windows uninstall cleanup
+python/                     # Python sidecar
+  voice_to_notes/           # Python package (transcription, diarization, AI, export)
+  build_sidecar.py          # PyInstaller build script
+  voice_to_notes.spec       # PyInstaller spec
+.gitea/workflows/           # CI/CD (release.yml, build-sidecar.yml)
+docs/                       # Documentation
+```

 ## License

-MIT
+[MIT](LICENSE)
--- a/docs/USER_GUIDE.md
+++ b/docs/USER_GUIDE.md
@@ -0,0 +1,203 @@
+# Voice to Notes — User Guide
+
+## Getting Started
+
+### Installation
+
+Download the installer for your platform from the [Releases](https://repo.anhonesthost.net/MacroPad/voice-to-notes/releases) page:
+
+- **Windows:** `.msi` or `-setup.exe`
+- **Linux:** `.deb` or `.rpm`
+- **macOS:** `.dmg`
+
+### First-Time Setup
+
+On first launch, Voice to Notes will prompt you to download its AI engine (the "sidecar"):
+
+1. Choose **Standard (CPU)** (~500 MB) or **GPU Accelerated (CUDA)** (~2 GB)
+   - Choose CUDA if you have an NVIDIA GPU for significantly faster transcription
+   - CPU works on all computers
+2. Click **Download & Install** and wait for the download to complete
+3. The app will proceed to the main interface once the sidecar is ready
+
+The sidecar only needs to be downloaded once. Updates are detected automatically on launch.
+
+---
+
+## Basic Workflow
+
+### 1. Import Audio
+
+- Click **Import Audio** or press **Ctrl+O** (Cmd+O on Mac)
+- Supported formats: MP3, WAV, FLAC, OGG, M4A, AAC, WMA, MP4, MKV, AVI, MOV, WebM
+
+### 2. Transcribe
+
+After importing, click **Transcribe** to start the transcription pipeline:
+
+- **Transcription:** Converts speech to text with word-level timestamps
+- **Speaker Detection:** Identifies different speakers (if configured — see [Speaker Detection](#speaker-detection))
+- A progress bar shows the current stage and percentage
+
+### 3. Review and Edit
+
+- The **waveform** displays at the top — click anywhere to seek
+- The **transcript** shows below with speaker labels and timestamps
+- **Click any word** in the transcript to jump to that point in the audio
+- The current word highlights during playback
+- **Edit text** directly in the transcript — word timings are preserved
+
+### 4. Export
+
+Click **Export** and choose a format:
+
+| Format | Extension | Best For |
+|--------|-----------|----------|
+| SRT | `.srt` | Video subtitles (most compatible) |
+| WebVTT | `.vtt` | Web video players, HTML5 |
+| ASS/SSA | `.ass` | Styled subtitles with speaker colors |
+| Plain Text | `.txt` | Reading, sharing, pasting |
+| Markdown | `.md` | Documentation, notes |
+
+All formats include speaker labels when speaker detection is enabled.
+
+### 5. Save Project
+
+- **Ctrl+S** (Cmd+S) saves the current project as a `.vtn` file
+- This preserves the full transcript, speaker assignments, and edits
+- Reopen later to continue editing or re-export
+
+---
+
+## Playback Controls
+
+| Action | Shortcut |
+|--------|----------|
+| Play / Pause | **Space** |
+| Skip back 5s | **Left Arrow** |
+| Skip forward 5s | **Right Arrow** |
+| Seek to word | Click any word in the transcript |
+| Import audio | **Ctrl+O** / **Cmd+O** |
+| Open settings | **Ctrl+,** / **Cmd+,** |
+
+---
+
+## Speaker Detection
+
+Speaker detection (diarization) identifies who is speaking at each point in the audio. It requires a one-time setup:
+
+### Setup
+
+1. Go to **Settings > Speakers**
+2. Create a free account at [huggingface.co](https://huggingface.co/join)
+3. Accept the license on **all three** model pages:
+   - [pyannote/speaker-diarization-3.1](https://huggingface.co/pyannote/speaker-diarization-3.1)
+   - [pyannote/segmentation-3.0](https://huggingface.co/pyannote/segmentation-3.0)
+   - [pyannote/speaker-diarization-community-1](https://huggingface.co/pyannote/speaker-diarization-community-1)
+4. Create a token at [huggingface.co/settings/tokens](https://huggingface.co/settings/tokens) (read access is sufficient)
+5. Paste the token in Settings and click **Test & Download Model**
+
+### Speaker Options
+
+- **Number of speakers:** Set to auto-detect or specify a fixed number for faster results
+- **Skip speaker detection:** Check this to only transcribe without identifying speakers
+
+### Managing Speakers
+
+After transcription, speakers appear as "Speaker 1", "Speaker 2", etc. in the left sidebar. Double-click a speaker name to rename it — the new name appears throughout the transcript and in exports.
+
+---
+
+## AI Chat
+
+The AI chat panel lets you ask questions about your transcript. The AI sees the full transcript with speaker labels as context.
+
+Example prompts:
+- "Summarize this conversation"
+- "What were the key action items?"
+- "What did Speaker 1 say about the budget?"
+
+### Setting Up Ollama (Local AI)
+
+[Ollama](https://ollama.com) runs AI models locally on your computer — no API keys or internet required.
+
+1. **Install Ollama:**
+   - Download from [ollama.com](https://ollama.com)
+   - Or on Linux: `curl -fsSL https://ollama.com/install.sh | sh`
+
+2. **Pull a model:**
+   ```bash
+   ollama pull llama3.2
+   ```
+   Other good options: `mistral`, `gemma2`, `phi3`
+
+3. **Configure in Voice to Notes:**
+   - Go to **Settings > AI Provider**
+   - Select **Ollama**
+   - URL: `http://localhost:11434` (default, usually no change needed)
+   - Model: `llama3.2` (or whichever model you pulled)
+
+4. **Use:** Open the AI chat panel (right sidebar) and start asking questions
+
+### Cloud AI Providers
+
+If you prefer cloud-based AI:
+
+**OpenAI:**
+- Select **OpenAI** in Settings > AI Provider
+- Enter your API key from [platform.openai.com/api-keys](https://platform.openai.com/api-keys)
+- Default model: `gpt-4o-mini`
+
+**Anthropic:**
+- Select **Anthropic** in Settings > AI Provider
+- Enter your API key from [console.anthropic.com](https://console.anthropic.com)
+- Default model: `claude-sonnet-4-6`
+
+**OpenAI Compatible:**
+- For any provider with an OpenAI-compatible API (vLLM, LiteLLM, etc.)
+- Enter the API base URL, key, and model name
+
+---
+
+## Settings Reference
+
+### Transcription
+
+| Setting | Options | Default |
+|---------|---------|---------|
+| Whisper Model | tiny, base, small, medium, large-v3 | base |
+| Device | CPU, CUDA | CPU |
+| Language | Auto-detect, or specify (en, es, fr, etc.) | Auto-detect |
+
+**Model recommendations:**
+- **tiny/base:** Fast, good for clear audio with one speaker
+- **small:** Best balance of speed and accuracy
+- **medium:** Better accuracy, noticeably slower
+- **large-v3:** Best accuracy, requires 8GB+ VRAM (GPU) or 16GB+ RAM (CPU)
+
+### Debug
+
+- **Enable Developer Tools:** Opens the browser inspector for debugging
+
+---
+
+## Troubleshooting
+
+### Transcription is slow
+- Use a smaller model (tiny or base)
+- If you have an NVIDIA GPU, select CUDA in Settings > Transcription > Device
+- Ensure you downloaded the CUDA sidecar during setup
+
+### Speaker detection not working
+- Verify your HuggingFace token in Settings > Speakers
+- Click "Test & Download Model" to re-download
+- Make sure you accepted the license on all three model pages
+
+### Audio won't play / No waveform
+- Check that the audio file still exists at its original location
+- Try re-importing the file
+- Supported formats: MP3, WAV, FLAC, OGG, M4A, AAC, WMA, MP4, MKV, AVI, MOV, WebM
+
+### App shows "Setting up Voice to Notes"
+- This is the first-launch sidecar download — it only happens once
+- If it fails, check your internet connection and click Retry
--- a/package-lock.json
+++ b/package-lock.json
@@ -1,12 +1,12 @@
 {
  "name": "voice-to-notes",
-  "version": "0.1.0",
+  "version": "0.2.10",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "voice-to-notes",
-      "version": "0.1.0",
+      "version": "0.2.10",
      "license": "MIT",
      "dependencies": {
        "@tauri-apps/api": "^2",
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
  "name": "voice-to-notes",
-  "version": "0.1.0",
+  "version": "0.2.27",
  "description": "Desktop app for transcribing audio/video with speaker identification",
  "type": "module",
  "scripts": {
@@ -11,7 +11,9 @@
    "check:watch": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json --watch",
    "lint": "eslint .",
    "test": "vitest",
-    "tauri": "tauri"
+    "tauri": "tauri",
+    "tauri:dev": "VOICE_TO_NOTES_DEV=1 tauri dev",
+    "sidecar:build": "cd python && python3 build_sidecar.py"
  },
  "license": "MIT",
  "dependencies": {
--- a/python/build_sidecar.py
+++ b/python/build_sidecar.py
@@ -0,0 +1,248 @@
+#!/usr/bin/env python3
+"""Build the Voice to Notes sidecar as a standalone binary using PyInstaller.
+
+Usage:
+    python build_sidecar.py [--cpu-only | --with-cuda]
+
+Produces a directory `dist/voice-to-notes-sidecar/` containing the frozen
+sidecar binary and all dependencies. The main binary is renamed to include
+the Tauri target triple for externalBin resolution.
+"""
+
+from __future__ import annotations
+
+import argparse
+import os
+import platform
+import shutil
+import stat
+import subprocess
+import sys
+import urllib.request
+import zipfile
+from pathlib import Path
+
+SCRIPT_DIR = Path(__file__).resolve().parent
+DIST_DIR = SCRIPT_DIR / "dist"
+BUILD_DIR = SCRIPT_DIR / "build"
+SPEC_FILE = SCRIPT_DIR / "voice_to_notes.spec"
+
+# Static ffmpeg download URLs (GPL-licensed builds)
+FFMPEG_URLS: dict[str, str] = {
+    "linux-x86_64": "https://johnvansickle.com/ffmpeg/releases/ffmpeg-release-amd64-static.tar.xz",
+    "darwin-x86_64": "https://evermeet.cx/ffmpeg/getrelease/zip",
+    "darwin-arm64": "https://evermeet.cx/ffmpeg/getrelease/zip",
+    "win32-x86_64": "https://www.gyan.dev/ffmpeg/builds/ffmpeg-release-essentials.zip",
+}
+
+
+def get_target_triple() -> str:
+    """Determine the Tauri-compatible target triple for the current platform."""
+    machine = platform.machine().lower()
+    system = platform.system().lower()
+
+    arch_map = {
+        "x86_64": "x86_64",
+        "amd64": "x86_64",
+        "aarch64": "aarch64",
+        "arm64": "aarch64",
+    }
+    arch = arch_map.get(machine, machine)
+
+    if system == "linux":
+        return f"{arch}-unknown-linux-gnu"
+    elif system == "darwin":
+        return f"{arch}-apple-darwin"
+    elif system == "windows":
+        return f"{arch}-pc-windows-msvc"
+    else:
+        return f"{arch}-unknown-{system}"
+
+
+def _has_uv() -> bool:
+    """Check if uv is available."""
+    try:
+        subprocess.run(["uv", "--version"], capture_output=True, check=True)
+        return True
+    except (FileNotFoundError, subprocess.CalledProcessError):
+        return False
+
+
+def create_venv_and_install(cpu_only: bool) -> Path:
+    """Create a fresh venv and install dependencies.
+
+    Uses uv if available (much faster), falls back to standard venv + pip.
+    """
+    venv_dir = BUILD_DIR / "sidecar-venv"
+    if venv_dir.exists():
+        shutil.rmtree(venv_dir)
+
+    use_uv = _has_uv()
+
+    if use_uv:
+        print(f"[build] Creating venv with uv at {venv_dir}")
+        subprocess.run(
+            ["uv", "venv", "--python", f"{sys.version_info.major}.{sys.version_info.minor}",
+             str(venv_dir)],
+            check=True,
+        )
+    else:
+        print(f"[build] Creating venv at {venv_dir}")
+        subprocess.run([sys.executable, "-m", "venv", str(venv_dir)], check=True)
+
+    # Determine python path inside venv
+    if sys.platform == "win32":
+        python = str(venv_dir / "Scripts" / "python.exe")
+    else:
+        python = str(venv_dir / "bin" / "python")
+
+    def pip_install(*args: str) -> None:
+        """Install packages. Pass package names and flags only, not 'install'."""
+        if use_uv:
+            # Use --python with the venv directory (not the python binary) for uv
+            subprocess.run(
+                ["uv", "pip", "install", "--python", str(venv_dir), *args],
+                check=True,
+            )
+        else:
+            subprocess.run([python, "-m", "pip", "install", *args], check=True)
+
+    if not use_uv:
+        # Upgrade pip (uv doesn't need this)
+        pip_install("--upgrade", "pip", "setuptools", "wheel")
+
+    # Install torch (CPU-only to avoid bundling ~2GB of CUDA libs)
+    if cpu_only:
+        print("[build] Installing PyTorch (CPU-only)")
+        pip_install(
+            "torch", "torchaudio",
+            "--index-url", "https://download.pytorch.org/whl/cpu",
+        )
+    else:
+        print("[build] Installing PyTorch (CUDA 12.6)")
+        pip_install(
+            "torch", "torchaudio",
+            "--index-url", "https://download.pytorch.org/whl/cu126",
+        )
+
+    # Install project and dev deps (includes pyinstaller)
+    print("[build] Installing project dependencies")
+    pip_install("-e", f"{SCRIPT_DIR}[dev]")
+
+    return Path(python)
+
+
+def run_pyinstaller(python: Path) -> Path:
+    """Run PyInstaller using the spec file."""
+    print("[build] Running PyInstaller")
+    subprocess.run(
+        [str(python), "-m", "PyInstaller", "--clean", "--noconfirm", str(SPEC_FILE)],
+        cwd=str(SCRIPT_DIR),
+        check=True,
+    )
+    output_dir = DIST_DIR / "voice-to-notes-sidecar"
+    if not output_dir.exists():
+        raise RuntimeError(f"PyInstaller output not found at {output_dir}")
+    return output_dir
+
+
+def download_ffmpeg(output_dir: Path) -> None:
+    """Download a static ffmpeg/ffprobe binary for the current platform."""
+    system = sys.platform
+    machine = platform.machine().lower()
+    if machine in ("amd64", "x86_64"):
+        machine = "x86_64"
+    elif machine in ("aarch64", "arm64"):
+        machine = "arm64"
+
+    key = f"{system}-{machine}"
+    if system == "win32":
+        key = f"win32-{machine}"
+    elif system == "linux":
+        key = f"linux-{machine}"
+
+    url = FFMPEG_URLS.get(key)
+    if not url:
+        print(f"[build] Warning: No ffmpeg download URL for platform {key}, skipping")
+        return
+
+    print(f"[build] Downloading ffmpeg for {key}")
+    tmp_path = output_dir / "ffmpeg_download"
+    try:
+        urllib.request.urlretrieve(url, str(tmp_path))
+
+        if url.endswith(".tar.xz"):
+            # Linux static build
+            import tarfile
+            with tarfile.open(str(tmp_path), "r:xz") as tar:
+                for member in tar.getmembers():
+                    basename = os.path.basename(member.name)
+                    if basename in ("ffmpeg", "ffprobe"):
+                        member.name = basename
+                        tar.extract(member, path=str(output_dir))
+                        dest = output_dir / basename
+                        dest.chmod(dest.stat().st_mode | stat.S_IEXEC)
+        elif url.endswith(".zip"):
+            with zipfile.ZipFile(str(tmp_path), "r") as zf:
+                for name in zf.namelist():
+                    basename = os.path.basename(name)
+                    if basename in ("ffmpeg", "ffprobe", "ffmpeg.exe", "ffprobe.exe"):
+                        data = zf.read(name)
+                        dest = output_dir / basename
+                        dest.write_bytes(data)
+                        if sys.platform != "win32":
+                            dest.chmod(dest.stat().st_mode | stat.S_IEXEC)
+        print("[build] ffmpeg downloaded successfully")
+    except Exception as e:
+        print(f"[build] Warning: Failed to download ffmpeg: {e}")
+    finally:
+        if tmp_path.exists():
+            tmp_path.unlink()
+
+
+def rename_binary(output_dir: Path, target_triple: str) -> None:
+    """Rename the main binary to include the target triple for Tauri."""
+    if sys.platform == "win32":
+        src = output_dir / "voice-to-notes-sidecar.exe"
+        dst = output_dir / f"voice-to-notes-sidecar-{target_triple}.exe"
+    else:
+        src = output_dir / "voice-to-notes-sidecar"
+        dst = output_dir / f"voice-to-notes-sidecar-{target_triple}"
+
+    if src.exists():
+        print(f"[build] Renaming {src.name} -> {dst.name}")
+        src.rename(dst)
+    else:
+        print(f"[build] Warning: Expected binary not found at {src}")
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser(description="Build the Voice to Notes sidecar binary")
+    parser.add_argument(
+        "--cpu-only",
+        action="store_true",
+        default=True,
+        help="Install CPU-only PyTorch (default: True, avoids bundling CUDA)",
+    )
+    parser.add_argument(
+        "--with-cuda",
+        action="store_true",
+        help="Install PyTorch with CUDA support",
+    )
+    args = parser.parse_args()
+    cpu_only = not args.with_cuda
+
+    target_triple = get_target_triple()
+    print(f"[build] Target triple: {target_triple}")
+    print(f"[build] CPU-only: {cpu_only}")
+
+    python = create_venv_and_install(cpu_only)
+    output_dir = run_pyinstaller(python)
+    download_ffmpeg(output_dir)
+
+    print(f"\n[build] Done! Sidecar built at: {output_dir}")
+    print(f"[build] Copy directory to src-tauri/sidecar/ for Tauri resource bundling")
+
+
+if __name__ == "__main__":
+    main()
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "voice-to-notes"
-version = "0.1.0"
+version = "1.0.10"
 description = "Python sidecar for Voice to Notes — transcription, diarization, and AI services"
 requires-python = ">=3.11"
 license = "MIT"
@@ -13,6 +13,9 @@ dependencies = [
    "faster-whisper>=1.1.0",
    "pyannote.audio>=3.1.0",
    "pysubs2>=1.7.0",
+    "openai>=1.0.0",
+    "anthropic>=0.20.0",
+    "soundfile>=0.12.0",
 ]

 [project.optional-dependencies]
@@ -20,6 +23,7 @@ dev = [
    "ruff>=0.8.0",
    "pytest>=8.0.0",
    "pytest-asyncio>=0.24.0",
+    "pyinstaller>=6.0",
 ]

 [tool.ruff]
--- a/python/tests/test_diarize.py
+++ b/python/tests/test_diarize.py
@@ -1,7 +1,13 @@
 """Tests for diarization service data structures and payload conversion."""

+import time
+from unittest.mock import MagicMock, patch
+
+import pytest
+
 from voice_to_notes.services.diarize import (
    DiarizationResult,
+    DiarizeService,
    SpeakerSegment,
    diarization_to_payload,
 )
@@ -31,3 +37,74 @@ def test_diarization_to_payload_empty():
    assert payload["num_speakers"] == 0
    assert payload["speaker_segments"] == []
    assert payload["speakers"] == []
+
+
+def test_diarize_threading_progress(monkeypatch):
+    """Test that diarization emits progress while running in background thread."""
+    # Track written messages
+    written_messages = []
+    def mock_write(msg):
+        written_messages.append(msg)
+
+    # Mock pipeline that takes ~5 seconds
+    def slow_pipeline(file_path, **kwargs):
+        time.sleep(5)
+        # Return a mock diarization result (use spec=object to prevent
+        # hasattr returning True for speaker_diarization)
+        mock_result = MagicMock(spec=[])
+        mock_track = MagicMock()
+        mock_track.start = 0.0
+        mock_track.end = 5.0
+        mock_result.itertracks = MagicMock(return_value=[(mock_track, None, "SPEAKER_00")])
+        return mock_result
+
+    mock_pipeline_obj = MagicMock()
+    mock_pipeline_obj.side_effect = slow_pipeline
+
+    service = DiarizeService()
+    service._pipeline = mock_pipeline_obj
+
+    with patch("voice_to_notes.services.diarize.write_message", mock_write):
+        result = service.diarize(
+            request_id="req-1",
+            file_path="/fake/audio.wav",
+            audio_duration_sec=60.0,
+        )
+
+    # Filter for diarizing progress messages (not loading_diarization or done)
+    diarizing_msgs = [
+        m for m in written_messages
+        if m.type == "progress" and m.payload.get("stage") == "diarizing"
+        and "elapsed" in m.payload.get("message", "")
+    ]
+
+    # Should have at least 1 progress message (5s sleep / 2s interval = ~2 messages)
+    assert len(diarizing_msgs) >= 1, (
+        f"Expected at least 1 diarizing progress message, got {len(diarizing_msgs)}"
+    )
+
+    # Progress percent should be between 20 and 85
+    for msg in diarizing_msgs:
+        pct = msg.payload["percent"]
+        assert 20 <= pct <= 85, f"Progress {pct} out of expected range 20-85"
+
+    # Result should be valid
+    assert result.num_speakers == 1
+    assert result.speakers == ["SPEAKER_00"]
+
+
+def test_diarize_threading_error_propagation(monkeypatch):
+    """Test that errors from the background thread are properly raised."""
+    mock_pipeline_obj = MagicMock()
+    mock_pipeline_obj.side_effect = RuntimeError("Pipeline crashed")
+
+    service = DiarizeService()
+    service._pipeline = mock_pipeline_obj
+
+    with patch("voice_to_notes.services.diarize.write_message", lambda m: None):
+        with pytest.raises(RuntimeError, match="Pipeline crashed"):
+            service.diarize(
+                request_id="req-1",
+                file_path="/fake/audio.wav",
+                audio_duration_sec=30.0,
+            )
--- a/python/tests/test_messages.py
+++ b/python/tests/test_messages.py
@@ -3,8 +3,10 @@
 from voice_to_notes.ipc.messages import (
    IPCMessage,
    error_message,
+    partial_segment_message,
    progress_message,
    ready_message,
+    speaker_update_message,
 )


@@ -48,3 +50,16 @@ def test_ready_message():
    assert msg.type == "ready"
    assert msg.id == "system"
    assert "version" in msg.payload
+
+
+def test_partial_segment_message():
+    """``partial_segment_message`` uses the segment dict as a ``pipeline.segment`` payload."""
+    segment = {"index": 0, "text": "hello"}
+    msg = partial_segment_message("req-1", segment)
+    assert msg.type == "pipeline.segment"
+    assert (msg.payload["index"], msg.payload["text"]) == (0, "hello")
+
+
+def test_speaker_update_message():
+    """``speaker_update_message`` nests the update list under the ``updates`` payload key."""
+    updates = [{"index": 0, "speaker": "SPEAKER_00"}]
+    msg = speaker_update_message("req-1", updates)
+    assert msg.type == "pipeline.speaker_update"
+    first_update = msg.payload["updates"][0]
+    assert first_update["speaker"] == "SPEAKER_00"
--- a/python/tests/test_pipeline.py
+++ b/python/tests/test_pipeline.py
@@ -88,3 +88,18 @@ def test_merge_results_no_speaker_segments():

    result = service._merge_results(transcription, [])
    assert result.segments[0].speaker is None
+
+
+def test_speaker_update_generation():
+    """Derive speaker updates from a result, skipping segments without a speaker.
+
+    NOTE(review): the update list is built by a loop inside this test rather
+    than by pipeline code, so the test only pins the expected update shape.
+    """
+    segments = [
+        PipelineSegment(text="Hello", start_ms=0, end_ms=1000, speaker="SPEAKER_00"),
+        PipelineSegment(text="World", start_ms=1000, end_ms=2000, speaker="SPEAKER_01"),
+        PipelineSegment(text="Foo", start_ms=2000, end_ms=3000, speaker=None),
+    ]
+    result = PipelineResult(segments=segments)
+
+    updates = []
+    for idx, segment in enumerate(result.segments):
+        if segment.speaker:
+            updates.append({"index": idx, "speaker": segment.speaker})
+
+    # The third segment has no speaker, so only the first two produce updates.
+    assert updates == [
+        {"index": 0, "speaker": "SPEAKER_00"},
+        {"index": 1, "speaker": "SPEAKER_01"},
+    ]
--- a/python/tests/test_protocol.py
+++ b/python/tests/test_protocol.py
@@ -5,16 +5,23 @@ import json

 from voice_to_notes.ipc.messages import IPCMessage
 from voice_to_notes.ipc.protocol import read_message, write_message
+import voice_to_notes.ipc.protocol as protocol


-def test_write_message(capsys):
-    msg = IPCMessage(id="req-1", type="pong", payload={"ok": True})
-    write_message(msg)
-    captured = capsys.readouterr()
-    parsed = json.loads(captured.out.strip())
-    assert parsed["id"] == "req-1"
-    assert parsed["type"] == "pong"
-    assert parsed["payload"]["ok"] is True
+def test_write_message():
+    """``write_message`` serialises the message as JSON onto the IPC output stream."""
+    capture = io.StringIO()
+    saved_stream = protocol._ipc_out
+    # Point the module-level IPC stream at an in-memory buffer for this call only.
+    protocol._ipc_out = capture
+    try:
+        write_message(IPCMessage(id="req-1", type="pong", payload={"ok": True}))
+        parsed = json.loads(capture.getvalue().strip())
+    finally:
+        protocol._ipc_out = saved_stream
+
+    assert parsed["id"] == "req-1"
+    assert parsed["type"] == "pong"
+    assert parsed["payload"]["ok"] is True


 def test_read_message(monkeypatch):
--- a/python/tests/test_transcribe.py
+++ b/python/tests/test_transcribe.py
@@ -1,7 +1,10 @@
 """Tests for transcription service."""

+import inspect
+
 from voice_to_notes.services.transcribe import (
    SegmentResult,
+    TranscribeService,
    TranscriptionResult,
    WordResult,
    result_to_payload,
@@ -49,3 +52,149 @@ def test_result_to_payload_empty():
    assert payload["segments"] == []
    assert payload["language"] == ""
    assert payload["duration_ms"] == 0
+
+
+def test_on_segment_callback():
+    """``TranscribeService.transcribe`` must accept an ``on_segment`` parameter.
+
+    This is a signature-only check: nothing is transcribed and no callback is
+    invoked. Verifying that the callback actually fires with the right
+    ``SegmentResult`` and index would require mocking WhisperModel.
+    """
+    service = TranscribeService()
+    sig = inspect.signature(service.transcribe)
+    assert "on_segment" in sig.parameters
+
+
+def test_progress_every_segment(monkeypatch):
+    """Every transcribed segment must yield its own "transcribing" progress message.
+
+    Eight fake segments are fed through a mocked model while the messages
+    passed to ``write_message`` are captured; at least eight of them must be
+    progress messages for the "transcribing" stage (not just every 5th).
+    """
+    from unittest.mock import MagicMock, patch
+    from voice_to_notes.services.transcribe import TranscribeService
+
+    def make_segment(i):
+        fake = MagicMock()
+        fake.start = i * 1.0
+        fake.end = (i + 1) * 1.0
+        fake.text = f"Segment {i}"
+        fake.words = []
+        return fake
+
+    # Eight segments, i.e. more than 5.
+    fake_segments = [make_segment(i) for i in range(8)]
+
+    # Stand-in for the info object returned alongside the segment iterator.
+    fake_info = MagicMock()
+    fake_info.language = "en"
+    fake_info.language_probability = 0.99
+    fake_info.duration = 8.0
+
+    fake_model = MagicMock()
+    fake_model.transcribe.return_value = (iter(fake_segments), fake_info)
+
+    service = TranscribeService()
+    service._model = fake_model
+    service._current_model_name = "base"
+    service._current_device = "cpu"
+    service._current_compute_type = "int8"
+
+    # Record outgoing IPC messages instead of writing them anywhere.
+    captured = []
+
+    def record(msg):
+        captured.append(msg)
+
+    with patch("voice_to_notes.services.transcribe.write_message", record):
+        service.transcribe("req-1", "/fake/audio.wav")
+
+    transcribing_msgs = []
+    for message in captured:
+        if message.type == "progress" and message.payload.get("stage") == "transcribing":
+            transcribing_msgs.append(message)
+
+    # One message per segment is the minimum; the initial
+    # "Starting transcription..." message shares the stage, so 9 is also fine.
+    assert len(transcribing_msgs) >= 8, (
+        f"Expected at least 8 transcribing progress messages (one per segment), got {len(transcribing_msgs)}"
+    )
+
+
+def test_chunk_report_size_progress():
+    """Pin the value of the ``CHUNK_REPORT_SIZE`` constant (no progress is emitted here)."""
+    import voice_to_notes.services.transcribe as transcribe_module
+    assert transcribe_module.CHUNK_REPORT_SIZE == 10
+
+
+def test_transcribe_chunked_with_mocked_ffmpeg(monkeypatch):
+    """Run ``transcribe_chunked`` with ffprobe, ffmpeg and the model all faked.
+
+    ``subprocess.run`` is patched so ffprobe reports a 700 s file and ffmpeg
+    merely creates an empty output file; the model mock yields three
+    one-second segments per call. The test checks that segments were produced,
+    that the fourth segment (if present) starts at >= 300000 ms, and that the
+    reported duration and language match the faked values.
+    """
+    from unittest.mock import MagicMock, patch
+    from voice_to_notes.services.transcribe import TranscribeService, SegmentResult, WordResult
+
+    # Keep the real subprocess.run so commands other than ffprobe/ffmpeg pass through.
+    # ffprobe is faked to report 700 s; presumably the service cuts 300 s chunks,
+    # which would give 3 chunks rather than 2 — TODO confirm against transcribe_chunked.
+    original_run = __import__("subprocess").run
+
+    def mock_subprocess_run(cmd, **kwargs):
+        # NOTE(review): ``in`` is list membership when cmd is an argv list and a
+        # substring test when it is a string — confirm how the service builds cmd.
+        if "ffprobe" in cmd:
+            result = MagicMock()
+            result.stdout = "700.0\n"
+            result.returncode = 0
+            return result
+        elif "ffmpeg" in cmd:
+            # Create an empty temp file (simulate chunk extraction)
+            # The output file is the last argument
+            import pathlib
+            output_file = cmd[-1]
+            pathlib.Path(output_file).touch()
+            result = MagicMock()
+            result.returncode = 0
+            return result
+        return original_run(cmd, **kwargs)
+
+    # Mock WhisperModel
+    mock_model = MagicMock()
+    def mock_transcribe_call(file_path, **kwargs):
+        # A fresh iterator is built on every call, so each call yields the
+        # same three segments with timestamps starting from zero.
+        mock_segments = []
+        for i in range(3):
+            seg = MagicMock()
+            seg.start = i * 1.0
+            seg.end = (i + 1) * 1.0
+            seg.text = f"Segment {i}"
+            seg.words = []
+            mock_segments.append(seg)
+        mock_info = MagicMock()
+        mock_info.language = "en"
+        mock_info.language_probability = 0.99
+        mock_info.duration = 300.0
+        return iter(mock_segments), mock_info
+
+    mock_model.transcribe = mock_transcribe_call
+
+    # Inject the fake model and matching settings directly into the service.
+    service = TranscribeService()
+    service._model = mock_model
+    service._current_model_name = "base"
+    service._current_device = "cpu"
+    service._current_compute_type = "int8"
+
+    # Collected only to keep IPC output away from the real stream; not asserted on.
+    written_messages = []
+    def mock_write(msg):
+        written_messages.append(msg)
+
+    with patch("subprocess.run", mock_subprocess_run), \
+         patch("voice_to_notes.services.transcribe.write_message", mock_write):
+        result = service.transcribe_chunked("req-1", "/fake/long_audio.wav")
+
+    # Should have segments from multiple chunks
+    assert len(result.segments) > 0
+
+    # Verify timestamp offsets — segments from chunk 1 should start at 0,
+    # segments from chunk 2 should be offset by 300000ms
+    # NOTE(review): this check is skipped when 3 or fewer segments come back,
+    # so a run that processed a single chunk would still pass.
+    if len(result.segments) > 3:
+        # Chunk 2 segments should have offset timestamps
+        assert result.segments[3].start_ms >= 300000
+
+    # 700000 ms corresponds to the 700.0 s reported by the faked ffprobe.
+    assert result.duration_ms == 700000
+    assert result.language == "en"
--- a/python/voice_to_notes.spec
+++ b/python/voice_to_notes.spec
@@ -0,0 +1,80 @@
+# -*- mode: python ; coding: utf-8 -*-
+"""PyInstaller spec for the Voice to Notes sidecar binary."""
+
+from PyInstaller.utils.hooks import collect_all
+
+block_cipher = None
+
+# PyInstaller often misses the shared libraries and data files of ML packages,
+# so everything these packages ship is collected explicitly. The per-package
+# results are concatenated in the order the packages are listed here.
+bundled_datas = []
+bundled_binaries = []
+bundled_hiddenimports = []
+for package_name in ("ctranslate2", "faster_whisper", "pyannote", "soundfile"):
+    pkg_datas, pkg_binaries, pkg_hiddenimports = collect_all(package_name)
+    bundled_datas += pkg_datas
+    bundled_binaries += pkg_binaries
+    bundled_hiddenimports += pkg_hiddenimports
+
+# Imports listed by hand, placed ahead of whatever collect_all() reported.
+explicit_hiddenimports = [
+    "torch",
+    "torchaudio",
+    "soundfile",
+    "huggingface_hub",
+    "pysubs2",
+    "openai",
+    "anthropic",
+    "litellm",
+]
+
+# Modules deliberately kept out of the bundle.
+excluded_modules = [
+    "tkinter", "test", "pip", "setuptools",
+    # ctranslate2.converters imports torch at module level and causes
+    # circular import crashes under PyInstaller. These modules are only
+    # needed for model format conversion, never for inference.
+    "ctranslate2.converters",
+    # torchcodec is partially bundled by PyInstaller but non-functional
+    # (missing FFmpeg shared libs). Excluding it forces pyannote.audio
+    # to fall back to torchaudio for audio decoding.
+    "torchcodec",
+]
+
+a = Analysis(
+    ["voice_to_notes/main.py"],
+    pathex=[],
+    binaries=bundled_binaries,
+    datas=bundled_datas,
+    hiddenimports=explicit_hiddenimports + bundled_hiddenimports,
+    hookspath=[],
+    hooksconfig={},
+    runtime_hooks=[],
+    excludes=excluded_modules,
+    win_no_prefer_redirects=False,
+    win_private_assemblies=False,
+    cipher=block_cipher,
+    noarchive=False,
+)
+
+pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)
+
+# exclude_binaries=True keeps binaries out of the executable itself; they are
+# gathered next to it by COLLECT below.
+exe = EXE(
+    pyz,
+    a.scripts,
+    [],
+    exclude_binaries=True,
+    name="voice-to-notes-sidecar",
+    debug=False,
+    bootloader_ignore_signals=False,
+    strip=False,
+    upx=True,
+    console=True,
+)
+
+coll = COLLECT(
+    exe,
+    a.binaries,
+    a.zipfiles,
+    a.datas,
+    strip=False,
+    upx=True,
+    upx_exclude=[],
+    name="voice-to-notes-sidecar",
+)
--- a/python/voice_to_notes/hardware/detect.py
+++ b/python/voice_to_notes/hardware/detect.py
@@ -2,7 +2,10 @@

 from __future__ import annotations

+import ctypes
 import os
+import platform
+import subprocess
 import sys
 from dataclasses import dataclass

@@ -21,6 +24,77 @@ class HardwareInfo:
    recommended_compute_type: str = "int8"


+def _detect_ram_mb() -> int:
+    """Return total physical RAM in MiB, or 0 when it cannot be determined.
+
+    One platform-specific probe is selected by ``sys.platform``:
+
+    * Linux: the ``MemTotal`` line of ``/proc/meminfo`` (value in kB).
+    * macOS: ``sysctl -n hw.memsize`` (value in bytes).
+    * Windows: ``GlobalMemoryStatusEx`` called through ``ctypes``.
+
+    If that probe fails, or the platform is none of the above, ``os.sysconf``
+    is tried as a generic Unix fallback. The caught probe failures are
+    swallowed so that detection stays best-effort.
+    """
+    # Linux: read /proc/meminfo
+    if sys.platform == "linux":
+        try:
+            with open("/proc/meminfo") as f:
+                for line in f:
+                    if line.startswith("MemTotal:"):
+                        # Value is in kB
+                        return int(line.split()[1]) // 1024
+        except (FileNotFoundError, ValueError, IndexError, OSError):
+            # IndexError covers a "MemTotal:" line that carries no value.
+            pass
+
+    # macOS: sysctl hw.memsize (returns bytes)
+    if sys.platform == "darwin":
+        try:
+            result = subprocess.run(
+                ["sysctl", "-n", "hw.memsize"],
+                capture_output=True,
+                text=True,
+                check=True,
+                # Never let a stuck child process block hardware detection;
+                # TimeoutExpired is a SubprocessError and is handled below.
+                timeout=5,
+            )
+            return int(result.stdout.strip()) // (1024 * 1024)
+        except (subprocess.SubprocessError, ValueError, OSError):
+            pass
+
+    # Windows: GlobalMemoryStatusEx via ctypes
+    if sys.platform == "win32":
+        try:
+
+            class MEMORYSTATUSEX(ctypes.Structure):
+                _fields_ = [
+                    ("dwLength", ctypes.c_ulong),
+                    ("dwMemoryLoad", ctypes.c_ulong),
+                    ("ullTotalPhys", ctypes.c_ulonglong),
+                    ("ullAvailPhys", ctypes.c_ulonglong),
+                    ("ullTotalPageFile", ctypes.c_ulonglong),
+                    ("ullAvailPageFile", ctypes.c_ulonglong),
+                    ("ullTotalVirtual", ctypes.c_ulonglong),
+                    ("ullAvailVirtual", ctypes.c_ulonglong),
+                    ("ullAvailExtendedVirtual", ctypes.c_ulonglong),
+                ]
+
+            mem_status = MEMORYSTATUSEX()
+            # The API requires dwLength to hold the structure size before the call.
+            mem_status.dwLength = ctypes.sizeof(MEMORYSTATUSEX)
+            if ctypes.windll.kernel32.GlobalMemoryStatusEx(ctypes.byref(mem_status)):
+                return int(mem_status.ullTotalPhys) // (1024 * 1024)
+        except (AttributeError, OSError):
+            pass
+
+    # Fallback: os.sysconf (works on most Unix systems)
+    try:
+        page_size = os.sysconf("SC_PAGE_SIZE")
+        phys_pages = os.sysconf("SC_PHYS_PAGES")
+        if page_size > 0 and phys_pages > 0:
+            return (page_size * phys_pages) // (1024 * 1024)
+    except (ValueError, OSError, AttributeError):
+        # AttributeError: os.sysconf does not exist on this platform.
+        pass
+
+    return 0
+
+
 def detect_hardware() -> HardwareInfo:
    """Detect available hardware and recommend model configuration."""
    info = HardwareInfo()
@@ -28,25 +102,26 @@ def detect_hardware() -> HardwareInfo:
    # CPU info
    info.cpu_cores = os.cpu_count() or 1

-    # RAM info
-    try:
-        with open("/proc/meminfo") as f:
-            for line in f:
-                if line.startswith("MemTotal:"):
-                    # Value is in kB
-                    info.ram_mb = int(line.split()[1]) // 1024
-                    break
-    except (FileNotFoundError, ValueError):
-        pass
+    # RAM info (cross-platform)
+    info.ram_mb = _detect_ram_mb()

-    # CUDA detection
+    # CUDA detection — verify runtime libraries actually work, not just torch detection
    try:
        import torch

        if torch.cuda.is_available():
-            info.has_cuda = True
-            info.cuda_device_name = torch.cuda.get_device_name(0)
-            info.vram_mb = torch.cuda.get_device_properties(0).total_mem // (1024 * 1024)
+            # Test that CUDA runtime libraries are actually loadable
+            try:
+                torch.zeros(1, device="cuda")
+                device_props = torch.cuda.get_device_properties(0)
+                info.cuda_device_name = torch.cuda.get_device_name(0)
+                # The attribute is ``total_memory`` (bytes). ``total_mem`` does not
+                # exist; the AttributeError it raised was caught below and
+                # misreported as an unavailable CUDA runtime.
+                info.vram_mb = device_props.total_memory // (1024 * 1024)
+                # Set last so a failure above never leaves has_cuda=True while
+                # the message below says the CPU is being used.
+                info.has_cuda = True
+            except Exception as e:
+                print(
+                    f"[sidecar] CUDA detected but runtime unavailable: {e}. Using CPU.",
+                    file=sys.stderr,
+                    flush=True,
+                )
    except ImportError:
        print("[sidecar] torch not available, GPU detection skipped", file=sys.stderr, flush=True)

--- a/python/voice_to_notes/ipc/handlers.py
+++ b/python/voice_to_notes/ipc/handlers.py
@@ -41,11 +41,15 @@ def ping_handler(msg: IPCMessage) -> IPCMessage:

 def make_transcribe_handler() -> HandlerFunc:
    """Create a transcription handler with a persistent TranscribeService."""
-    from voice_to_notes.services.transcribe import TranscribeService, result_to_payload
-
-    service = TranscribeService()
+    service = None

    def handler(msg: IPCMessage) -> IPCMessage:
+        nonlocal service
+        if service is None:
+            from voice_to_notes.services.transcribe import TranscribeService
+            service = TranscribeService()
+        from voice_to_notes.services.transcribe import result_to_payload
+
        payload = msg.payload
        result = service.transcribe(
            request_id=msg.id,
@@ -66,11 +70,15 @@ def make_transcribe_handler() -> HandlerFunc:

 def make_diarize_handler() -> HandlerFunc:
    """Create a diarization handler with a persistent DiarizeService."""
-    from voice_to_notes.services.diarize import DiarizeService, diarization_to_payload
-
-    service = DiarizeService()
+    service = None

    def handler(msg: IPCMessage) -> IPCMessage:
+        nonlocal service
+        if service is None:
+            from voice_to_notes.services.diarize import DiarizeService
+            service = DiarizeService()
+        from voice_to_notes.services.diarize import diarization_to_payload
+
        payload = msg.payload
        result = service.diarize(
            request_id=msg.id,
@@ -88,13 +96,90 @@ def make_diarize_handler() -> HandlerFunc:
    return handler


-def make_pipeline_handler() -> HandlerFunc:
-    """Create a full pipeline handler (transcribe + diarize + merge)."""
-    from voice_to_notes.services.pipeline import PipelineService, pipeline_result_to_payload
-
-    service = PipelineService()
+def make_diarize_download_handler() -> HandlerFunc:
+    """Create a handler that downloads/validates the diarization model.
+
+    The returned handler reads an optional ``hf_token`` from the message
+    payload, pre-fetches the listed pyannote repositories, instantiates the
+    diarization pipeline via ``Pipeline.from_pretrained`` and replies with a
+    ``diarize.download.result`` message carrying ``{"ok": True}``. Any
+    exception is turned into a ``download_error`` error message, with friendlier
+    wording for errors mentioning 403/"gated" or 401.
+    """
+    import os

     def handler(msg: IPCMessage) -> IPCMessage:
+        payload = msg.payload
+        hf_token = payload.get("hf_token")
+
+        try:
+            import huggingface_hub
+
+            # Disable pyannote telemetry (has a bug in v4.0.4)
+            os.environ.setdefault("PYANNOTE_METRICS_ENABLED", "false")
+            from pyannote.audio import Pipeline
+
+            # Persist token globally so ALL huggingface_hub downloads use auth.
+            # Setting env var alone isn't enough — pyannote's internal sub-downloads
+            # (e.g. PLDA.from_pretrained) don't forward the token= parameter.
+            # login() writes the token to ~/.cache/huggingface/token which
+            # huggingface_hub reads automatically for all downloads.
+            if hf_token:
+                os.environ["HF_TOKEN"] = hf_token
+                huggingface_hub.login(token=hf_token, add_to_git_credential=False)
+
+            # Pre-download sub-models that pyannote loads internally.
+            # This ensures they're cached before Pipeline.from_pretrained
+            # tries to load them (where token forwarding can fail).
+            # NOTE(review): this list names speaker-diarization-community-1 while
+            # the pipeline loaded below is speaker-diarization-3.1 — confirm that
+            # both repositories are really required.
+            sub_models = [
+                "pyannote/segmentation-3.0",
+                "pyannote/speaker-diarization-community-1",
+            ]
+            for model_id in sub_models:
+                print(f"[sidecar] Pre-downloading {model_id}...", file=sys.stderr, flush=True)
+                huggingface_hub.snapshot_download(model_id, token=hf_token)
+
+            print("[sidecar] Downloading diarization pipeline...", file=sys.stderr, flush=True)
+            # Only the download/validation side effect matters here, so the
+            # loaded pipeline object is not kept.
+            Pipeline.from_pretrained(
+                "pyannote/speaker-diarization-3.1",
+                token=hf_token,
+            )
+            print("[sidecar] Diarization model downloaded successfully", file=sys.stderr, flush=True)
+            return IPCMessage(
+                id=msg.id,
+                type="diarize.download.result",
+                payload={"ok": True},
+            )
+        except Exception as e:
+            error_msg = str(e)
+            print(f"[sidecar] Model download error: {error_msg}", file=sys.stderr, flush=True)
+            # Make common errors more user-friendly
+            if "403" in error_msg or "gated" in error_msg.lower():
+                # Try to extract the specific model name from the error
+                import re
+                model_match = re.search(r"pyannote/[\w-]+", error_msg)
+                if model_match:
+                    model_name = model_match.group(0)
+                    error_msg = (
+                        f"Access denied for {model_name}. "
+                        f"Please visit huggingface.co/{model_name} "
+                        f"and accept the license agreement, then try again."
+                    )
+                else:
+                    error_msg = (
+                        "Access denied. Please accept the license agreements for all "
+                        "required pyannote models on HuggingFace."
+                    )
+            elif "401" in error_msg:
+                error_msg = "Invalid token. Please check your HuggingFace token."
+            return error_message(msg.id, "download_error", error_msg)
+
+    return handler
+
+
+def make_pipeline_handler() -> HandlerFunc:
+    """Create a full pipeline handler (transcribe + diarize + merge)."""
+    service = None
+
+    def handler(msg: IPCMessage) -> IPCMessage:
+        nonlocal service
+        if service is None:
+            from voice_to_notes.services.pipeline import PipelineService
+            service = PipelineService()
+        from voice_to_notes.services.pipeline import pipeline_result_to_payload
+
        payload = msg.payload
        result = service.run(
            request_id=msg.id,
@@ -107,6 +192,7 @@ def make_pipeline_handler() -> HandlerFunc:
            min_speakers=payload.get("min_speakers"),
            max_speakers=payload.get("max_speakers"),
            skip_diarization=payload.get("skip_diarization", False),
+            hf_token=payload.get("hf_token"),
        )
        return IPCMessage(
            id=msg.id,
@@ -119,11 +205,15 @@ def make_pipeline_handler() -> HandlerFunc:

 def make_export_handler() -> HandlerFunc:
    """Create an export handler."""
-    from voice_to_notes.services.export import ExportService, make_export_request
-
-    service = ExportService()
+    service = None

    def handler(msg: IPCMessage) -> IPCMessage:
+        nonlocal service
+        if service is None:
+            from voice_to_notes.services.export import ExportService
+            service = ExportService()
+        from voice_to_notes.services.export import make_export_request
+
        request = make_export_request(msg.payload)
        output_path = service.export(request)
        return IPCMessage(
@@ -137,11 +227,14 @@ def make_export_handler() -> HandlerFunc:

 def make_ai_chat_handler() -> HandlerFunc:
    """Create an AI chat handler with persistent AIProviderService."""
-    from voice_to_notes.services.ai_provider import create_default_service
-
-    service = create_default_service()
+    service = None

    def handler(msg: IPCMessage) -> IPCMessage:
+        nonlocal service
+        if service is None:
+            from voice_to_notes.services.ai_provider import create_default_service
+            service = create_default_service()
+
        payload = msg.payload
        action = payload.get("action", "chat")

@@ -186,10 +279,12 @@ def make_ai_chat_handler() -> HandlerFunc:
                    model=config.get("model", "claude-sonnet-4-6"),
                ))
            elif provider_name == "litellm":
-                from voice_to_notes.providers.litellm_provider import LiteLLMProvider
+                from voice_to_notes.providers.litellm_provider import OpenAICompatibleProvider

-                service.register_provider("litellm", LiteLLMProvider(
+                service.register_provider("litellm", OpenAICompatibleProvider(
                    model=config.get("model", "gpt-4o-mini"),
+                    api_key=config.get("api_key"),
+                    api_base=config.get("api_base"),
                ))
            return IPCMessage(
                id=msg.id,
--- a/python/voice_to_notes/ipc/messages.py
+++ b/python/voice_to_notes/ipc/messages.py
@@ -34,6 +34,14 @@ def progress_message(request_id: str, percent: int, stage: str, message: str) ->
    )


+def partial_segment_message(request_id: str, segment_data: dict) -> IPCMessage:
+    """Build a ``pipeline.segment`` message whose payload is ``segment_data`` itself."""
+    message_type = "pipeline.segment"
+    return IPCMessage(id=request_id, type=message_type, payload=segment_data)
+
+
+def speaker_update_message(request_id: str, updates: list[dict]) -> IPCMessage:
+    """Build a ``pipeline.speaker_update`` message with ``updates`` under the ``updates`` key."""
+    body = {"updates": updates}
+    return IPCMessage(id=request_id, type="pipeline.speaker_update", payload=body)
+
+
 def error_message(request_id: str, code: str, message: str) -> IPCMessage:
    return IPCMessage(
        id=request_id,
--- a/python/voice_to_notes/ipc/protocol.py
+++ b/python/voice_to_notes/ipc/protocol.py
@@ -1,13 +1,53 @@
-"""JSON-line protocol reader/writer over stdin/stdout."""
+"""JSON-line protocol reader/writer over stdin/stdout.
+
+IMPORTANT: stdout is reserved exclusively for IPC messages.
+At init time we save the real stdout, then redirect sys.stdout → stderr
+so that any rogue print() calls from libraries don't corrupt the IPC stream.
+"""

 from __future__ import annotations

+import io
 import json
+import os
 import sys
 from typing import Any

 from voice_to_notes.ipc.messages import IPCMessage

+# Save the real stdout fd for IPC before any library can pollute it.
+# Then redirect sys.stdout to stderr so library prints go to stderr.
+_ipc_out: io.TextIOWrapper | None = None
+
+
+def init_ipc() -> None:
+    """Reserve the real stdout for IPC and send ``print()`` output to stderr.
+
+    Must be called once at sidecar startup, before importing any ML libraries.
+    Further calls are no-ops once the IPC stream has been created.
+    """
+    global _ipc_out
+    if _ipc_out is None:
+        # dup() yields a separate descriptor for the original stdout, so it
+        # stays usable after ``sys.stdout`` is rebound below.
+        ipc_fd = os.dup(sys.stdout.fileno())
+        raw_stream = io.FileIO(ipc_fd, "w")
+        _ipc_out = io.TextIOWrapper(
+            io.BufferedWriter(raw_stream),
+            encoding="utf-8",
+            line_buffering=True,
+        )
+
+        # From here on, print() calls from libraries land on stderr.
+        sys.stdout = sys.stderr
+
+
+def _get_ipc_out() -> io.TextIOWrapper:
+    """Return the stream that IPC messages are written to.
+
+    This is the stream captured by ``init_ipc()``; when that has never run
+    (e.g. in tests) ``sys.__stdout__`` is returned instead.
+    """
+    return sys.__stdout__ if _ipc_out is None else _ipc_out
+

 def read_message() -> IPCMessage | None:
    """Read a single JSON-line message from stdin. Returns None on EOF."""
@@ -29,17 +69,19 @@ def read_message() -> IPCMessage | None:


 def write_message(msg: IPCMessage) -> None:
-    """Write a JSON-line message to stdout."""
+    """Write a JSON-line message to the IPC channel (real stdout)."""
+    out = _get_ipc_out()
    line = json.dumps(msg.to_dict(), separators=(",", ":"))
-    sys.stdout.write(line + "\n")
-    sys.stdout.flush()
+    out.write(line + "\n")
+    out.flush()


 def write_dict(data: dict[str, Any]) -> None:
-    """Write a raw dict as a JSON-line message to stdout."""
+    """Write a raw dict as a JSON-line message to the IPC channel."""
+    out = _get_ipc_out()
    line = json.dumps(data, separators=(",", ":"))
-    sys.stdout.write(line + "\n")
-    sys.stdout.flush()
+    out.write(line + "\n")
+    out.flush()


 def _log(message: str) -> None:
--- a/python/voice_to_notes/main.py
+++ b/python/voice_to_notes/main.py
@@ -5,18 +5,26 @@ from __future__ import annotations
 import signal
 import sys

-from voice_to_notes.ipc.handlers import (
+
+# CRITICAL: Capture real stdout for IPC *before* importing any ML libraries
+# that might print to stdout and corrupt the JSON-line protocol.
+from voice_to_notes.ipc.protocol import init_ipc
+
+init_ipc()
+
+from voice_to_notes.ipc.handlers import (  # noqa: E402
    HandlerRegistry,
    hardware_detect_handler,
    make_ai_chat_handler,
+    make_diarize_download_handler,
    make_diarize_handler,
    make_export_handler,
    make_pipeline_handler,
    make_transcribe_handler,
    ping_handler,
 )
-from voice_to_notes.ipc.messages import ready_message
-from voice_to_notes.ipc.protocol import read_message, write_message
+from voice_to_notes.ipc.messages import ready_message  # noqa: E402
+from voice_to_notes.ipc.protocol import read_message, write_message  # noqa: E402


 def create_registry() -> HandlerRegistry:
@@ -26,6 +34,7 @@ def create_registry() -> HandlerRegistry:
    registry.register("transcribe.start", make_transcribe_handler())
    registry.register("hardware.detect", hardware_detect_handler)
    registry.register("diarize.start", make_diarize_handler())
+    registry.register("diarize.download", make_diarize_download_handler())
    registry.register("pipeline.start", make_pipeline_handler())
    registry.register("export.start", make_export_handler())
    registry.register("ai.chat", make_ai_chat_handler())
--- a/python/voice_to_notes/providers/litellm_provider.py
+++ b/python/voice_to_notes/providers/litellm_provider.py
@@ -1,4 +1,4 @@
-"""LiteLLM provider — multi-provider gateway."""
+"""OpenAI-compatible provider — works with any OpenAI-compatible API endpoint."""

 from __future__ import annotations

@@ -7,36 +7,44 @@ from typing import Any
 from voice_to_notes.providers.base import AIProvider


-class LiteLLMProvider(AIProvider):
-    """Routes through LiteLLM for access to 100+ LLM providers."""
+class OpenAICompatibleProvider(AIProvider):
+    """Connects to any OpenAI-compatible API (LiteLLM proxy, Ollama, vLLM, etc.)."""

-    def __init__(self, model: str = "gpt-4o-mini", **kwargs: Any) -> None:
+    def __init__(
+        self,
+        api_key: str | None = None,
+        api_base: str | None = None,
+        model: str = "gpt-4o-mini",
+        **kwargs: Any,
+    ) -> None:
+        self._api_key = api_key or "sk-no-key"
+        self._api_base = api_base
        self._model = model
        self._extra_kwargs = kwargs

    def chat(self, messages: list[dict[str, str]], **kwargs: Any) -> str:
-        try:
-            import litellm
-        except ImportError:
-            raise RuntimeError("litellm package is required. Install with: pip install litellm")
+        from openai import OpenAI

-        merged_kwargs = {**self._extra_kwargs, **kwargs}
-        response = litellm.completion(
-            model=merged_kwargs.get("model", self._model),
+        client_kwargs: dict[str, Any] = {"api_key": self._api_key}
+        if self._api_base:
+            client_kwargs["base_url"] = self._api_base
+
+        client = OpenAI(**client_kwargs)
+        response = client.chat.completions.create(
+            model=kwargs.get("model", self._model),
            messages=messages,
-            temperature=merged_kwargs.get("temperature", 0.7),
-            max_tokens=merged_kwargs.get("max_tokens", 2048),
+            temperature=kwargs.get("temperature", 0.7),
+            max_tokens=kwargs.get("max_tokens", 2048),
        )
        return response.choices[0].message.content or ""

    def is_available(self) -> bool:
        try:
-            import litellm  # noqa: F401
-
-            return True
+            import openai  # noqa: F401
+            return bool(self._api_key and self._api_base)
        except ImportError:
            return False

    @property
    def name(self) -> str:
-        return "LiteLLM"
+        return "OpenAI Compatible"
--- a/python/voice_to_notes/providers/local_provider.py
+++ b/python/voice_to_notes/providers/local_provider.py
@@ -1,4 +1,4 @@
-"""Local AI provider — bundled llama-server (OpenAI-compatible API)."""
+"""Local AI provider — Ollama or any OpenAI-compatible API."""

 from __future__ import annotations

@@ -9,9 +9,9 @@ from voice_to_notes.providers.base import AIProvider


 class LocalProvider(AIProvider):
-    """Connects to bundled llama-server via its OpenAI-compatible API."""
+    """Connects to Ollama or any OpenAI-compatible API server."""

-    def __init__(self, base_url: str = "http://localhost:8080", model: str = "local") -> None:
+    def __init__(self, base_url: str = "http://localhost:11434/v1", model: str = "llama3.2") -> None:
        self._base_url = base_url.rstrip("/")
        self._model = model
        self._client: Any = None
@@ -24,8 +24,8 @@ class LocalProvider(AIProvider):
            from openai import OpenAI

            self._client = OpenAI(
-                base_url=f"{self._base_url}/v1",
-                api_key="not-needed",  # llama-server doesn't require an API key
+                base_url=self._base_url,
+                api_key="ollama",  # Ollama doesn't require a real key
            )
        except ImportError:
            raise RuntimeError(
@@ -47,7 +47,9 @@ class LocalProvider(AIProvider):
        try:
            import urllib.request

-            req = urllib.request.Request(f"{self._base_url}/health", method="GET")
+            # Check base URL without /v1 suffix for Ollama root endpoint
+            root_url = self._base_url.replace("/v1", "")
+            req = urllib.request.Request(root_url, method="GET")
            with urllib.request.urlopen(req, timeout=2) as resp:
                return resp.status == 200
        except Exception:
@@ -55,4 +57,4 @@ class LocalProvider(AIProvider):

    @property
    def name(self) -> str:
-        return "Local (llama-server)"
+        return "Ollama"
--- a/python/voice_to_notes/services/ai_provider.py
+++ b/python/voice_to_notes/services/ai_provider.py
@@ -92,7 +92,7 @@ class AIProviderService:
 def create_default_service() -> AIProviderService:
    """Create an AIProviderService with all supported providers registered."""
    from voice_to_notes.providers.anthropic_provider import AnthropicProvider
-    from voice_to_notes.providers.litellm_provider import LiteLLMProvider
+    from voice_to_notes.providers.litellm_provider import OpenAICompatibleProvider
    from voice_to_notes.providers.local_provider import LocalProvider
    from voice_to_notes.providers.openai_provider import OpenAIProvider

@@ -100,5 +100,5 @@ def create_default_service() -> AIProviderService:
    service.register_provider("local", LocalProvider())
    service.register_provider("openai", OpenAIProvider())
    service.register_provider("anthropic", AnthropicProvider())
-    service.register_provider("litellm", LiteLLMProvider())
+    service.register_provider("litellm", OpenAICompatibleProvider())
    return service
--- a/python/voice_to_notes/services/diarize.py
+++ b/python/voice_to_notes/services/diarize.py
@@ -2,14 +2,134 @@

 from __future__ import annotations

+import os
+import subprocess
 import sys
+import tempfile
+import threading
 import time
 from dataclasses import dataclass, field
+from pathlib import Path
 from typing import Any

+# Disable pyannote telemetry — it has a bug in v4.0.4 where
+# np.isfinite(None) crashes when max_speakers is not set.
+os.environ.setdefault("PYANNOTE_METRICS_ENABLED", "false")
+
+from voice_to_notes.utils.ffmpeg import get_ffmpeg_path
 from voice_to_notes.ipc.messages import progress_message
 from voice_to_notes.ipc.protocol import write_message

+_patched = False
+
+
+def _patch_pyannote_audio() -> None:
+    """Monkey-patch pyannote.audio.core.io.Audio to load audio via soundfile.
+
+    pyannote.audio has a bug where AudioDecoder (from torchcodec) is used
+    unconditionally even when torchcodec is not installed, causing NameError.
+    This replaces Audio.__call__ and Audio.crop with soundfile-based versions.
+    """
+    global _patched
+    if _patched:
+        return
+    _patched = True
+
+    try:
+        import numpy as np
+        import soundfile as sf
+        import torch
+        from pyannote.audio.core.io import Audio
+
+        def _sf_load(audio_path: str) -> tuple:
+            """Load audio via soundfile, return (channels, samples) tensor + sample_rate."""
+            data, sample_rate = sf.read(str(audio_path), dtype="float32")
+            waveform = torch.from_numpy(np.array(data))
+            if waveform.ndim == 1:
+                waveform = waveform.unsqueeze(0)
+            else:
+                waveform = waveform.T
+            return waveform, sample_rate
+
+        def _soundfile_call(self, file: dict) -> tuple:
+            """Replacement for Audio.__call__."""
+            return _sf_load(file["audio"])
+
+        def _soundfile_crop(self, file: dict, segment, **kwargs) -> tuple:
+            """Replacement for Audio.crop — load full file then slice.
+
+            Pads short segments with zeros and trims overlong ones to the
+            expected duration; pyannote's batched embedding needs equal sizes.
+            """
+            duration = kwargs.get("duration", None)
+            waveform, sample_rate = _sf_load(file["audio"])
+            # Convert segment bounds (seconds) to sample indices, clamped to the file
+            start_sample = max(0, int(segment.start * sample_rate))
+            end_sample = min(waveform.shape[-1], int(segment.end * sample_rate))
+            cropped = waveform[:, start_sample:end_sample]
+            # Pad to expected duration if needed (pyannote batches require uniform size)
+            if duration is not None:
+                expected_samples = int(duration * sample_rate)
+                # Independent rounding of start/end can yield one extra sample;
+                # trim so every fixed-duration crop has exactly the same length.
+                cropped = cropped[:, :expected_samples]
+            else:
+                expected_samples = int((segment.end - segment.start) * sample_rate)
+            if cropped.shape[-1] < expected_samples:
+                pad = torch.zeros(cropped.shape[0], expected_samples - cropped.shape[-1])
+                cropped = torch.cat([cropped, pad], dim=-1)
+            return cropped, sample_rate
+
+        Audio.__call__ = _soundfile_call  # type: ignore[assignment]
+        Audio.crop = _soundfile_crop  # type: ignore[assignment]
+        print("[sidecar] Patched pyannote Audio to use soundfile", file=sys.stderr, flush=True)
+    except Exception as e:
+        print(f"[sidecar] Warning: Could not patch pyannote Audio: {e}", file=sys.stderr, flush=True)
+
+
+def _ensure_wav(file_path: str) -> tuple[str, str | None]:
+    """Convert audio to 16kHz mono WAV if needed.
+
+    pyannote.audio v4.0.4 has a bug where its AudioDecoder returns
+    duration=None for some formats (FLAC, etc.), causing crashes.
+    Converting to WAV ensures the duration header is always present.
+
+    Returns:
+        (path_to_use, temp_path_or_None)
+        If conversion was needed, temp_path is the WAV file to clean up.
+    """
+    ext = Path(file_path).suffix.lower()
+    if ext == ".wav":
+        return file_path, None
+
+    tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
+    tmp.close()
+    try:
+        subprocess.run(
+            [
+                get_ffmpeg_path(), "-y", "-i", file_path,
+                "-ar", "16000", "-ac", "1", "-c:a", "pcm_s16le",
+                tmp.name,
+            ],
+            check=True,
+            capture_output=True,
+        )
+        print(
+            f"[sidecar] Converted {ext} to WAV for diarization",
+            file=sys.stderr,
+            flush=True,
+        )
+        return tmp.name, tmp.name
+    except (subprocess.CalledProcessError, OSError) as e:
+        # ffmpeg missing, not executable, or failed — try original file and hope for the best
+        print(
+            f"[sidecar] WAV conversion failed ({e}), using original file",
+            file=sys.stderr,
+            flush=True,
+        )
+        os.unlink(tmp.name)
+        return file_path, None
+

@dataclass
 class SpeakerSegment:
@@ -35,45 +155,60 @@ class DiarizeService:
    def __init__(self) -> None:
        self._pipeline: Any = None

-    def _ensure_pipeline(self) -> Any:
+    def _ensure_pipeline(self, hf_token: str | None = None) -> Any:
        """Load the pyannote diarization pipeline (lazy)."""
        if self._pipeline is not None:
            return self._pipeline

        print("[sidecar] Loading pyannote diarization pipeline...", file=sys.stderr, flush=True)

-        try:
-            from pyannote.audio import Pipeline
+        # Use token from argument, fall back to environment variable
+        if not hf_token:
+            hf_token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN") or None

-            self._pipeline = Pipeline.from_pretrained(
-                "pyannote/speaker-diarization-3.1",
-                use_auth_token=False,
-            )
-        except Exception:
-            # Fall back to a simpler approach if the model isn't available
-            # pyannote requires HuggingFace token for some models
-            # Try the community model first
+        # Persist token globally so ALL huggingface_hub sub-downloads use auth.
+        # Pyannote has internal dependencies that don't forward the token= param.
+        if hf_token:
+            os.environ["HF_TOKEN"] = hf_token
+            import huggingface_hub
+            huggingface_hub.login(token=hf_token, add_to_git_credential=False)
+
+        models = [
+            "pyannote/speaker-diarization-3.1",
+            "pyannote/speaker-diarization",
+        ]
+
+        last_error: Exception | None = None
+        _patch_pyannote_audio()
+        for model_name in models:
            try:
                from pyannote.audio import Pipeline

-                self._pipeline = Pipeline.from_pretrained(
-                    "pyannote/speaker-diarization",
-                    use_auth_token=False,
-                )
+                self._pipeline = Pipeline.from_pretrained(model_name, token=hf_token)
+                print(f"[sidecar] Loaded diarization model: {model_name}", file=sys.stderr, flush=True)
+                # Move pipeline to GPU if available
+                try:
+                    import torch
+                    if torch.cuda.is_available():
+                        self._pipeline = self._pipeline.to(torch.device("cuda"))
+                        print(f"[sidecar] Diarization pipeline moved to GPU", file=sys.stderr, flush=True)
+                except Exception as e:
+                    print(f"[sidecar] GPU not available for diarization: {e}", file=sys.stderr, flush=True)
+                return self._pipeline
            except Exception as e:
+                last_error = e
                print(
-                    f"[sidecar] Warning: Could not load pyannote pipeline: {e}",
+                    f"[sidecar] Warning: Could not load {model_name}: {e}",
                    file=sys.stderr,
                    flush=True,
                )
-                raise RuntimeError(
-                    "pyannote.audio pipeline not available. "
-                    "You may need to accept the model license at "
-                    "https://huggingface.co/pyannote/speaker-diarization-3.1 "
-                    "and set a HF_TOKEN environment variable."
-                ) from e

-        return self._pipeline
+        raise RuntimeError(
+            "pyannote.audio pipeline not available. "
+            "You may need to accept the model license at "
+            "https://huggingface.co/pyannote/speaker-diarization-3.1 "
+            "and set a HF_TOKEN environment variable."
+        ) from last_error

    def diarize(
        self,
@@ -82,6 +217,8 @@ class DiarizeService:
        num_speakers: int | None = None,
        min_speakers: int | None = None,
        max_speakers: int | None = None,
+        hf_token: str | None = None,
+        audio_duration_sec: float | None = None,
    ) -> DiarizationResult:
        """Run speaker diarization on an audio file.

@@ -99,7 +236,7 @@ class DiarizeService:
            progress_message(request_id, 0, "loading_diarization", "Loading diarization model...")
        )

-        pipeline = self._ensure_pipeline()
+        pipeline = self._ensure_pipeline(hf_token=hf_token)

        write_message(
            progress_message(request_id, 20, "diarizing", "Running speaker diarization...")
@@ -116,8 +253,55 @@ class DiarizeService:
        if max_speakers is not None:
            kwargs["max_speakers"] = max_speakers

-        # Run diarization
-        diarization = pipeline(file_path, **kwargs)
+        # Convert to WAV to work around pyannote v4.0.4 duration bug
+        audio_path, temp_wav = _ensure_wav(file_path)
+
+        print(
+            f"[sidecar] Running diarization on {audio_path} with kwargs: {kwargs}",
+            file=sys.stderr,
+            flush=True,
+        )
+
+        # Run diarization in background thread for progress reporting
+        result_holder: list = [None]
+        error_holder: list[Exception | None] = [None]
+        done_event = threading.Event()
+
+        def _run():
+            try:
+                result_holder[0] = pipeline(audio_path, **kwargs)
+            except Exception as e:
+                error_holder[0] = e
+            finally:
+                done_event.set()
+
+        thread = threading.Thread(target=_run, daemon=True)
+        thread.start()
+
+        elapsed = 0.0
+        estimated_total = max(audio_duration_sec * 0.5, 30.0) if audio_duration_sec else 120.0
+        while not done_event.wait(timeout=2.0):
+            elapsed += 2.0
+            pct = min(20 + int((elapsed / estimated_total) * 65), 85)
+            write_message(progress_message(
+                request_id, pct, "diarizing",
+                f"Analyzing speakers ({int(elapsed)}s elapsed)..."))
+
+        thread.join()
+
+        # Clean up temp file
+        if temp_wav:
+            os.unlink(temp_wav)
+
+        if error_holder[0] is not None:
+            raise error_holder[0]
+        raw_result = result_holder[0]
+
+        # pyannote 4.0+ returns DiarizeOutput; older versions return Annotation directly
+        if hasattr(raw_result, "speaker_diarization"):
+            diarization = raw_result.speaker_diarization
+        else:
+            diarization = raw_result

        # Convert pyannote output to our format
        result = DiarizationResult()
--- a/python/voice_to_notes/services/pipeline.py
+++ b/python/voice_to_notes/services/pipeline.py
@@ -2,13 +2,19 @@

 from __future__ import annotations

+import concurrent.futures
 import sys
 import time
 from dataclasses import dataclass, field
 from typing import Any

-from voice_to_notes.ipc.messages import progress_message
+from voice_to_notes.ipc.messages import (
+    partial_segment_message,
+    progress_message,
+    speaker_update_message,
+)
 from voice_to_notes.ipc.protocol import write_message
+from voice_to_notes.utils.ffmpeg import get_ffprobe_path
 from voice_to_notes.services.diarize import DiarizeService, SpeakerSegment
 from voice_to_notes.services.transcribe import (
    SegmentResult,
@@ -60,6 +66,7 @@ class PipelineService:
        min_speakers: int | None = None,
        max_speakers: int | None = None,
        skip_diarization: bool = False,
+        hf_token: str | None = None,
    ) -> PipelineResult:
        """Run the full transcription + diarization pipeline.

@@ -77,22 +84,59 @@ class PipelineService:
        """
        start_time = time.time()

-        # Step 1: Transcribe
+        # Step 0: Probe audio duration for conditional chunked transcription
        write_message(
            progress_message(request_id, 0, "pipeline", "Starting transcription pipeline...")
        )

-        transcription = self._transcribe_service.transcribe(
-            request_id=request_id,
-            file_path=file_path,
-            model_name=model_name,
-            device=device,
-            compute_type=compute_type,
-            language=language,
-        )
+        def _emit_segment(seg: SegmentResult, index: int) -> None:
+            write_message(partial_segment_message(request_id, {
+                "index": index,
+                "text": seg.text,
+                "start_ms": seg.start_ms,
+                "end_ms": seg.end_ms,
+                "words": [{"word": w.word, "start_ms": w.start_ms, "end_ms": w.end_ms, "confidence": w.confidence} for w in seg.words],
+            }))
+
+        audio_duration_sec = None
+        try:
+            import subprocess
+            probe_result = subprocess.run(
+                [get_ffprobe_path(), "-v", "quiet", "-show_entries", "format=duration",
+                 "-of", "default=noprint_wrappers=1:nokey=1", file_path],
+                capture_output=True, text=True, check=True,
+            )
+            audio_duration_sec = float(probe_result.stdout.strip())
+        except (subprocess.CalledProcessError, FileNotFoundError, ValueError):
+            pass
+
+        def _run_transcription() -> TranscriptionResult:
+            """Run transcription (chunked or standard based on duration)."""
+            from voice_to_notes.services.transcribe import LARGE_FILE_THRESHOLD_SEC
+            if audio_duration_sec and audio_duration_sec > LARGE_FILE_THRESHOLD_SEC:
+                return self._transcribe_service.transcribe_chunked(
+                    request_id=request_id,
+                    file_path=file_path,
+                    model_name=model_name,
+                    device=device,
+                    compute_type=compute_type,
+                    language=language,
+                    on_segment=_emit_segment,
+                )
+            else:
+                return self._transcribe_service.transcribe(
+                    request_id=request_id,
+                    file_path=file_path,
+                    model_name=model_name,
+                    device=device,
+                    compute_type=compute_type,
+                    language=language,
+                    on_segment=_emit_segment,
+                )

        if skip_diarization:
-            # Convert transcription directly without speaker labels
+            # Sequential: transcribe only, no diarization needed
+            transcription = _run_transcription()
            result = PipelineResult(
                language=transcription.language,
                language_probability=transcription.language_probability,
@@ -110,37 +154,63 @@ class PipelineService:
                )
            return result

-        # Step 2: Diarize (with graceful fallback)
+        # Parallel execution: run transcription (0-45%) and diarization (45-90%)
+        # concurrently, then merge (90-100%).
        write_message(
-            progress_message(request_id, 50, "pipeline", "Starting speaker diarization...")
+            progress_message(
+                request_id, 0, "pipeline",
+                "Starting transcription and diarization in parallel..."
+            )
        )

        diarization = None
-        try:
-            diarization = self._diarize_service.diarize(
+        diarization_error = None
+
+        with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
+            transcription_future = executor.submit(_run_transcription)
+
+            # Use probed audio_duration_sec for diarization progress estimation
+            # (transcription hasn't finished yet, so we can't use transcription.duration_ms)
+            diarization_future = executor.submit(
+                self._diarize_service.diarize,
                request_id=request_id,
                file_path=file_path,
                num_speakers=num_speakers,
                min_speakers=min_speakers,
                max_speakers=max_speakers,
+                hf_token=hf_token,
+                audio_duration_sec=audio_duration_sec,
            )
-        except Exception as e:
-            print(
-                f"[sidecar] Diarization failed, falling back to transcription-only: {e}",
-                file=sys.stderr,
-                flush=True,
-            )
+
+            # Wait for both futures. We need the transcription result regardless,
+            # but diarization may fail gracefully.
+            transcription = transcription_future.result()
            write_message(
-                progress_message(
-                    request_id, 80, "pipeline",
-                    "Diarization unavailable, using transcription only..."
-                )
+                progress_message(request_id, 45, "pipeline", "Transcription complete")
            )

+            try:
+                diarization = diarization_future.result()
+            except Exception as e:
+                import traceback
+                diarization_error = e
+                print(
+                    f"[sidecar] Diarization failed, falling back to transcription-only: {e}",
+                    file=sys.stderr,
+                    flush=True,
+                )
+                traceback.print_exc(file=sys.stderr)
+                write_message(
+                    progress_message(
+                        request_id, 80, "pipeline",
+                        f"Diarization failed ({e}), using transcription only..."
+                    )
+                )
+
        # Step 3: Merge (or skip if diarization failed)
        if diarization is not None:
            write_message(
-                progress_message(request_id, 90, "pipeline", "Merging transcript with speakers...")
+                progress_message(request_id, 90, "merging", "Merging transcript with speakers...")
            )
            result = self._merge_results(transcription, diarization.speaker_segments)
            result.speakers = diarization.speakers
@@ -170,6 +240,10 @@ class PipelineService:
            flush=True,
        )

+        updates = [{"index": i, "speaker": seg.speaker} for i, seg in enumerate(result.segments) if seg.speaker]
+        if updates:
+            write_message(speaker_update_message(request_id, updates))
+
        write_message(
            progress_message(request_id, 100, "done", "Pipeline complete")
        )
--- a/python/voice_to_notes/services/transcribe.py
+++ b/python/voice_to_notes/services/transcribe.py
@@ -4,6 +4,7 @@ from __future__ import annotations

 import sys
 import time
+from collections.abc import Callable
 from dataclasses import dataclass, field
 from typing import Any

@@ -11,6 +12,10 @@ from faster_whisper import WhisperModel

 from voice_to_notes.ipc.messages import progress_message
 from voice_to_notes.ipc.protocol import write_message
+from voice_to_notes.utils.ffmpeg import get_ffmpeg_path, get_ffprobe_path
+
+CHUNK_REPORT_SIZE = 10
+LARGE_FILE_THRESHOLD_SEC = 3600  # 1 hour


@dataclass
@@ -72,11 +77,28 @@ class TranscribeService:
            file=sys.stderr,
            flush=True,
        )
-        self._model = WhisperModel(
-            model_name,
-            device=device,
-            compute_type=compute_type,
-        )
+        try:
+            self._model = WhisperModel(
+                model_name,
+                device=device,
+                compute_type=compute_type,
+            )
+        except Exception as e:
+            if device != "cpu":
+                print(
+                    f"[sidecar] Failed to load on {device}: {e}. Falling back to CPU.",
+                    file=sys.stderr,
+                    flush=True,
+                )
+                device = "cpu"
+                compute_type = "int8"
+                self._model = WhisperModel(
+                    model_name,
+                    device=device,
+                    compute_type=compute_type,
+                )
+            else:
+                raise
        self._current_model_name = model_name
        self._current_device = device
        self._current_compute_type = compute_type
@@ -90,6 +112,7 @@ class TranscribeService:
        device: str = "cpu",
        compute_type: str = "int8",
        language: str | None = None,
+        on_segment: Callable[[SegmentResult, int], None] | None = None,
    ) -> TranscriptionResult:
        """Transcribe an audio file with word-level timestamps.

@@ -145,16 +168,23 @@ class TranscribeService:
                )
            )

-            # Send progress every few segments
-            if segment_count % 5 == 0:
-                write_message(
-                    progress_message(
-                        request_id,
-                        progress_pct,
-                        "transcribing",
-                        f"Processed {segment_count} segments...",
-                    )
+            if on_segment:
+                on_segment(result.segments[-1], segment_count - 1)
+
+            write_message(
+                progress_message(
+                    request_id,
+                    progress_pct,
+                    "transcribing",
+                    f"Transcribing segment {segment_count} ({progress_pct}% of audio)...",
                )
+            )
+
+            if segment_count % CHUNK_REPORT_SIZE == 0:
+                write_message(progress_message(
+                    request_id, progress_pct, "transcribing",
+                    f"Completed chunk of {CHUNK_REPORT_SIZE} segments "
+                    f"({segment_count} total, {progress_pct}% of audio)..."))

        elapsed = time.time() - start_time
        print(
@@ -166,6 +196,113 @@ class TranscribeService:
        write_message(progress_message(request_id, 100, "done", "Transcription complete"))
        return result

+    def transcribe_chunked(
+        self,
+        request_id: str,
+        file_path: str,
+        model_name: str = "base",
+        device: str = "cpu",
+        compute_type: str = "int8",
+        language: str | None = None,
+        on_segment: Callable[[SegmentResult, int], None] | None = None,
+        chunk_duration_sec: int = 300,
+    ) -> TranscriptionResult:
+        """Transcribe a large audio file by splitting into chunks.
+
+        Uses ffmpeg to extract fixed-length chunks, transcribes each one and
+        merges the results. Timestamps (including those on segments passed to
+        ``on_segment``) and segment indices are relative to the whole file.
+        Falls back to standard transcribe() if ffprobe is not available.
+        """
+        import copy
+        import math
+        import os
+        import subprocess
+        import tempfile
+
+        def _offset_timestamps(seg: SegmentResult, offset_ms: int) -> None:
+            seg.start_ms += offset_ms
+            seg.end_ms += offset_ms
+            for word in seg.words:
+                word.start_ms += offset_ms
+                word.end_ms += offset_ms
+
+        # Get total duration via ffprobe
+        try:
+            probe_result = subprocess.run(
+                [get_ffprobe_path(), "-v", "quiet", "-show_entries", "format=duration",
+                 "-of", "default=noprint_wrappers=1:nokey=1", file_path],
+                capture_output=True, text=True, check=True,
+            )
+            total_duration = float(probe_result.stdout.strip())
+        except (subprocess.CalledProcessError, FileNotFoundError, ValueError):
+            # ffprobe not available or failed — fall back to standard transcription
+            write_message(progress_message(
+                request_id, 5, "transcribing",
+                "ffmpeg not available, using standard transcription..."))
+            return self.transcribe(request_id, file_path, model_name, device,
+                                   compute_type, language, on_segment=on_segment)
+
+        num_chunks = max(1, math.ceil(total_duration / chunk_duration_sec))  # exact multiple → no extra chunk
+        write_message(progress_message(
+            request_id, 5, "transcribing",
+            f"Splitting {total_duration:.0f}s file into {num_chunks} chunks..."))
+
+        merged_result = TranscriptionResult()
+        global_segment_index = 0
+
+        for chunk_idx in range(num_chunks):
+            chunk_start = chunk_idx * chunk_duration_sec
+            if chunk_start >= total_duration:
+                break
+            chunk_start_ms = int(chunk_start * 1000)
+
+            # Extract chunk to temp file
+            tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
+            tmp.close()
+            try:
+                subprocess.run(
+                    [get_ffmpeg_path(), "-y", "-ss", str(chunk_start),
+                     "-t", str(chunk_duration_sec),
+                     "-i", file_path,
+                     "-ar", "16000", "-ac", "1", "-c:a", "pcm_s16le",
+                     tmp.name],
+                    capture_output=True, check=True,
+                )
+                # Wrap on_segment: stream a file-time copy; the original is offset once at merge
+                chunk_on_segment = None
+                if on_segment:
+                    def chunk_on_segment(seg: SegmentResult, idx: int, _base=global_segment_index, _off=chunk_start_ms) -> None:
+                        shifted = copy.deepcopy(seg)
+                        _offset_timestamps(shifted, _off)
+                        on_segment(shifted, _base + idx)
+
+                chunk_result = self.transcribe(
+                    request_id, tmp.name, model_name, device,
+                    compute_type, language, on_segment=chunk_on_segment,
+                )
+                # Offset timestamps and merge
+                for seg in chunk_result.segments:
+                    _offset_timestamps(seg, chunk_start_ms)
+                    merged_result.segments.append(seg)
+                global_segment_index += len(chunk_result.segments)
+
+                # Take language from first chunk
+                if chunk_idx == 0:
+                    merged_result.language = chunk_result.language
+                    merged_result.language_probability = chunk_result.language_probability
+            finally:
+                os.unlink(tmp.name)
+
+            # Chunk progress
+            chunk_pct = min(10 + int(((chunk_idx + 1) / num_chunks) * 80), 90)
+            write_message(progress_message(
+                request_id, chunk_pct, "transcribing",
+                f"Completed chunk {chunk_idx + 1}/{num_chunks}..."))
+
+        merged_result.duration_ms = int(total_duration * 1000)
+        write_message(progress_message(request_id, 100, "done", "Transcription complete"))
+        return merged_result
+

 def result_to_payload(result: TranscriptionResult) -> dict[str, Any]:
    """Convert TranscriptionResult to IPC payload dict."""
--- a/python/voice_to_notes/utils/ffmpeg.py
+++ b/python/voice_to_notes/utils/ffmpeg.py
@@ -0,0 +1,43 @@
+"""Resolve ffmpeg/ffprobe paths for both frozen and development builds."""
+
+from __future__ import annotations
+
+import os
+import sys
+
+
+def _resolve_bundled_binary(name: str) -> str:
+    """Return the path to *name* shipped with a frozen build, else bare *name*.
+
+    When running as a frozen PyInstaller bundle, looks next to sys.executable
+    (``<name>.exe`` first on Windows, then ``<name>``). Otherwise, or when no
+    bundled copy exists, returns *name* so it is resolved via the system PATH.
+    """
+    if getattr(sys, "frozen", False):
+        # Frozen PyInstaller bundle — binaries sit next to the sidecar binary
+        bundle_dir = os.path.dirname(sys.executable)
+        filenames = [f"{name}.exe", name] if sys.platform == "win32" else [name]
+        for filename in filenames:
+            path = os.path.join(bundle_dir, filename)
+            if os.path.isfile(path):
+                return path
+    # Development run or no bundled copy — let the OS resolve it from PATH
+    return name
+
+
+def get_ffmpeg_path() -> str:
+    """Return the path to the ffmpeg binary.
+
+    When running as a frozen PyInstaller bundle, looks next to sys.executable.
+    Otherwise falls back to the system PATH.
+    """
+    return _resolve_bundled_binary("ffmpeg")
+
+
+def get_ffprobe_path() -> str:
+    """Return the path to the ffprobe binary.
+
+    When running as a frozen PyInstaller bundle, looks next to sys.executable.
+    Otherwise falls back to the system PATH.
+    """
+    return _resolve_bundled_binary("ffprobe")
--- a/src-tauri/Cargo.lock
+++ b/src-tauri/Cargo.lock
@@ -59,6 +59,15 @@ version = "1.0.102"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c"

+[[package]]
+name = "arbitrary"
+version = "1.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1"
+dependencies = [
+ "derive_arbitrary",
+]
+
 [[package]]
 name = "async-broadcast"
 version = "0.7.2"
@@ -655,6 +664,17 @@ dependencies = [
 "serde_core",
 ]

+[[package]]
+name = "derive_arbitrary"
+version = "1.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e567bd82dcff979e4b03460c307b3cdc9e96fde3d73bed1496d2bc75d9dd62a"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
 [[package]]
 name = "derive_more"
 version = "0.99.20"
@@ -4362,7 +4382,7 @@ checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"

 [[package]]
 name = "voice-to-notes"
-version = "0.1.0"
+version = "0.2.2"
 dependencies = [
 "chrono",
 "rusqlite",
@@ -4374,6 +4394,7 @@ dependencies = [
 "tauri-plugin-opener",
 "thiserror 1.0.69",
 "uuid",
+ "zip",
 ]

 [[package]]
@@ -5412,12 +5433,41 @@ dependencies = [
 "syn 2.0.117",
 ]

+[[package]]
+name = "zip"
+version = "2.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fabe6324e908f85a1c52063ce7aa26b68dcb7eb6dbc83a2d148403c9bc3eba50"
+dependencies = [
+ "arbitrary",
+ "crc32fast",
+ "crossbeam-utils",
+ "displaydoc",
+ "flate2",
+ "indexmap 2.13.0",
+ "memchr",
+ "thiserror 2.0.18",
+ "zopfli",
+]
+
 [[package]]
 name = "zmij"
 version = "1.0.21"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa"

+[[package]]
+name = "zopfli"
+version = "0.8.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f05cd8797d63865425ff89b5c4a48804f35ba0ce8d125800027ad6017d2b5249"
+dependencies = [
+ "bumpalo",
+ "crc32fast",
+ "log",
+ "simd-adler32",
+]
+
 [[package]]
 name = "zvariant"
 version = "5.10.0"
--- a/src-tauri/Cargo.toml
+++ b/src-tauri/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "voice-to-notes"
-version = "0.1.0"
+version = "0.2.27"
 description = "Voice to Notes — desktop transcription with speaker identification"
 authors = ["Voice to Notes Contributors"]
 license = "MIT"
@@ -14,12 +14,16 @@ crate-type = ["staticlib", "cdylib", "rlib"]
 tauri-build = { version = "2", features = [] }

 [dependencies]
-tauri = { version = "2", features = ["protocol-asset"] }
+tauri = { version = "2", features = ["protocol-asset", "devtools"] }
 tauri-plugin-opener = "2"
 serde = { version = "1", features = ["derive"] }
 serde_json = "1"
 rusqlite = { version = "0.31", features = ["bundled"] }
 uuid = { version = "1", features = ["v4", "serde"] }
+zip = { version = "2", default-features = false, features = ["deflate"] }
 thiserror = "1"
 chrono = { version = "0.4", features = ["serde"] }
 tauri-plugin-dialog = "2.6.0"
+reqwest = { version = "0.12", features = ["stream", "json"] }
+futures-util = "0.3"
+bytes = "1"
--- a/src-tauri/binaries/.gitkeep
+++ b/src-tauri/binaries/.gitkeep
--- a/src-tauri/build.rs
+++ b/src-tauri/build.rs
@@ -1,3 +1,21 @@
 fn main() {
+    // Ensure sidecar.zip exists so tauri-build doesn't fail.
+    // CI replaces this placeholder with the real PyInstaller sidecar archive.
+    // The placeholder is only written when no file is present, so an archive
+    // that already exists at this path is never overwritten.
+    // NOTE(review): the path is relative, so it resolves against the build
+    // script's working directory — presumably the crate root; confirm.
+    let zip_path = std::path::Path::new("sidecar.zip");
+    if !zip_path.exists() {
+        // Minimal valid zip (empty archive): end-of-central-directory record
+        let empty_zip: [u8; 22] = [
+            0x50, 0x4b, 0x05, 0x06, // EOCD signature
+            0x00, 0x00, // disk number
+            0x00, 0x00, // disk with central dir
+            0x00, 0x00, // entries on this disk
+            0x00, 0x00, // total entries
+            0x00, 0x00, 0x00, 0x00, // central dir size
+            0x00, 0x00, 0x00, 0x00, // central dir offset
+            0x00, 0x00, // comment length
+        ];
+        // Panics (failing the build) if the placeholder cannot be written.
+        std::fs::write(zip_path, empty_zip).expect("Failed to create placeholder sidecar.zip");
+    }
+
    tauri_build::build()
 }
--- a/src-tauri/nsis-hooks.nsh
+++ b/src-tauri/nsis-hooks.nsh
@@ -0,0 +1,11 @@
+; NSIS uninstall hook for Voice to Notes
+; Removes the sidecar data directory (extracted sidecar binaries + logs)
+; but preserves user data in $PROFILE\.voicetonotes (database, settings, models)
+
+!macro NSIS_HOOK_POSTUNINSTALL
+  ; Remove the Tauri app_local_data_dir which contains:
+  ;   - Extracted sidecar directories (voice-to-notes-sidecar/)
+  ;   - sidecar.log
+  ; Path: %LOCALAPPDATA%\com.voicetonotes.app
+  ;
+  ; The Tauri installer also runs the uninstaller when updating in place
+  ; ($UpdateMode = 1). Skip the removal in that case so an app update does
+  ; not throw away the downloaded sidecar and force a fresh download.
+  ${If} $UpdateMode <> 1
+    RMDir /r "$LOCALAPPDATA\com.voicetonotes.app"
+  ${EndIf}
+!macroend
--- a/src-tauri/src/commands/ai.rs
+++ b/src-tauri/src/commands/ai.rs
@@ -39,7 +39,11 @@ pub fn ai_chat(
    if response.msg_type == "error" {
        return Err(format!(
            "AI error: {}",
-            response.payload.get("message").and_then(|v| v.as_str()).unwrap_or("unknown")
+            response
+                .payload
+                .get("message")
+                .and_then(|v| v.as_str())
+                .unwrap_or("unknown")
        ));
    }

--- a/src-tauri/src/commands/export.rs
+++ b/src-tauri/src/commands/export.rs
@@ -33,7 +33,11 @@ pub fn export_transcript(
    if response.msg_type == "error" {
        return Err(format!(
            "Export error: {}",
-            response.payload.get("message").and_then(|v| v.as_str()).unwrap_or("unknown")
+            response
+                .payload
+                .get("message")
+                .and_then(|v| v.as_str())
+                .unwrap_or("unknown")
        ));
    }

--- a/src-tauri/src/commands/mod.rs
+++ b/src-tauri/src/commands/mod.rs
@@ -2,5 +2,6 @@ pub mod ai;
 pub mod export;
 pub mod project;
 pub mod settings;
+pub mod sidecar;
 pub mod system;
 pub mod transcribe;
--- a/src-tauri/src/commands/project.rs
+++ b/src-tauri/src/commands/project.rs
@@ -1,9 +1,110 @@
+use serde::{Deserialize, Serialize};
+use std::fs;
 use tauri::State;

 use crate::db::models::Project;
 use crate::db::queries;
 use crate::state::AppState;

+// ── File-based project types ────────────────────────────────────
+
+/// Project document exchanged with the frontend and stored on disk as
+/// pretty-printed JSON by `save_project_file` / `load_project_file`.
+#[derive(Serialize, Deserialize)]
+pub struct ProjectFile {
+    // NOTE(review): presumably the project-file format version — confirm.
+    pub version: u32,
+    pub name: String,
+    // NOTE(review): presumably the path of the source audio — confirm.
+    pub audio_file: String,
+    pub created_at: String,
+    pub segments: Vec<ProjectFileSegment>,
+    pub speakers: Vec<ProjectFileSpeaker>,
+}
+
+/// One transcript segment inside a `ProjectFile`, with its timed words.
+#[derive(Serialize, Deserialize)]
+pub struct ProjectFileSegment {
+    pub text: String,
+    pub start_ms: i64,
+    pub end_ms: i64,
+    // `None` when the segment has no speaker attached.
+    pub speaker: Option<String>,
+    pub is_edited: bool,
+    pub words: Vec<ProjectFileWord>,
+}
+
+/// One timed word inside a `ProjectFileSegment`.
+#[derive(Serialize, Deserialize)]
+pub struct ProjectFileWord {
+    pub word: String,
+    pub start_ms: i64,
+    pub end_ms: i64,
+    pub confidence: f64,
+}
+
+/// One speaker entry inside a `ProjectFile`.
+#[derive(Serialize, Deserialize)]
+pub struct ProjectFileSpeaker {
+    pub label: String,
+    // Optional name; `None` when none has been set.
+    pub display_name: Option<String>,
+    pub color: String,
+}
+
+// ── Input types for save_project_transcript ──────────────────────
+
+/// One timed word of a `SegmentInput`; `save_project_transcript` stores it
+/// through `queries::create_word`.
+#[derive(Deserialize)]
+pub struct WordInput {
+    pub word: String,
+    pub start_ms: i64,
+    pub end_ms: i64,
+    pub confidence: f64,
+}
+
+/// One transcript segment passed to `save_project_transcript`.
+#[derive(Deserialize)]
+pub struct SegmentInput {
+    pub text: String,
+    pub start_ms: i64,
+    pub end_ms: i64,
+    // Resolved to a speaker id via the labels in the accompanying
+    // `SpeakerInput` list; an unknown label stores the segment without a speaker.
+    pub speaker: Option<String>, // speaker label, not id
+    pub words: Vec<WordInput>,
+}
+
+/// One speaker passed to `save_project_transcript`; stored through
+/// `queries::create_speaker` with its label and color.
+#[derive(Deserialize)]
+pub struct SpeakerInput {
+    pub label: String,
+    pub color: String,
+}
+
+// ── Output types for load_project_transcript ─────────────────────
+
+/// One timed word returned by `load_project_transcript`, copied from the
+/// stored word row.
+#[derive(Serialize)]
+pub struct WordOutput {
+    pub word: String,
+    pub start_ms: i64,
+    pub end_ms: i64,
+    // Optional because the stored word's confidence is optional.
+    pub confidence: Option<f64>,
+}
+
+/// One transcript segment returned by `load_project_transcript`.
+#[derive(Serialize)]
+pub struct SegmentOutput {
+    // Id of the stored segment row.
+    pub id: String,
+    pub text: String,
+    pub start_ms: i64,
+    pub end_ms: i64,
+    // Label looked up from the segment's speaker id; `None` when the segment
+    // has no speaker or the id is not among the project's speakers.
+    pub speaker: Option<String>, // speaker label
+    pub words: Vec<WordOutput>,
+}
+
+/// One speaker returned by `load_project_transcript`, copied from the
+/// stored speaker row.
+#[derive(Serialize)]
+pub struct SpeakerOutput {
+    pub id: String,
+    pub label: String,
+    pub display_name: Option<String>,
+    pub color: Option<String>,
+}
+
+/// Result of `load_project_transcript`: the transcript stored for a
+/// project's first media file.
+#[derive(Serialize)]
+pub struct ProjectTranscript {
+    // `file_path` of the media file the segments belong to.
+    pub file_path: String,
+    pub segments: Vec<SegmentOutput>,
+    pub speakers: Vec<SpeakerOutput>,
+}
+
+// ── Commands ─────────────────────────────────────────────────────
+
 #[tauri::command]
 pub fn create_project(name: String, state: State<AppState>) -> Result<Project, String> {
    let conn = state.db.lock().map_err(|e| e.to_string())?;
@@ -21,3 +122,176 @@ pub fn list_projects(state: State<AppState>) -> Result<Vec<Project>, String> {
    let conn = state.db.lock().map_err(|e| e.to_string())?;
    queries::list_projects(&conn).map_err(|e| e.to_string())
 }
+
+/// Tauri command: delete the project identified by `id`.
+///
+/// Returns the lock or database error rendered as a string on failure.
+#[tauri::command]
+pub fn delete_project(id: String, state: State<AppState>) -> Result<(), String> {
+    let db = state.db.lock().map_err(|err| err.to_string())?;
+    match queries::delete_project(&db, &id) {
+        Ok(()) => Ok(()),
+        Err(err) => Err(err.to_string()),
+    }
+}
+
+/// Tauri command: replace the text of the segment `segment_id` with
+/// `new_text`.
+///
+/// Returns the lock or database error rendered as a string on failure.
+#[tauri::command]
+pub fn update_segment(
+    segment_id: String,
+    new_text: String,
+    state: State<AppState>,
+) -> Result<(), String> {
+    let db = state.db.lock().map_err(|err| err.to_string())?;
+    match queries::update_segment_text(&db, &segment_id, &new_text) {
+        Ok(()) => Ok(()),
+        Err(err) => Err(err.to_string()),
+    }
+}
+
+/// Persist a finished transcript for `project_id`.
+///
+/// Creates one media-file row for `file_path`, one speaker row per entry in
+/// `speakers`, and one segment row (plus its word rows) per entry in
+/// `segments`, in input order. A segment whose `speaker` label has no
+/// matching entry in `speakers` is stored without a speaker.
+///
+/// All rows are written inside a single transaction, so a failure part-way
+/// through leaves no partially written transcript behind.
+///
+/// Returns the project row, or an error string if the lock is poisoned, any
+/// query fails, or the project does not exist.
+#[tauri::command]
+pub fn save_project_transcript(
+    project_id: String,
+    file_path: String,
+    segments: Vec<SegmentInput>,
+    speakers: Vec<SpeakerInput>,
+    state: State<AppState>,
+) -> Result<Project, String> {
+    let conn = state.db.lock().map_err(|e| e.to_string())?;
+
+    // Dropping `tx` without committing (any early `?` return below) rolls
+    // back every insert made so far.
+    let tx = conn.unchecked_transaction().map_err(|e| e.to_string())?;
+
+    // 1. Create media file entry
+    let media_file =
+        queries::create_media_file(&tx, &project_id, &file_path).map_err(|e| e.to_string())?;
+
+    // 2. Create speaker entries and build label -> id map
+    let mut speaker_map = std::collections::HashMap::new();
+    for speaker_input in &speakers {
+        let speaker = queries::create_speaker(
+            &tx,
+            &project_id,
+            &speaker_input.label,
+            Some(&speaker_input.color),
+        )
+        .map_err(|e| e.to_string())?;
+        speaker_map.insert(speaker_input.label.clone(), speaker.id);
+    }
+
+    // 3. Create segments with words
+    for (index, seg_input) in segments.iter().enumerate() {
+        let speaker_id = seg_input
+            .speaker
+            .as_ref()
+            .and_then(|label| speaker_map.get(label));
+
+        let segment = queries::create_segment(
+            &tx,
+            &project_id,
+            &media_file.id,
+            speaker_id.map(|s| s.as_str()),
+            seg_input.start_ms,
+            seg_input.end_ms,
+            &seg_input.text,
+            index as i32,
+        )
+        .map_err(|e| e.to_string())?;
+
+        // Create words for this segment
+        for (word_index, word_input) in seg_input.words.iter().enumerate() {
+            queries::create_word(
+                &tx,
+                &segment.id,
+                &word_input.word,
+                word_input.start_ms,
+                word_input.end_ms,
+                Some(word_input.confidence),
+                word_index as i32,
+            )
+            .map_err(|e| e.to_string())?;
+        }
+    }
+
+    // 4. Look the project up before committing so a missing project rolls
+    //    the inserts back instead of leaving orphaned rows.
+    let project = queries::get_project(&tx, &project_id)
+        .map_err(|e| e.to_string())?
+        .ok_or_else(|| "Project not found".to_string())?;
+
+    tx.commit().map_err(|e| e.to_string())?;
+    Ok(project)
+}
+
+/// Tauri command: load the stored transcript of a project.
+///
+/// Reads the project's media files and uses the first one; returns
+/// `Ok(None)` when the project has no media file. Otherwise returns that
+/// file's segments (each with its words and speaker label) together with
+/// all speakers of the project. Any lock or query failure is returned as
+/// an error string.
+#[tauri::command]
+pub fn load_project_transcript(
+    project_id: String,
+    state: State<AppState>,
+) -> Result<Option<ProjectTranscript>, String> {
+    let db = state.db.lock().map_err(|err| err.to_string())?;
+
+    // Only the first media file of the project is considered.
+    let media_files =
+        queries::get_media_files_for_project(&db, &project_id).map_err(|err| err.to_string())?;
+    let media_file = if let Some(first) = media_files.first() {
+        first
+    } else {
+        return Ok(None);
+    };
+
+    // Speaker id -> label lookup used to label each segment.
+    let speakers =
+        queries::get_speakers_for_project(&db, &project_id).map_err(|err| err.to_string())?;
+    let mut label_by_speaker_id: std::collections::HashMap<String, String> =
+        std::collections::HashMap::with_capacity(speakers.len());
+    for speaker in &speakers {
+        label_by_speaker_id.insert(speaker.id.clone(), speaker.label.clone());
+    }
+
+    let db_segments =
+        queries::get_segments_for_media(&db, &media_file.id).map_err(|err| err.to_string())?;
+
+    // One words query per segment; the first failing query aborts the load.
+    let segments = db_segments
+        .iter()
+        .map(|seg| -> Result<SegmentOutput, String> {
+            let words: Vec<WordOutput> = queries::get_words_for_segment(&db, &seg.id)
+                .map_err(|err| err.to_string())?
+                .into_iter()
+                .map(|w| WordOutput {
+                    word: w.word,
+                    start_ms: w.start_ms,
+                    end_ms: w.end_ms,
+                    confidence: w.confidence,
+                })
+                .collect();
+
+            let speaker = match &seg.speaker_id {
+                Some(sid) => label_by_speaker_id.get(sid).cloned(),
+                None => None,
+            };
+
+            Ok(SegmentOutput {
+                id: seg.id.clone(),
+                text: seg.text.clone(),
+                start_ms: seg.start_ms,
+                end_ms: seg.end_ms,
+                speaker,
+                words,
+            })
+        })
+        .collect::<Result<Vec<_>, String>>()?;
+
+    let mut speaker_outputs = Vec::with_capacity(speakers.len());
+    for s in speakers {
+        speaker_outputs.push(SpeakerOutput {
+            id: s.id,
+            label: s.label,
+            display_name: s.display_name,
+            color: s.color,
+        });
+    }
+
+    Ok(Some(ProjectTranscript {
+        file_path: media_file.file_path.clone(),
+        segments,
+        speakers: speaker_outputs,
+    }))
+}
+
+// ── File-based project commands ─────────────────────────────────
+
+/// Tauri command: serialize `project` as pretty-printed JSON and write it
+/// to `path`, replacing any existing file.
+///
+/// Returns the serialization error, or `Failed to save project: …` when the
+/// write fails.
+#[tauri::command]
+pub fn save_project_file(path: String, project: ProjectFile) -> Result<(), String> {
+    let serialized = match serde_json::to_string_pretty(&project) {
+        Ok(text) => text,
+        Err(err) => return Err(err.to_string()),
+    };
+    match fs::write(&path, serialized) {
+        Ok(()) => Ok(()),
+        Err(err) => Err(format!("Failed to save project: {err}")),
+    }
+}
+
+/// Tauri command: read the file at `path` and parse it as a JSON
+/// `ProjectFile`.
+///
+/// Returns `Failed to read project: …` when the file cannot be read and
+/// `Failed to parse project: …` when its content is not a valid project.
+#[tauri::command]
+pub fn load_project_file(path: String) -> Result<ProjectFile, String> {
+    let contents = match fs::read_to_string(&path) {
+        Ok(text) => text,
+        Err(err) => return Err(format!("Failed to read project: {err}")),
+    };
+    match serde_json::from_str(&contents) {
+        Ok(project) => Ok(project),
+        Err(err) => Err(format!("Failed to parse project: {err}")),
+    }
+}
--- a/src-tauri/src/commands/settings.rs
+++ b/src-tauri/src/commands/settings.rs
@@ -32,3 +32,16 @@ pub fn save_settings(settings: Value) -> Result<(), String> {
    fs::write(&path, json).map_err(|e| format!("Cannot write settings: {e}"))?;
    Ok(())
 }
+
+/// Open (`open == true`) or close the devtools of the webview window
+/// labelled "main". Does nothing when no such window exists.
+#[tauri::command]
+pub fn toggle_devtools(app: tauri::AppHandle, open: bool) {
+    use tauri::Manager;
+    let main_window = match app.get_webview_window("main") {
+        Some(window) => window,
+        None => return,
+    };
+    if open {
+        main_window.open_devtools();
+    } else {
+        main_window.close_devtools();
+    }
+}
--- a/src-tauri/src/commands/sidecar.rs
+++ b/src-tauri/src/commands/sidecar.rs
@@ -0,0 +1,252 @@
+use futures_util::StreamExt;
+use serde::Serialize;
+use std::io::Write;
+use tauri::{AppHandle, Emitter};
+
+use crate::sidecar::{SidecarManager, DATA_DIR};
+
+const REPO_API: &str = "https://repo.anhonesthost.net/api/v1/repos/MacroPad/voice-to-notes";
+
+/// Payload of the `sidecar-download-progress` event emitted by
+/// `download_sidecar` after every received chunk.
+#[derive(Serialize, Clone)]
+struct DownloadProgress {
+    // Bytes written so far.
+    downloaded: u64,
+    // Response content length, or 0 when the server did not report one.
+    total: u64,
+    // `downloaded * 100 / total`; 0 while `total` is unknown.
+    percent: u8,
+}
+
+/// Returned by `check_sidecar_update` when the latest released sidecar
+/// version differs from the locally recorded one.
+#[derive(Serialize)]
+pub struct UpdateInfo {
+    // Version read from the local `sidecar-version.txt`.
+    pub current_version: String,
+    // Version taken from the latest `sidecar-v*` release tag.
+    pub latest_version: String,
+}
+
+/// Return the installed sidecar version recorded in `sidecar-version.txt`
+/// inside the app data directory, with surrounding whitespace removed.
+/// Yields `None` when the data directory is not initialized, the file
+/// cannot be read, or it holds only whitespace.
+fn read_local_sidecar_version() -> Option<String> {
+    let version_file = DATA_DIR.get()?.join("sidecar-version.txt");
+    let contents = std::fs::read_to_string(version_file).ok()?;
+    let version = contents.trim();
+    if version.is_empty() {
+        None
+    } else {
+        Some(version.to_string())
+    }
+}
+
+/// Record `version` in `sidecar-version.txt` inside the app data directory
+/// (called after a successful download). Fails when the data directory is
+/// not initialized or the file cannot be written.
+fn write_local_sidecar_version(version: &str) -> Result<(), String> {
+    let version_file = DATA_DIR
+        .get()
+        .ok_or("App data directory not initialized")?
+        .join("sidecar-version.txt");
+    match std::fs::write(&version_file, version) {
+        Ok(()) => Ok(()),
+        Err(e) => Err(format!("Failed to write sidecar version file: {}", e)),
+    }
+}
+
+/// Fetch releases from the Gitea API and find the latest sidecar release
+/// (one whose tag_name starts with "sidecar-v").
+///
+/// The releases list is shared with other release tags, so the newest
+/// sidecar release is not guaranteed to be on the first page. Pages are
+/// requested in order until a sidecar release is found, the list runs out,
+/// or `MAX_PAGES` pages have been inspected.
+///
+/// Returns the raw release JSON object, or an error string when a request
+/// fails, returns a non-success status, cannot be parsed, or no sidecar
+/// release exists in the inspected pages.
+async fn fetch_latest_sidecar_release(
+    client: &reqwest::Client,
+) -> Result<serde_json::Value, String> {
+    // Releases requested per page, and an upper bound on requests so a very
+    // long release history cannot turn into an unbounded crawl.
+    const PAGE_SIZE: u32 = 20;
+    const MAX_PAGES: u32 = 10;
+
+    for page in 1..=MAX_PAGES {
+        let releases_url = format!(
+            "{}/releases?limit={}&page={}",
+            REPO_API, PAGE_SIZE, page
+        );
+        let resp = client
+            .get(&releases_url)
+            .header("Accept", "application/json")
+            .send()
+            .await
+            .map_err(|e| format!("Failed to fetch releases: {}", e))?;
+
+        if !resp.status().is_success() {
+            return Err(format!("Failed to fetch releases: HTTP {}", resp.status()));
+        }
+
+        let releases = resp
+            .json::<Vec<serde_json::Value>>()
+            .await
+            .map_err(|e| format!("Failed to parse releases JSON: {}", e))?;
+
+        // An empty page means the end of the release list was reached.
+        if releases.is_empty() {
+            break;
+        }
+
+        let found = releases.into_iter().find(|r| {
+            r["tag_name"]
+                .as_str()
+                .map_or(false, |t| t.starts_with("sidecar-v"))
+        });
+        if let Some(release) = found {
+            return Ok(release);
+        }
+    }
+
+    Err("No sidecar release found".to_string())
+}
+
+/// Strip the "sidecar-v" prefix from a release tag
+/// (e.g. "sidecar-v1.0.1" -> "1.0.1"). A tag without that prefix is
+/// returned unchanged.
+fn version_from_sidecar_tag(tag: &str) -> &str {
+    match tag.strip_prefix("sidecar-v") {
+        Some(version) => version,
+        None => tag,
+    }
+}
+
+/// Report whether the sidecar executable for the locally recorded version
+/// is present, i.e. `<data dir>/sidecar-<version>/<binary>` exists.
+/// Returns `false` when the data directory is not initialized or no
+/// version has been recorded.
+#[tauri::command]
+pub fn check_sidecar() -> bool {
+    let binary_name = if cfg!(target_os = "windows") {
+        "voice-to-notes-sidecar.exe"
+    } else {
+        "voice-to-notes-sidecar"
+    };
+
+    match (DATA_DIR.get(), read_local_sidecar_version()) {
+        (Some(data_dir), Some(version)) => data_dir
+            .join(format!("sidecar-{}", version))
+            .join(binary_name)
+            .exists(),
+        _ => false,
+    }
+}
+
+/// Name of the target operating system as used in sidecar asset names:
+/// "windows", "macos", or "linux" for everything else.
+fn platform_os() -> &'static str {
+    match std::env::consts::OS {
+        "windows" => "windows",
+        "macos" => "macos",
+        _ => "linux",
+    }
+}
+
+/// Name of the target CPU architecture as used in sidecar asset names:
+/// "aarch64", or "x86_64" for everything else.
+fn platform_arch() -> &'static str {
+    match std::env::consts::ARCH {
+        "aarch64" => "aarch64",
+        _ => "x86_64",
+    }
+}
+
+/// Download the sidecar binary for the given variant (cpu or cuda).
+///
+/// Looks up the latest `sidecar-v*` release, streams the asset named
+/// `sidecar-{os}-{arch}-{variant}.zip` to `sidecar.zip` in the app data
+/// directory while emitting `sidecar-download-progress` events, extracts it
+/// into `sidecar-{version}/`, records the version in `sidecar-version.txt`,
+/// and finally removes the archive and older sidecar directories.
+///
+/// Returns an error string when the data directory is not initialized, the
+/// release or asset cannot be found, or the download, extraction, or
+/// version write fails.
+#[tauri::command]
+pub async fn download_sidecar(app: AppHandle, variant: String) -> Result<(), String> {
+    let data_dir = DATA_DIR.get().ok_or("App data directory not initialized")?;
+
+    let os = platform_os();
+    let arch = platform_arch();
+    let asset_name = format!("sidecar-{}-{}-{}.zip", os, arch, variant);
+
+    // Fetch the latest sidecar release from Gitea API
+    let client = reqwest::Client::new();
+    let sidecar_release = fetch_latest_sidecar_release(&client).await?;
+
+    let tag = sidecar_release["tag_name"]
+        .as_str()
+        .ok_or("No tag_name in sidecar release")?;
+    let sidecar_version = version_from_sidecar_tag(tag).to_string();
+
+    // Find the matching asset
+    let assets = sidecar_release["assets"]
+        .as_array()
+        .ok_or("No assets found in sidecar release")?;
+
+    let download_url = assets
+        .iter()
+        .find(|a| a["name"].as_str() == Some(&asset_name))
+        .and_then(|a| a["browser_download_url"].as_str())
+        .ok_or_else(|| {
+            format!(
+                "Asset '{}' not found in sidecar release {}",
+                asset_name, tag
+            )
+        })?
+        .to_string();
+
+    // Stream download with progress events
+    let response: reqwest::Response = client
+        .get(&download_url)
+        .send()
+        .await
+        .map_err(|e| format!("Failed to start download: {}", e))?;
+
+    if !response.status().is_success() {
+        return Err(format!("Download failed: HTTP {}", response.status()));
+    }
+
+    let total: u64 = response.content_length().unwrap_or(0);
+    let mut downloaded: u64 = 0;
+    let mut stream = response.bytes_stream();
+
+    let zip_path = data_dir.join("sidecar.zip");
+    {
+        // Scoped so the write handle is closed before the archive is
+        // reopened for extraction and later deleted.
+        let mut file = std::fs::File::create(&zip_path)
+            .map_err(|e| format!("Failed to create zip file: {}", e))?;
+
+        while let Some(chunk) = stream.next().await {
+            let chunk: bytes::Bytes =
+                chunk.map_err(|e| format!("Download stream error: {}", e))?;
+            file.write_all(&chunk)
+                .map_err(|e| format!("Failed to write chunk: {}", e))?;
+            downloaded += chunk.len() as u64;
+            let percent = if total > 0 {
+                // Clamp to 100: the body can be larger than the reported
+                // length, and an unclamped `as u8` cast would wrap around.
+                (downloaded.saturating_mul(100) / total).min(100) as u8
+            } else {
+                0
+            };
+            let _ = app.emit(
+                "sidecar-download-progress",
+                DownloadProgress {
+                    downloaded,
+                    total,
+                    percent,
+                },
+            );
+        }
+
+        file.flush()
+            .map_err(|e| format!("Failed to flush zip file: {}", e))?;
+    }
+
+    // Extract the downloaded zip
+    let extract_dir = data_dir.join(format!("sidecar-{}", sidecar_version));
+    SidecarManager::extract_zip(&zip_path, &extract_dir)?;
+
+    // Make the binary executable on Unix
+    #[cfg(unix)]
+    {
+        use std::os::unix::fs::PermissionsExt;
+        let binary_path = extract_dir.join("voice-to-notes-sidecar");
+        if let Ok(meta) = std::fs::metadata(&binary_path) {
+            let mut perms = meta.permissions();
+            perms.set_mode(0o755);
+            let _ = std::fs::set_permissions(&binary_path, perms);
+        }
+    }
+
+    // Write the sidecar version file
+    write_local_sidecar_version(&sidecar_version)?;
+
+    // Clean up the zip file and old sidecar versions
+    let _ = std::fs::remove_file(&zip_path);
+    SidecarManager::cleanup_old_sidecars(data_dir, &sidecar_version);
+
+    Ok(())
+}
+
+/// Compare the locally recorded sidecar version with the latest released
+/// one.
+///
+/// Returns `Ok(None)` when no sidecar is installed (first launch is handled
+/// separately), when no local version is recorded, or when the versions are
+/// equal; otherwise `Ok(Some(UpdateInfo))`. Release lookup failures are
+/// returned as an error string.
+#[tauri::command]
+pub async fn check_sidecar_update() -> Result<Option<UpdateInfo>, String> {
+    if !check_sidecar() {
+        return Ok(None);
+    }
+
+    let current_version = if let Some(version) = read_local_sidecar_version() {
+        version
+    } else {
+        return Ok(None);
+    };
+
+    // Fetch latest sidecar release from Gitea API
+    let client = reqwest::Client::new();
+    let release = fetch_latest_sidecar_release(&client).await?;
+
+    let latest_version = release["tag_name"]
+        .as_str()
+        .map(version_from_sidecar_tag)
+        .ok_or("No tag_name in sidecar release")?;
+
+    // Any difference counts as an update; versions are compared as strings.
+    if latest_version == current_version {
+        return Ok(None);
+    }
+
+    Ok(Some(UpdateInfo {
+        current_version,
+        latest_version: latest_version.to_string(),
+    }))
+}
--- a/src-tauri/src/commands/system.rs
+++ b/src-tauri/src/commands/system.rs
@@ -22,9 +22,7 @@ pub fn llama_start(
    threads: Option<u32>,
 ) -> Result<LlamaStatus, String> {
    let config = LlamaConfig {
-        binary_path: PathBuf::from(
-            binary_path.unwrap_or_else(|| "llama-server".to_string()),
-        ),
+        binary_path: PathBuf::from(binary_path.unwrap_or_else(|| "llama-server".to_string())),
        model_path: PathBuf::from(model_path),
        port: port.unwrap_or(0),
        n_gpu_layers: n_gpu_layers.unwrap_or(0),
@@ -62,3 +60,18 @@ pub fn llama_list_models() -> Value {
 pub fn get_data_dir() -> String {
    LlamaManager::data_dir().to_string_lossy().to_string()
 }
+
+/// Append one `[timestamp] [level] message` line to `frontend.log` in the
+/// data directory, for debugging the frontend. Best effort: failures to
+/// open or write the log are ignored.
+#[tauri::command]
+pub fn log_frontend(level: String, message: String) {
+    use std::io::Write;
+    let log_path = LlamaManager::data_dir().join("frontend.log");
+    let opened = std::fs::OpenOptions::new()
+        .create(true)
+        .append(true)
+        .open(&log_path);
+    let mut file = match opened {
+        Ok(file) => file,
+        Err(_) => return,
+    };
+    let timestamp = chrono::Local::now().format("%Y-%m-%d %H:%M:%S");
+    let _ = writeln!(file, "[{timestamp}] [{level}] {message}");
+}
--- a/src-tauri/src/commands/transcribe.rs
+++ b/src-tauri/src/commands/transcribe.rs
@@ -33,16 +33,47 @@ pub fn transcribe_file(
    if response.msg_type == "error" {
        return Err(format!(
            "Transcription error: {}",
-            response.payload.get("message").and_then(|v| v.as_str()).unwrap_or("unknown")
+            response
+                .payload
+                .get("message")
+                .and_then(|v| v.as_str())
+                .unwrap_or("unknown")
        ));
    }

    Ok(response.payload)
 }

+/// Download and validate the diarization model via the Python sidecar.
+///
+/// Sends a `diarize.download` request carrying `hf_token` and waits for the
+/// reply. A sidecar-reported error is returned as
+/// `{"ok": false, "error": …}` rather than as `Err`; success is
+/// `{"ok": true}`. `Err` is returned only when the sidecar cannot be
+/// started, the exchange itself fails, or the worker task fails.
+///
+/// The blocking sidecar round-trip runs on a blocking worker thread, the
+/// same way `run_pipeline` does, so the wait does not stall the thread
+/// that dispatched the command.
+#[tauri::command]
+pub async fn download_diarize_model(hf_token: String) -> Result<Value, String> {
+    let manager = sidecar();
+    manager.ensure_running()?;
+
+    let request_id = uuid::Uuid::new_v4().to_string();
+    let msg = IPCMessage::new(
+        &request_id,
+        "diarize.download",
+        json!({
+            "hf_token": hf_token,
+        }),
+    );
+
+    tauri::async_runtime::spawn_blocking(move || -> Result<Value, String> {
+        let response = manager.send_and_receive(&msg)?;
+
+        if response.msg_type == "error" {
+            return Ok(json!({
+                "ok": false,
+                "error": response.payload.get("message").and_then(|v| v.as_str()).unwrap_or("unknown"),
+            }));
+        }
+
+        Ok(json!({ "ok": true }))
+    })
+    .await
+    .map_err(|e| format!("Model download task failed: {e}"))?
+}
+
 /// Run the full transcription + diarization pipeline via the Python sidecar.
 #[tauri::command]
-pub fn run_pipeline(
+pub async fn run_pipeline(
    app: AppHandle,
    file_path: String,
    model: Option<String>,
@@ -52,6 +83,7 @@ pub fn run_pipeline(
    min_speakers: Option<u32>,
    max_speakers: Option<u32>,
    skip_diarization: Option<bool>,
+    hf_token: Option<String>,
 ) -> Result<Value, String> {
    let manager = sidecar();
    manager.ensure_running()?;
@@ -70,19 +102,38 @@ pub fn run_pipeline(
            "min_speakers": min_speakers,
            "max_speakers": max_speakers,
            "skip_diarization": skip_diarization.unwrap_or(false),
+            "hf_token": hf_token,
        }),
    );

-    let response = manager.send_and_receive_with_progress(&msg, |progress| {
-        let _ = app.emit("pipeline-progress", &progress.payload);
-    })?;
+    // Run the blocking sidecar I/O on a separate thread so the async runtime
+    // can deliver emitted events to the webview while processing is ongoing.
+    let app_handle = app.clone();
+    tauri::async_runtime::spawn_blocking(move || {
+        let response = manager.send_and_receive_with_progress(&msg, |msg| {
+            let event_name = match msg.msg_type.as_str() {
+                "pipeline.segment" => "pipeline-segment",
+                "pipeline.speaker_update" => "pipeline-speaker-update",
+                _ => "pipeline-progress",
+            };
+            if let Err(e) = app_handle.emit(event_name, &msg.payload) {
+                eprintln!("[sidecar-rs] Failed to emit {event_name}: {e}");
+            }
+        })?;

-    if response.msg_type == "error" {
-        return Err(format!(
-            "Pipeline error: {}",
-            response.payload.get("message").and_then(|v| v.as_str()).unwrap_or("unknown")
-        ));
-    }
+        if response.msg_type == "error" {
+            return Err(format!(
+                "Pipeline error: {}",
+                response
+                    .payload
+                    .get("message")
+                    .and_then(|v| v.as_str())
+                    .unwrap_or("unknown")
+            ));
+        }

-    Ok(response.payload)
+        Ok(response.payload)
+    })
+    .await
+    .map_err(|e| format!("Pipeline task failed: {e}"))?
 }
--- a/src-tauri/src/db/queries.rs
+++ b/src-tauri/src/db/queries.rs
@@ -85,6 +85,57 @@ pub fn delete_project(conn: &Connection, id: &str) -> Result<(), DatabaseError>
    Ok(())
 }

+// ── Media Files ──────────────────────────────────────────────────
+
+/// Insert a media-file row for `file_path` under `project_id` and return
+/// it. The row gets a fresh UUID and the current UTC time (RFC 3339) as
+/// `created_at`; every other metadata field is left unset.
+pub fn create_media_file(
+    conn: &Connection,
+    project_id: &str,
+    file_path: &str,
+) -> Result<MediaFile, DatabaseError> {
+    let media_file = MediaFile {
+        id: Uuid::new_v4().to_string(),
+        project_id: project_id.to_string(),
+        file_path: file_path.to_string(),
+        file_hash: None,
+        duration_ms: None,
+        sample_rate: None,
+        channels: None,
+        format: None,
+        file_size: None,
+        created_at: Utc::now().to_rfc3339(),
+    };
+    conn.execute(
+        "INSERT INTO media_files (id, project_id, file_path, created_at) VALUES (?1, ?2, ?3, ?4)",
+        params![
+            media_file.id,
+            media_file.project_id,
+            media_file.file_path,
+            media_file.created_at
+        ],
+    )?;
+    Ok(media_file)
+}
+
+/// Return all media-file rows belonging to `project_id`, ordered by
+/// `created_at`. The list is empty when the project has none.
+pub fn get_media_files_for_project(
+    conn: &Connection,
+    project_id: &str,
+) -> Result<Vec<MediaFile>, DatabaseError> {
+    let mut stmt = conn.prepare(
+        "SELECT id, project_id, file_path, file_hash, duration_ms, sample_rate, channels, format, file_size, created_at FROM media_files WHERE project_id = ?1 ORDER BY created_at",
+    )?;
+    // Column indices below follow the order of the SELECT list above.
+    let rows = stmt.query_map(params![project_id], |row| {
+        Ok(MediaFile {
+            id: row.get(0)?,
+            project_id: row.get(1)?,
+            file_path: row.get(2)?,
+            file_hash: row.get(3)?,
+            duration_ms: row.get(4)?,
+            sample_rate: row.get(5)?,
+            channels: row.get(6)?,
+            format: row.get(7)?,
+            file_size: row.get(8)?,
+            created_at: row.get(9)?,
+        })
+    })?;
+    // The first row that fails to map turns the whole result into an error.
+    Ok(rows.collect::<Result<Vec<_>, _>>()?)
+}
+
 // ── Speakers ──────────────────────────────────────────────────────

 pub fn create_speaker(
@@ -194,6 +245,39 @@ pub fn reassign_speaker(
    Ok(())
 }

+// ── Segments (create) ────────────────────────────────────────────
+
+/// Insert a segment row with a fresh UUID and return it. The segment is
+/// stored as not edited; `original_text`, `confidence` and `edited_at` are
+/// left unset.
+pub fn create_segment(
+    conn: &Connection,
+    project_id: &str,
+    media_file_id: &str,
+    speaker_id: Option<&str>,
+    start_ms: i64,
+    end_ms: i64,
+    text: &str,
+    segment_index: i32,
+) -> Result<Segment, DatabaseError> {
+    let segment = Segment {
+        id: Uuid::new_v4().to_string(),
+        project_id: project_id.to_string(),
+        media_file_id: media_file_id.to_string(),
+        speaker_id: speaker_id.map(String::from),
+        start_ms,
+        end_ms,
+        text: text.to_string(),
+        original_text: None,
+        confidence: None,
+        is_edited: false,
+        edited_at: None,
+        segment_index,
+    };
+    conn.execute(
+        "INSERT INTO segments (id, project_id, media_file_id, speaker_id, start_ms, end_ms, text, is_edited, segment_index) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, 0, ?8)",
+        params![
+            segment.id,
+            segment.project_id,
+            segment.media_file_id,
+            segment.speaker_id,
+            segment.start_ms,
+            segment.end_ms,
+            segment.text,
+            segment.segment_index
+        ],
+    )?;
+    Ok(segment)
+}
+
 // ── Words ─────────────────────────────────────────────────────────

 pub fn get_words_for_segment(
@@ -217,6 +301,31 @@ pub fn get_words_for_segment(
    Ok(rows.collect::<Result<Vec<_>, _>>()?)
 }

+pub fn create_word(
+    conn: &Connection,
+    segment_id: &str,
+    word: &str,
+    start_ms: i64,
+    end_ms: i64,
+    confidence: Option<f64>,
+    word_index: i32,
+) -> Result<Word, DatabaseError> {
+    let entry = Word { // built first; the INSERT below binds these exact values
+        id: Uuid::new_v4().to_string(),
+        segment_id: segment_id.to_string(),
+        word: word.to_string(),
+        start_ms,
+        end_ms,
+        confidence,
+        word_index,
+    };
+    conn.execute(
+        "INSERT INTO words (id, segment_id, word, start_ms, end_ms, confidence, word_index) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)",
+        params![entry.id, entry.segment_id, entry.word, entry.start_ms, entry.end_ms, entry.confidence, entry.word_index],
+    )?;
+    Ok(entry)
+}
+
 #[cfg(test)]
 mod tests {
    use super::*;
--- a/src-tauri/src/db/schema.rs
+++ b/src-tauri/src/db/schema.rs
@@ -96,11 +96,7 @@ pub fn create_tables(conn: &Connection) -> Result<(), DatabaseError> {
    )?;

    // Initialize schema version if empty
-    let count: i32 = conn.query_row(
-        "SELECT COUNT(*) FROM schema_version",
-        [],
-        |row| row.get(0),
-    )?;
+    let count: i32 = conn.query_row("SELECT COUNT(*) FROM schema_version", [], |row| row.get(0))?;
    if count == 0 {
        conn.execute(
            "INSERT INTO schema_version (version) VALUES (?1)",
--- a/src-tauri/src/lib.rs
+++ b/src-tauri/src/lib.rs
@@ -4,12 +4,21 @@ pub mod llama;
 pub mod sidecar;
 pub mod state;

+use tauri::window::Color;
+use tauri::Manager;
+
 use commands::ai::{ai_chat, ai_configure, ai_list_providers};
 use commands::export::export_transcript;
-use commands::project::{create_project, get_project, list_projects};
-use commands::settings::{load_settings, save_settings};
-use commands::system::{get_data_dir, llama_list_models, llama_start, llama_status, llama_stop};
-use commands::transcribe::{run_pipeline, transcribe_file};
+use commands::project::{
+    create_project, delete_project, get_project, list_projects, load_project_file,
+    load_project_transcript, save_project_file, save_project_transcript, update_segment,
+};
+use commands::settings::{load_settings, save_settings, toggle_devtools};
+use commands::sidecar::{check_sidecar, check_sidecar_update, download_sidecar};
+use commands::system::{
+    get_data_dir, llama_list_models, llama_start, llama_status, llama_stop, log_frontend,
+};
+use commands::transcribe::{download_diarize_model, run_pipeline, transcribe_file};
 use state::AppState;

 #[cfg_attr(mobile, tauri::mobile_entry_point)]
@@ -20,12 +29,34 @@ pub fn run() {
        .plugin(tauri_plugin_opener::init())
        .plugin(tauri_plugin_dialog::init())
        .manage(app_state)
+        .setup(|app| {
+            // Tell the sidecar manager where Tauri placed bundled resources
+            // and where to extract the sidecar archive
+            if let (Ok(resource_dir), Ok(data_dir)) =
+                (app.path().resource_dir(), app.path().app_local_data_dir())
+            {
+                sidecar::init_dirs(resource_dir, data_dir);
+            }
+
+            // Set the webview background to match the app's dark theme
+            if let Some(window) = app.get_webview_window("main") {
+                let _ = window.set_background_color(Some(Color(10, 10, 35, 255)));
+            }
+            Ok(())
+        })
        .invoke_handler(tauri::generate_handler![
            create_project,
            get_project,
            list_projects,
+            delete_project,
+            save_project_transcript,
+            load_project_transcript,
+            update_segment,
+            save_project_file,
+            load_project_file,
            transcribe_file,
            run_pipeline,
+            download_diarize_model,
            export_transcript,
            ai_chat,
            ai_list_providers,
@@ -37,6 +68,11 @@ pub fn run() {
            get_data_dir,
            load_settings,
            save_settings,
+            check_sidecar,
+            download_sidecar,
+            check_sidecar_update,
+            log_frontend,
+            toggle_devtools,
        ])
        .run(tauri::generate_context!())
        .expect("error while running tauri application");
--- a/src-tauri/src/llama/mod.rs
+++ b/src-tauri/src/llama/mod.rs
@@ -237,11 +237,7 @@ impl LlamaManager {

    /// Get the current status.
    pub fn status(&self) -> LlamaStatus {
-        let running = self
-            .process
-            .lock()
-            .ok()
-            .map_or(false, |p| p.is_some());
+        let running = self.process.lock().ok().map_or(false, |p| p.is_some());
        let port = self.port.lock().ok().map_or(0, |p| *p);
        let model = self
            .model_path
--- a/src-tauri/src/sidecar/mod.rs
+++ b/src-tauri/src/sidecar/mod.rs
@@ -2,19 +2,40 @@ pub mod ipc;
 pub mod messages;

 use std::io::{BufRead, BufReader, Write};
+use std::path::{Path, PathBuf};
 use std::process::{Child, ChildStdin, Command, Stdio};
 use std::sync::{Mutex, OnceLock};

+#[cfg(target_os = "windows")]
+use std::os::windows::process::CommandExt;
+
 use crate::sidecar::messages::IPCMessage;

+/// Resource directory set by the Tauri app during setup.
+static RESOURCE_DIR: OnceLock<PathBuf> = OnceLock::new();
+/// App data directory for extracting the sidecar archive.
+pub(crate) static DATA_DIR: OnceLock<PathBuf> = OnceLock::new();
+
+/// Record the resource and data directories used to locate and extract the sidecar.
+/// Call from the Tauri setup before any sidecar operation; each `OnceLock` keeps its first value.
+pub fn init_dirs(resource_dir: PathBuf, data_dir: PathBuf) {
+    let _ = RESOURCE_DIR.set(resource_dir);
+    let _ = DATA_DIR.set(data_dir);
+}
+
 /// Get the global sidecar manager singleton.
 pub fn sidecar() -> &'static SidecarManager {
    static INSTANCE: OnceLock<SidecarManager> = OnceLock::new();
    INSTANCE.get_or_init(SidecarManager::new)
 }

-/// Manages the Python sidecar process lifecycle.
-/// Uses separated stdin/stdout ownership to avoid BufReader conflicts.
+/// Manages the sidecar process lifecycle.
+///
+/// Supports two modes:
+/// - **Production**: spawns a frozen PyInstaller binary (no Python required)
+/// - **Dev mode**: spawns system Python with `-m voice_to_notes.main`
+///
+/// Dev mode is active when compiled in debug mode or when `VOICE_TO_NOTES_DEV=1`.
 pub struct SidecarManager {
    process: Mutex<Option<Child>>,
    stdin: Mutex<Option<ChildStdin>>,
@@ -30,38 +51,312 @@ impl SidecarManager {
        }
    }

+    /// Dev mode (system Python): debug builds, or `VOICE_TO_NOTES_DEV` set to exactly `1`.
+    fn is_dev_mode() -> bool {
+        cfg!(debug_assertions) || std::env::var("VOICE_TO_NOTES_DEV").map_or(false, |v| v == "1")
+    }
+
+    /// Read the locally installed sidecar version from `sidecar-version.txt`.
+    ///
+    /// Errors when the data dir is uninitialized, the file cannot be read, or it is blank.
+    fn read_sidecar_version() -> Result<String, String> {
+        let data_dir = DATA_DIR.get().ok_or("App data directory not initialized")?;
+        let version_file = data_dir.join("sidecar-version.txt");
+        let raw = match std::fs::read_to_string(&version_file) {
+            Ok(contents) => contents,
+            Err(_) => {
+                return Err(
+                    "Sidecar not installed: sidecar-version.txt not found. Please download the sidecar."
+                        .to_string(),
+                );
+            }
+        };
+        let version = raw.trim();
+        if version.is_empty() {
+            return Err("Sidecar version file is empty. Please re-download the sidecar.".to_string());
+        }
+        Ok(version.to_string())
+    }
+
+    /// Resolve the frozen sidecar binary path (production mode).
+    ///
+    /// Reads the installed version from `sidecar-version.txt` and looks for the binary in
+    /// `<data dir>/sidecar-{version}`, extracting a bundled `sidecar.zip` there when it is missing.
+    /// Errors if the data dir is uninitialized, the version file is unreadable/empty, or extraction fails.
+    fn resolve_sidecar_path() -> Result<PathBuf, String> {
+        let binary_name = if cfg!(target_os = "windows") {
+            "voice-to-notes-sidecar.exe"
+        } else {
+            "voice-to-notes-sidecar"
+        };
+
+        let data_dir = DATA_DIR.get().ok_or("App data directory not initialized")?;
+        let current_version = Self::read_sidecar_version()?;
+        let extract_dir = data_dir.join(format!("sidecar-{}", current_version));
+
+        let binary_path = extract_dir.join(binary_name);
+
+        // Already extracted — use it directly (and prune directories of other versions)
+        if binary_path.exists() {
+            Self::cleanup_old_sidecars(data_dir, &current_version);
+            return Ok(binary_path);
+        }
+
+        // Find sidecar.zip in resource dir or next to exe
+        let zip_path = Self::find_sidecar_zip()?;
+        Self::extract_zip(&zip_path, &extract_dir)?;
+
+        if !binary_path.exists() {
+            return Err(format!(
+                "Sidecar binary not found after extraction at {}",
+                binary_path.display()
+            ));
+        }
+
+        // Make executable on Unix (best-effort: metadata/permission errors are ignored)
+        #[cfg(unix)]
+        {
+            use std::os::unix::fs::PermissionsExt;
+            if let Ok(meta) = std::fs::metadata(&binary_path) {
+                let mut perms = meta.permissions();
+                perms.set_mode(0o755);
+                let _ = std::fs::set_permissions(&binary_path, perms);
+            }
+        }
+
+        Self::cleanup_old_sidecars(data_dir, &current_version);
+        Ok(binary_path)
+    }
+
+    /// Locate the bundled `sidecar.zip` archive.
+    ///
+    /// Looks in the Tauri resource dir first, then next to the running executable.
+    fn find_sidecar_zip() -> Result<PathBuf, String> {
+        let resource_dir = RESOURCE_DIR.get().cloned();
+        let exe_dir = std::env::current_exe()
+            .ok()
+            .and_then(|exe| exe.parent().map(Path::to_path_buf));
+
+        // Either directory may be unknown; keep the ones we have, in priority order.
+        let candidates: Vec<PathBuf> = [resource_dir, exe_dir]
+            .into_iter()
+            .flatten()
+            .map(|dir| dir.join("sidecar.zip"))
+            .collect();
+
+        if let Some(found) = candidates.iter().find(|path| path.exists()) {
+            return Ok(found.clone());
+        }
+        let checked: Vec<String> = candidates
+            .iter()
+            .map(|p| format!("  {}", p.display()))
+            .collect();
+        Err(format!(
+            "Sidecar archive not found. Checked:\n{}",
+            checked.join("\n"),
+        ))
+    }
+
+    /// Extract a zip archive into `dest` (wiped first); entries whose path would escape `dest` are rejected.
+    pub(crate) fn extract_zip(zip_path: &Path, dest: &Path) -> Result<(), String> {
+        eprintln!(
+            "[sidecar-rs] Extracting sidecar from {} to {}",
+            zip_path.display(),
+            dest.display()
+        );
+
+        // Clean destination so we don't mix old and new files
+        if dest.exists() {
+            std::fs::remove_dir_all(dest)
+                .map_err(|e| format!("Failed to clean extraction dir: {e}"))?;
+        }
+        std::fs::create_dir_all(dest)
+            .map_err(|e| format!("Failed to create extraction dir: {e}"))?;
+
+        let file =
+            std::fs::File::open(zip_path).map_err(|e| format!("Cannot open sidecar zip: {e}"))?;
+        let mut archive =
+            zip::ZipArchive::new(file).map_err(|e| format!("Invalid sidecar zip: {e}"))?;
+
+        for i in 0..archive.len() {
+            let mut entry = archive.by_index(i).map_err(|e| format!("Zip entry error: {e}"))?;
+            let name = entry.name().to_string();
+            // Zip-slip guard: `enclosed_name` is None for absolute paths or `..` escapes
+            let outpath = match entry.enclosed_name() {
+                Some(relative) => dest.join(relative),
+                None => return Err(format!("Unsafe path in sidecar zip: {name}")),
+            };
+            if entry.is_dir() {
+                std::fs::create_dir_all(&outpath)
+                    .map_err(|e| format!("Cannot create dir {}: {e}", outpath.display()))?;
+            } else {
+                if let Some(parent) = outpath.parent() {
+                    std::fs::create_dir_all(parent)
+                        .map_err(|e| format!("Cannot create dir {}: {e}", parent.display()))?;
+                }
+                let mut outfile = std::fs::File::create(&outpath)
+                    .map_err(|e| format!("Cannot create {}: {e}", outpath.display()))?;
+                std::io::copy(&mut entry, &mut outfile)
+                    .map_err(|e| format!("Write error for {}: {e}", name))?;
+            }
+        }
+
+        eprintln!("[sidecar-rs] Sidecar extracted successfully");
+        Ok(())
+    }
+
+    /// Remove every `sidecar-*` directory in `data_dir` except `sidecar-{current_version}`.
+    /// Best-effort: called once the current sidecar is confirmed ready; failures are only logged.
+    pub(crate) fn cleanup_old_sidecars(data_dir: &Path, current_version: &str) {
+        let current_dir_name = format!("sidecar-{}", current_version);
+
+        let entries = match std::fs::read_dir(data_dir) {
+            Ok(entries) => entries,
+            Err(e) => {
+                eprintln!("[sidecar-rs] Cannot read data dir for cleanup: {e}");
+                return;
+            }
+        };
+
+        for entry in entries.flatten() { // entries that fail to read are skipped
+            let name = entry.file_name();
+            let name_str = name.to_string_lossy();
+
+            if !name_str.starts_with("sidecar-") {
+                continue;
+            }
+            if *name_str == current_dir_name {
+                continue;
+            }
+            if entry.path().is_dir() { // plain files such as sidecar-version.txt are left alone
+                eprintln!(
+                    "[sidecar-rs] Removing old sidecar: {}",
+                    entry.path().display()
+                );
+                if let Err(e) = std::fs::remove_dir_all(entry.path()) {
+                    eprintln!(
+                        "[sidecar-rs] Failed to remove {}: {e}",
+                        entry.path().display()
+                    );
+                }
+            }
+        }
+    }
+
+    /// Name of the Python interpreter to launch in dev mode.
+    /// Chosen by target OS only (`python` on Windows, `python3` elsewhere); nothing is probed.
+    fn find_python_command() -> &'static str {
+        if cfg!(target_os = "windows") {
+            return "python";
+        }
+        "python3"
+    }
+
+    /// Resolve the Python sidecar directory for dev mode.
+    ///
+    /// Tries `<CARGO_MANIFEST_DIR>/../python` first, then `../python` next to the executable.
+    fn resolve_python_dir() -> Result<std::path::PathBuf, String> {
+        let manifest_dir = env!("CARGO_MANIFEST_DIR");
+        // `canonicalize` fails when the path is missing, so success already proves the
+        // directory exists; on failure fall through to the exe-relative lookup.
+        let primary = std::path::Path::new(manifest_dir).join("../python");
+        if let Ok(python_dir) = primary.canonicalize() {
+            return Ok(python_dir);
+        }
+
+        // Fallback: relative to current exe
+        let exe = std::env::current_exe().map_err(|e| e.to_string())?;
+        let alt = exe
+            .parent()
+            .ok_or_else(|| "No parent dir".to_string())?
+            .join("../python")
+            .canonicalize()
+            .map_err(|e| format!("Cannot find python directory: {e}"))?;
+
+        Ok(alt)
+    }
+
    /// Ensure the sidecar is running, starting it if needed.
    pub fn ensure_running(&self) -> Result<(), String> {
        if self.is_running() {
            return Ok(());
        }

-        let python_path = std::env::current_dir()
-            .map_err(|e| e.to_string())?
-            .join("../python")
-            .canonicalize()
-            .map_err(|e| format!("Cannot find python directory: {e}"))?;
-
-        self.start(&python_path.to_string_lossy())
+        if Self::is_dev_mode() {
+            self.start_python_dev()
+        } else {
+            let path = Self::resolve_sidecar_path()?;
+            self.start_binary(&path)
+        }
    }

-    /// Spawn the Python sidecar process.
-    pub fn start(&self, python_path: &str) -> Result<(), String> {
-        // Stop existing process if any
+    /// Spawn the frozen sidecar binary (production mode).
+    fn start_binary(&self, path: &std::path::Path) -> Result<(), String> {
        self.stop().ok();
+        eprintln!("[sidecar-rs] Starting frozen sidecar: {}", path.display());

-        let mut child = Command::new("python3")
+        // Log sidecar stderr to a file for diagnostics
+        let stderr_cfg = if let Some(data_dir) = DATA_DIR.get() {
+            let _ = std::fs::create_dir_all(data_dir);
+            let log_path = data_dir.join("sidecar.log");
+            eprintln!("[sidecar-rs] Sidecar stderr → {}", log_path.display());
+            match std::fs::File::create(&log_path) {
+                Ok(f) => Stdio::from(f),
+                Err(e) => {
+                    eprintln!("[sidecar-rs] Failed to create sidecar.log: {e}");
+                    Stdio::inherit()
+                }
+            }
+        } else {
+            eprintln!("[sidecar-rs] DATA_DIR not set, sidecar stderr will not be logged");
+            Stdio::inherit()
+        };
+
+        let mut cmd = Command::new(path);
+        cmd.stdin(Stdio::piped())
+            .stdout(Stdio::piped())
+            .stderr(stderr_cfg);
+
+        // Hide the console window on Windows (CREATE_NO_WINDOW = 0x08000000)
+        #[cfg(target_os = "windows")]
+        cmd.creation_flags(0x08000000);
+
+        let child = cmd
+            .spawn()
+            .map_err(|e| format!("Failed to start sidecar binary: {e}"))?;
+
+        self.attach(child)?;
+        self.wait_for_ready()
+    }
+
+    /// Spawn the Python sidecar in dev mode (system Python).
+    fn start_python_dev(&self) -> Result<(), String> {
+        self.stop().ok();
+        let python_dir = Self::resolve_python_dir()?;
+        let python_cmd = Self::find_python_command();
+        eprintln!(
+            "[sidecar-rs] Starting dev sidecar: {} -m voice_to_notes.main ({})",
+            python_cmd,
+            python_dir.display()
+        );
+
+        let child = Command::new(python_cmd)
            .arg("-m")
            .arg("voice_to_notes.main")
-            .current_dir(python_path)
-            .env("PYTHONPATH", python_path)
+            .current_dir(&python_dir)
+            .env("PYTHONPATH", &python_dir)
            .stdin(Stdio::piped())
            .stdout(Stdio::piped())
            .stderr(Stdio::inherit())
            .spawn()
-            .map_err(|e| format!("Failed to start sidecar: {e}"))?;
+            .map_err(|e| format!("Failed to start Python sidecar: {e}"))?;

-        // Take ownership of stdin and stdout separately
+        self.attach(child)?;
+        self.wait_for_ready()
+    }
+
+    /// Take ownership of a spawned child's stdin/stdout and store the process handle.
+    fn attach(&self, mut child: Child) -> Result<(), String> {
        let stdin = child.stdin.take().ok_or("Failed to get sidecar stdin")?;
        let stdout = child.stdout.take().ok_or("Failed to get sidecar stdout")?;
        let buf_reader = BufReader::new(stdout);
@@ -78,10 +373,6 @@ impl SidecarManager {
            let mut r = self.reader.lock().map_err(|e| e.to_string())?;
            *r = Some(buf_reader);
        }
-
-        // Wait for the "ready" message
-        self.wait_for_ready()?;
-
        Ok(())
    }

@@ -96,7 +387,22 @@ impl SidecarManager {
                    .read_line(&mut line)
                    .map_err(|e| format!("Read error: {e}"))?;
                if bytes == 0 {
-                    return Err("Sidecar closed stdout before sending ready".to_string());
+                    // Try to get the exit code for diagnostics
+                    let exit_info = {
+                        let mut proc = self.process.lock().map_err(|e| e.to_string())?;
+                        if let Some(ref mut child) = *proc {
+                            match child.try_wait() {
+                                Ok(Some(status)) => format!(" (exit status: {status})"),
+                                _ => String::new(),
+                            }
+                        } else {
+                            String::new()
+                        }
+                    };
+                    return Err(format!(
+                        "Sidecar closed stdout before sending ready{exit_info}. \
+                         The Python sidecar may have crashed on startup — check app logs for details."
+                    ));
                }
                let trimmed = line.trim();
                if trimmed.is_empty() {
@@ -107,8 +413,12 @@ impl SidecarManager {
                        return Ok(());
                    }
                }
-                // Non-ready message: something is wrong
-                break;
+                // Non-JSON or non-ready line — skip and keep waiting (cut by chars: byte slicing can panic mid-character)
+                eprintln!(
+                    "[sidecar-rs] Skipping pre-ready line: {}",
+                    trimmed.chars().take(200).collect::<String>()
+                );
+                continue;
            }
        }
        Err("Sidecar did not send ready message".to_string())
@@ -120,12 +430,51 @@ impl SidecarManager {
        self.send_and_receive_with_progress(msg, |_| {})
    }

-    /// Send a message and read the response, calling on_progress for each progress message.
-    pub fn send_and_receive_with_progress(
+    /// Send a message and receive the response, calling a callback for intermediate messages.
+    /// Intermediate messages include progress, pipeline.segment, and pipeline.speaker_update.
+    ///
+    /// If the sidecar has crashed (broken pipe), automatically restarts it and retries once.
+    pub fn send_and_receive_with_progress<F>(
        &self,
        msg: &IPCMessage,
-        on_progress: impl Fn(&IPCMessage),
-    ) -> Result<IPCMessage, String> {
+        on_intermediate: F,
+    ) -> Result<IPCMessage, String>
+    where
+        F: Fn(&IPCMessage),
+    {
+        match self.send_and_receive_inner(msg, &on_intermediate) {
+            Ok(response) => Ok(response),
+            Err(e)
+                if e.contains("Write error")
+                    || e.contains("closed stdout")
+                    || e.contains("not available") =>
+            {
+                eprintln!("[sidecar-rs] Sidecar communication failed ({e}), restarting...");
+                // Kill and reap the child first: cleanup_handles() clears self.process, and dropping a Child does not kill it
+                {
+                    let mut proc = self.process.lock().map_err(|e| e.to_string())?;
+                    if let Some(mut child) = proc.take() {
+                        let _ = child.kill();
+                        let _ = child.wait();
+                    }
+                }
+                self.cleanup_handles();
+                self.ensure_running()?;
+                self.send_and_receive_inner(msg, &on_intermediate)
+            }
+            Err(e) => Err(e),
+        }
+    }
+
+    /// Inner implementation of send_and_receive.
+    fn send_and_receive_inner<F>(
+        &self,
+        msg: &IPCMessage,
+        on_intermediate: &F,
+    ) -> Result<IPCMessage, String>
+    where
+        F: Fn(&IPCMessage),
+    {
        // Write to stdin
        {
            let mut stdin_guard = self.stdin.lock().map_err(|e| e.to_string())?;
@@ -160,14 +509,19 @@ impl SidecarManager {
                    if trimmed.is_empty() {
                        continue;
                    }
-                    let response: IPCMessage = serde_json::from_str(trimmed)
-                        .map_err(|e| format!("Parse error: {e}"))?;
+                    let response: IPCMessage =
+                        serde_json::from_str(trimmed).map_err(|e| format!("Parse error: {e}"))?;

-                    if response.msg_type == "progress" {
-                        on_progress(&response);
-                        continue;
+                    // Forward intermediate messages via callback, return the final result/error
+                    let is_intermediate = matches!(
+                        response.msg_type.as_str(),
+                        "progress" | "pipeline.segment" | "pipeline.speaker_update"
+                    );
+                    if is_intermediate {
+                        on_intermediate(&response);
+                    } else {
+                        return Ok(response);
                    }
-                    return Ok(response);
                }
            } else {
                Err("Sidecar stdout not available".to_string())
@@ -203,8 +557,39 @@ impl SidecarManager {
    }

    pub fn is_running(&self) -> bool {
-        let proc = self.process.lock().ok();
-        proc.map_or(false, |p| p.is_some())
+        let mut proc = match self.process.lock() {
+            Ok(p) => p,
+            Err(_) => return false,
+        };
+        if let Some(ref mut child) = *proc {
+            // Check if the process has exited
+            match child.try_wait() {
+                Ok(Some(_status)) => {
+                    // Process has exited — clean up handles
+                    eprintln!("[sidecar-rs] Sidecar process has exited");
+                    drop(proc);
+                    let _ = self.cleanup_handles();
+                    false
+                }
+                Ok(None) => true, // Still running
+                Err(_) => false,
+            }
+        } else {
+            false
+        }
+    }
+
+    /// Clean up stdin/stdout/process handles after the sidecar has exited.
+    fn cleanup_handles(&self) {
+        if let Ok(mut s) = self.stdin.lock() {
+            *s = None;
+        }
+        if let Ok(mut r) = self.reader.lock() {
+            *r = None;
+        }
+        if let Ok(mut p) = self.process.lock() {
+            *p = None;
+        }
    }
 }

--- a/src-tauri/src/state.rs
+++ b/src-tauri/src/state.rs
@@ -15,12 +15,10 @@ pub struct AppState {
 impl AppState {
    pub fn new() -> Result<Self, String> {
        let data_dir = LlamaManager::data_dir();
-        std::fs::create_dir_all(&data_dir)
-            .map_err(|e| format!("Cannot create data dir: {e}"))?;
+        std::fs::create_dir_all(&data_dir).map_err(|e| format!("Cannot create data dir: {e}"))?;

        let db_path = data_dir.join("voice_to_notes.db");
-        let conn = db::open_database(&db_path)
-            .map_err(|e| format!("Cannot open database: {e}"))?;
+        let conn = db::open_database(&db_path).map_err(|e| format!("Cannot open database: {e}"))?;

        Ok(Self {
            db: Mutex::new(conn),
--- a/src-tauri/tauri.conf.json
+++ b/src-tauri/tauri.conf.json
@@ -1,7 +1,7 @@
 {
  "$schema": "https://schema.tauri.app/config/2",
  "productName": "Voice to Notes",
-  "version": "0.1.0",
+  "version": "0.2.27",
  "identifier": "com.voicetonotes.app",
  "build": {
    "beforeDevCommand": "npm run dev",
@@ -16,11 +16,13 @@
        "width": 1200,
        "height": 800,
        "minWidth": 800,
-        "minHeight": 600
+        "minHeight": 600,
+        "decorations": true,
+        "transparent": false
      }
    ],
    "security": {
-      "csp": "default-src 'self'; img-src 'self' asset: https://asset.localhost; media-src 'self' asset: https://asset.localhost; style-src 'self' 'unsafe-inline'",
+      "csp": "default-src 'self' http://tauri.localhost; connect-src ipc: http://ipc.localhost http://asset.localhost https://asset.localhost blob:; img-src 'self' asset: http://asset.localhost https://asset.localhost blob:; media-src 'self' asset: http://asset.localhost https://asset.localhost blob:; style-src 'self' 'unsafe-inline'",
      "assetProtocol": {
        "enable": true,
        "scope": ["**"]
@@ -29,7 +31,7 @@
  },
  "bundle": {
    "active": true,
-    "targets": "all",
+    "targets": ["deb", "rpm", "nsis", "msi", "dmg"],
    "icon": [
      "icons/32x32.png",
      "icons/128x128.png",
@@ -40,17 +42,18 @@
    "category": "Utility",
    "shortDescription": "Transcribe audio/video with speaker identification",
    "longDescription": "Voice to Notes is a desktop application that transcribes audio and video recordings with speaker identification, synchronized playback, and AI-powered analysis. Export to SRT, WebVTT, ASS captions, or plain text.",
+    "resources": [],
    "copyright": "Voice to Notes Contributors",
    "license": "MIT",
    "linux": {
      "deb": {
-        "depends": ["python3", "python3-pip"]
-      },
-      "appimage": {
-        "bundleMediaFramework": true
+        "depends": []
      }
    },
    "windows": {
+      "nsis": {
+        "installerHooks": "nsis-hooks.nsh"
+      },
      "wix": {
        "language": "en-US"
      }
--- a/src/app.html
+++ b/src/app.html
@@ -1,5 +1,5 @@
 <!doctype html>
-<html lang="en">
+<html lang="en" style="margin:0;padding:0;background:#0a0a23;height:100%;">
  <head>
    <meta charset="utf-8" />
    <link rel="icon" href="%sveltekit.assets%/favicon.png" />
@@ -7,7 +7,7 @@
    <title>Voice to Notes</title>
    %sveltekit.head%
  </head>
-  <body data-sveltekit-preload-data="hover">
+  <body data-sveltekit-preload-data="hover" style="margin:0;padding:0;background:#0a0a23;overflow:hidden;">
    <div style="display: contents">%sveltekit.body%</div>
  </body>
 </html>
--- a/src/lib/components/AIChatPanel.svelte
+++ b/src/lib/components/AIChatPanel.svelte
@@ -1,6 +1,7 @@
 <script lang="ts">
  import { invoke } from '@tauri-apps/api/core';
  import { segments, speakers } from '$lib/stores/transcript';
+  import { settings } from '$lib/stores/settings';

  interface ChatMessage {
    role: 'user' | 'assistant';
@@ -43,9 +44,23 @@
        content: m.content,
      }));

+      // Ensure the provider is configured with current credentials before chatting
+      const s = $settings;
+      const configMap: Record<string, Record<string, string>> = {
+        openai: { api_key: s.openai_api_key, model: s.openai_model },
+        anthropic: { api_key: s.anthropic_api_key, model: s.anthropic_model },
+        litellm: { api_key: s.litellm_api_key, api_base: s.litellm_api_base, model: s.litellm_model },
+        local: { model: s.local_model_path, base_url: 'http://localhost:8080' },
+      };
+      const config = configMap[s.ai_provider];
+      if (config) {
+        await invoke('ai_configure', { provider: s.ai_provider, config });
+      }
+
      const result = await invoke<{ response: string }>('ai_chat', {
        messages: chatMessages,
        transcriptContext: getTranscriptContext(),
+        provider: s.ai_provider,
      });

      messages = [...messages, { role: 'assistant', content: result.response }];
@@ -73,6 +88,88 @@
    messages = [];
  }

+  function formatMarkdown(text: string): string {
+    // Convert a small Markdown subset (headers, flat lists, inline styles) to HTML, line by line
+    const lines = text.split('\n');
+    const result: string[] = [];
+    let openList: '' | 'ul' | 'ol' = ''; // tag of the list currently open ('' = none)
+
+    // Make `tag` the open list: close a different open list first; pass '' to just close
+    const switchList = (tag: '' | 'ul' | 'ol') => {
+      if (openList === tag) return;
+      if (openList) result.push(`</${openList}>`);
+      if (tag) result.push(`<${tag}>`);
+      openList = tag;
+    };
+
+    for (let i = 0; i < lines.length; i++) {
+      const line = lines[i];
+
+      // Headers (longest prefix first so '### ' is not read as '# ')
+      if (line.startsWith('### ')) {
+        switchList('');
+        const content = applyInlineFormatting(line.slice(4));
+        result.push(`<h4>${content}</h4>`);
+        continue;
+      }
+      if (line.startsWith('## ')) {
+        switchList('');
+        const content = applyInlineFormatting(line.slice(3));
+        result.push(`<h3>${content}</h3>`);
+        continue;
+      }
+      if (line.startsWith('# ')) {
+        switchList('');
+        const content = applyInlineFormatting(line.slice(2));
+        result.push(`<h2>${content}</h2>`);
+        continue;
+      }
+
+      // List items (- or *)
+      if (/^[\-\*] /.test(line)) {
+        switchList('ul');
+        const content = applyInlineFormatting(line.slice(2));
+        result.push(`<li>${content}</li>`);
+        continue;
+      }
+
+      // Numbered list items
+      if (/^\d+\.\s/.test(line)) {
+        switchList('ol');
+        const content = applyInlineFormatting(line.replace(/^\d+\.\s/, ''));
+        result.push(`<li>${content}</li>`);
+        continue;
+      }
+
+      // Non-list line: close any open list with its matching tag
+      switchList('');
+
+      // Empty line = paragraph break
+      if (line.trim() === '') {
+        result.push('<br>');
+        continue;
+      }
+
+      // Regular text line
+      result.push(applyInlineFormatting(line));
+    }
+
+    // Close any trailing open list
+    switchList('');
+
+    return result.join('\n');
+  }
+
+  function applyInlineFormatting(text: string): string {
+    // Escape HTML first: the result is rendered with {@html}, so raw model output must not inject markup
+    text = text.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
+    // Inline code (`text`), then bold (**text**), then italic (*text*) so bold is not consumed as italic
+    text = text.replace(/`([^`]+)`/g, '<code>$1</code>');
+    text = text.replace(/\*\*([^*]+)\*\*/g, '<strong>$1</strong>');
+    text = text.replace(/\*([^*]+)\*/g, '<em>$1</em>');
+    return text;
+  }
+
  // Quick action buttons
  async function summarize() {
    inputText = 'Please summarize this transcript in bullet points.';
@@ -107,7 +204,11 @@
    {:else}
      {#each messages as msg}
        <div class="message {msg.role}">
-          <div class="message-content">{msg.content}</div>
+          {#if msg.role === 'assistant'}
+            <div class="message-content">{@html formatMarkdown(msg.content)}</div>
+          {:else}
+            <div class="message-content">{msg.content}</div>
+          {/if}
        </div>
      {/each}
      {#if isLoading}
@@ -177,47 +278,101 @@
  }
  .empty-state {
    text-align: center;
-    color: #666;
-    font-size: 0.8rem;
-    padding: 1rem 0;
+    color: #888;
+    font-size: 0.85rem;
+    padding: 2rem 1rem;
+  }
+  .empty-state p {
+    margin-bottom: 1rem;
  }
  .quick-actions {
    display: flex;
-    gap: 0.5rem;
+    gap: 0.75rem;
    justify-content: center;
-    margin-top: 0.5rem;
+    margin-top: 1rem;
  }
  .quick-btn {
    background: rgba(233, 69, 96, 0.15);
    border: 1px solid rgba(233, 69, 96, 0.3);
    color: #e94560;
-    padding: 0.3rem 0.6rem;
-    border-radius: 4px;
+    padding: 0.45rem 0.85rem;
+    border-radius: 6px;
    cursor: pointer;
-    font-size: 0.75rem;
+    font-size: 0.8rem;
+    transition: background 0.15s;
  }
  .quick-btn:hover {
    background: rgba(233, 69, 96, 0.25);
  }
  .message {
-    margin-bottom: 0.5rem;
-    padding: 0.5rem 0.75rem;
-    border-radius: 6px;
+    margin-bottom: 0.75rem;
+    padding: 0.75rem 1rem;
+    border-radius: 8px;
    font-size: 0.8rem;
-    line-height: 1.4;
+    line-height: 1.55;
  }
  .message.user {
    background: rgba(233, 69, 96, 0.15);
-    margin-left: 1rem;
+    border-left: 3px solid rgba(233, 69, 96, 0.4);
  }
  .message.assistant {
    background: rgba(255, 255, 255, 0.05);
-    margin-right: 1rem;
+    border-left: 3px solid rgba(255, 255, 255, 0.1);
  }
  .message.loading {
    opacity: 0.6;
    font-style: italic;
  }
+
+  /* Markdown styles inside assistant messages */
+  .message.assistant :global(h2) {
+    font-size: 1rem;
+    font-weight: 600;
+    margin: 0.6rem 0 0.3rem;
+    color: #f0f0f0;
+  }
+  .message.assistant :global(h3) {
+    font-size: 0.9rem;
+    font-weight: 600;
+    margin: 0.5rem 0 0.25rem;
+    color: #e8e8e8;
+  }
+  .message.assistant :global(h4) {
+    font-size: 0.85rem;
+    font-weight: 600;
+    margin: 0.4rem 0 0.2rem;
+    color: #e0e0e0;
+  }
+  .message.assistant :global(strong) {
+    color: #f0f0f0;
+    font-weight: 600;
+  }
+  .message.assistant :global(em) {
+    color: #ccc;
+    font-style: italic;
+  }
+  .message.assistant :global(code) {
+    background: rgba(0, 0, 0, 0.3);
+    color: #e94560;
+    padding: 0.1rem 0.35rem;
+    border-radius: 3px;
+    font-size: 0.75rem;
+    font-family: 'Fira Code', 'Cascadia Code', 'Consolas', monospace;
+  }
+  .message.assistant :global(ul),
+  .message.assistant :global(ol) {
+    margin: 0.35rem 0;
+    padding-left: 1.3rem;
+  }
+  .message.assistant :global(li) {
+    margin-bottom: 0.25rem;
+    line-height: 1.5;
+  }
+  .message.assistant :global(br) {
+    display: block;
+    content: '';
+    margin-top: 0.35rem;
+  }
  .chat-input {
    display: flex;
    gap: 0.5rem;
--- a/src/lib/components/ProgressOverlay.svelte
+++ b/src/lib/components/ProgressOverlay.svelte
@@ -7,16 +7,88 @@
  }

  let { visible = false, percent = 0, stage = '', message = '' }: Props = $props();
+
+  // Pipeline steps in order
+  const pipelineSteps = [
+    { key: 'loading_model', label: 'Load transcription model' },
+    { key: 'transcribing', label: 'Transcribe audio' },
+    { key: 'loading_diarization', label: 'Load speaker detection model' },
+    { key: 'diarizing', label: 'Identify speakers' },
+    { key: 'merging', label: 'Merge results' },
+  ];
+
+  const stepOrder = pipelineSteps.map(s => s.key);
+
+  // Track the highest step index we've reached (never goes backward)
+  let highestStepIdx = $state(-1);
+
+  // Map a stage name to its index in stepOrder; -1 means "not a pipeline step".
+  function stageToStepIdx(s: string): number {
+    // 'done' ranks one past the last step so every step, 'merging' included, shows as done
+    // rather than leaving the final step spinning as active under a "Complete" heading.
+    if (s === 'done') return stepOrder.length;
+    return stepOrder.indexOf(s); // -1 for 'pipeline'/unknown, which never raises the high-water mark
+  }
+
+  // While the overlay is hidden the high-water mark is reset; while it is shown the mark
+  // is only ever raised, so a stage that maps to a lower index cannot move it backward.
+  $effect(() => {
+    if (visible) {
+      const reached = stageToStepIdx(stage);
+      if (reached > highestStepIdx) highestStepIdx = reached;
+    } else {
+      highestStepIdx = -1;
+    }
+  });
+
+  function getStepStatus(stepIdx: number): 'pending' | 'active' | 'done' {
+    // Steps below the high-water mark are finished; the one sitting on it is running.
+    const delta = stepIdx - highestStepIdx;
+    return delta < 0 ? 'done' : delta === 0 ? 'active' : 'pending';
+  }
+
+  // User-friendly display of current stage
+  const stageLabels: Record<string, string> = {
+    'pipeline': 'Initializing...',
+    'loading_model': 'Loading Model',
+    'transcribing': 'Transcribing',
+    'loading_diarization': 'Loading Diarization',
+    'diarizing': 'Speaker Detection',
+    'merging': 'Merging Results',
+    'done': 'Complete',
+  };
+
+  let displayStage = $derived(stageLabels[stage] || stage || 'Processing...');
 </script>

 {#if visible}
  <div class="overlay">
    <div class="progress-card">
-      <h3>{stage}</h3>
-      <div class="bar-track">
-        <div class="bar-fill" style="width: {percent}%"></div>
+      <div class="spinner-row">
+        <div class="spinner"></div>
+        <h3>{displayStage}</h3>
      </div>
-      <p>{percent}% — {message}</p>
+
+      <div class="steps">
+        {#each pipelineSteps as step, idx}
+          {@const status = getStepStatus(idx)}
+          <div class="step" class:step-done={status === 'done'} class:step-active={status === 'active'}>
+            <span class="step-icon">
+              {#if status === 'done'}
+                ✓
+              {:else if status === 'active'}
+                ⟳
+              {:else}
+                ·
+              {/if}
+            </span>
+            <span class="step-label">{step.label}</span>
+          </div>
+        {/each}
+      </div>
+
+      <p class="status-text">{message || 'Please wait...'}</p>
+      <p class="hint-text">This may take several minutes for large files</p>
    </div>
  </div>
 {/if}
@@ -25,34 +97,81 @@
  .overlay {
    position: fixed;
    inset: 0;
-    background: rgba(0, 0, 0, 0.7);
+    background: rgba(0, 0, 0, 0.8);
    display: flex;
    align-items: center;
    justify-content: center;
-    z-index: 1000;
+    z-index: 9999;
  }
  .progress-card {
    background: #16213e;
-    padding: 2rem;
+    padding: 2rem 2.5rem;
    border-radius: 12px;
-    min-width: 400px;
+    min-width: 380px;
+    max-width: 440px;
    color: #e0e0e0;
+    border: 1px solid #2a3a5e;
+    box-shadow: 0 8px 32px rgba(0, 0, 0, 0.5);
  }
-  h3 { margin: 0 0 1rem; text-transform: capitalize; }
-  .bar-track {
-    height: 8px;
-    background: #0f3460;
-    border-radius: 4px;
-    overflow: hidden;
+  .spinner-row {
+    display: flex;
+    align-items: center;
+    gap: 0.75rem;
+    margin-bottom: 1.25rem;
  }
-  .bar-fill {
-    height: 100%;
-    background: #e94560;
-    transition: width 0.3s;
+  .spinner {
+    width: 20px;
+    height: 20px;
+    border: 3px solid #2a3a5e;
+    border-top-color: #e94560;
+    border-radius: 50%;
+    animation: spin 0.8s linear infinite;
+    flex-shrink: 0;
  }
-  p {
+  @keyframes spin {
+    to { transform: rotate(360deg); }
+  }
+  h3 {
+    margin: 0;
+    font-size: 1.1rem;
+  }
+  .steps {
+    display: flex;
+    flex-direction: column;
+    gap: 0.4rem;
+    margin-bottom: 1rem;
+  }
+  .step {
+    display: flex;
+    align-items: center;
+    gap: 0.5rem;
+    font-size: 0.85rem;
+    color: #555;
+  }
+  .step-done {
+    color: #4ecdc4;
+  }
+  .step-active {
+    color: #e0e0e0;
+    font-weight: 500;
+  }
+  .step-icon {
+    width: 1.2rem;
+    text-align: center;
+    flex-shrink: 0;
+  }
+  .step-active .step-icon {
+    animation: spin 1.5s linear infinite;
+    display: inline-block;
+  }
+  .status-text {
+    margin: 0.75rem 0 0;
+    font-size: 0.85rem;
+    color: #b0b0b0;
+  }
+  .hint-text {
    margin: 0.5rem 0 0;
-    font-size: 0.875rem;
-    color: #999;
+    font-size: 0.75rem;
+    color: #555;
  }
 </style>
--- a/src/lib/components/SettingsModal.svelte
+++ b/src/lib/components/SettingsModal.svelte
@@ -1,4 +1,6 @@
 <script lang="ts">
+  import { invoke } from '@tauri-apps/api/core';
+  import { openUrl } from '@tauri-apps/plugin-opener';
  import { settings, saveSettings, type AppSettings } from '$lib/stores/settings';

  interface Props {
@@ -9,7 +11,34 @@
  let { visible, onClose }: Props = $props();

  let localSettings = $state<AppSettings>({ ...$settings });
-  let activeTab = $state<'transcription' | 'ai' | 'local'>('transcription');
+  let activeTab = $state<'transcription' | 'speakers' | 'ai' | 'debug'>('transcription');
+  let modelStatus = $state<'idle' | 'downloading' | 'success' | 'error'>('idle');
+  let modelError = $state('');
+  let revealedFields = $state<Set<string>>(new Set());
+
+  // Invokes 'download_diarize_model' with the entered token; outcome goes to modelStatus/modelError.
+  async function testAndDownloadModel() {
+    const token = localSettings.hf_token;
+    if (!token) {
+      modelError = 'Please enter a HuggingFace token first.';
+      modelStatus = 'error';
+      return;
+    }
+    modelError = '';
+    modelStatus = 'downloading';
+    try {
+      const { ok, error: detail } = await invoke<{ ok: boolean; error?: string }>(
+        'download_diarize_model',
+        { hfToken: token },
+      );
+      modelStatus = ok ? 'success' : 'error';
+      if (!ok) modelError = detail || 'Unknown error';
+    } catch (err) {
+      // invoke rejected: show the stringified rejection value
+      modelError = String(err);
+      modelStatus = 'error';
+    }
+  }

  // Sync when settings store changes
  $effect(() => {
@@ -46,11 +75,14 @@
        <button class="tab" class:active={activeTab === 'transcription'} onclick={() => activeTab = 'transcription'}>
          Transcription
        </button>
+        <button class="tab" class:active={activeTab === 'speakers'} onclick={() => activeTab = 'speakers'}>
+          Speakers
+        </button>
        <button class="tab" class:active={activeTab === 'ai'} onclick={() => activeTab = 'ai'}>
          AI Provider
        </button>
-        <button class="tab" class:active={activeTab === 'local'} onclick={() => activeTab = 'local'}>
-          Local AI
+        <button class="tab" class:active={activeTab === 'debug'} onclick={() => activeTab = 'debug'}>
+          Debug
        </button>
      </div>

@@ -77,27 +109,105 @@
            <label for="stt-lang">Language (blank = auto-detect)</label>
            <input id="stt-lang" type="text" bind:value={localSettings.transcription_language} placeholder="e.g., en, es, fr" />
          </div>
-          <div class="field checkbox">
+        {:else if activeTab === 'speakers'}
+          <div class="field">
+            <label for="hf-token">HuggingFace Token</label>
+            <div class="input-reveal">
+              <input id="hf-token" type={revealedFields.has('hf-token') ? 'text' : 'password'} bind:value={localSettings.hf_token} placeholder="hf_..." />
+              <button type="button" class="reveal-btn" onclick={() => { const s = new Set(revealedFields); s.has('hf-token') ? s.delete('hf-token') : s.add('hf-token'); revealedFields = s; }}>{revealedFields.has('hf-token') ? 'Hide' : 'Show'}</button>
+            </div>
+          </div>
+          <div class="info-box">
+            <p class="info-title">Setup (one-time)</p>
+            <p>Speaker detection uses <strong>pyannote.audio</strong> models hosted on HuggingFace. You must accept the license for each model:</p>
+            <ol>
+              <li>Create a free account at <!-- svelte-ignore a11y_no_static_element_interactions --><a class="ext-link" onclick={() => openUrl('https://huggingface.co/join')}>huggingface.co</a></li>
+              <li>Accept the license on <strong>all three</strong> of these pages:
+                <ul>
+                  <!-- svelte-ignore a11y_no_static_element_interactions -->
+                  <li><a class="ext-link" onclick={() => openUrl('https://huggingface.co/pyannote/speaker-diarization-3.1')}>pyannote/speaker-diarization-3.1</a></li>
+                  <!-- svelte-ignore a11y_no_static_element_interactions -->
+                  <li><a class="ext-link" onclick={() => openUrl('https://huggingface.co/pyannote/segmentation-3.0')}>pyannote/segmentation-3.0</a></li>
+                  <!-- svelte-ignore a11y_no_static_element_interactions -->
+                  <li><a class="ext-link" onclick={() => openUrl('https://huggingface.co/pyannote/speaker-diarization-community-1')}>pyannote/speaker-diarization-community-1</a></li>
+                </ul>
+              </li>
+              <!-- svelte-ignore a11y_no_static_element_interactions -->
+              <li>Create a token at <a class="ext-link" onclick={() => openUrl('https://huggingface.co/settings/tokens')}>huggingface.co/settings/tokens</a> (read access)</li>
+              <li>Paste the token above and click <strong>Test & Download</strong></li>
+            </ol>
+          </div>
+          <button
+            class="btn-download"
+            onclick={testAndDownloadModel}
+            disabled={modelStatus === 'downloading'}
+          >
+            {#if modelStatus === 'downloading'}
+              Downloading model...
+            {:else}
+              Test & Download Model
+            {/if}
+          </button>
+          {#if modelStatus === 'success'}
+            <p class="status-success">Model downloaded successfully. Speaker detection is ready.</p>
+          {/if}
+          {#if modelStatus === 'error'}
+            <p class="status-error">{modelError}</p>
+          {/if}
+          <div class="field" style="margin-top: 1rem;">
+            <label for="num-speakers">Number of speakers</label>
+            <select
+              id="num-speakers"
+              value={localSettings.num_speakers === null || localSettings.num_speakers === 0 ? '0' : String(localSettings.num_speakers)}
+              onchange={(e) => {
+                const v = parseInt((e.target as HTMLSelectElement).value, 10);
+                localSettings.num_speakers = v === 0 ? null : v;
+              }}
+            >
+              <option value="0">Auto-detect</option>
+              {#each Array.from({ length: 20 }, (_, i) => i + 1) as n}
+                <option value={String(n)}>{n}</option>
+              {/each}
+            </select>
+            <p class="hint">Hint the expected number of speakers to speed up diarization clustering.</p>
+          </div>
+          <div class="field checkbox" style="margin-top: 1rem;">
            <label>
              <input type="checkbox" bind:checked={localSettings.skip_diarization} />
-              Skip speaker diarization (faster, no speaker labels)
+              Skip speaker detection (faster, no speaker labels)
            </label>
          </div>
        {:else if activeTab === 'ai'}
          <div class="field">
            <label for="ai-provider">AI Provider</label>
            <select id="ai-provider" bind:value={localSettings.ai_provider}>
-              <option value="local">Local (llama-server)</option>
+              <option value="local">Ollama</option>
              <option value="openai">OpenAI</option>
              <option value="anthropic">Anthropic</option>
-              <option value="litellm">LiteLLM</option>
+              <option value="litellm">OpenAI Compatible</option>
            </select>
          </div>

-          {#if localSettings.ai_provider === 'openai'}
+          {#if localSettings.ai_provider === 'local'}
+            <div class="field">
+              <label for="ollama-url">Ollama URL</label>
+              <input id="ollama-url" type="text" bind:value={localSettings.ollama_url} placeholder="http://localhost:11434" />
+            </div>
+            <div class="field">
+              <label for="ollama-model">Model</label>
+              <input id="ollama-model" type="text" bind:value={localSettings.ollama_model} placeholder="llama3.2" />
+            </div>
+            <p class="hint">
+              Install Ollama from ollama.com, then pull a model with <code>ollama pull llama3.2</code>.
+              The app connects via Ollama's OpenAI-compatible API.
+            </p>
+          {:else if localSettings.ai_provider === 'openai'}
            <div class="field">
              <label for="openai-key">OpenAI API Key</label>
-              <input id="openai-key" type="password" bind:value={localSettings.openai_api_key} placeholder="sk-..." />
+              <div class="input-reveal">
+                <input id="openai-key" type={revealedFields.has('openai-key') ? 'text' : 'password'} bind:value={localSettings.openai_api_key} placeholder="sk-..." />
+                <button type="button" class="reveal-btn" onclick={() => { const s = new Set(revealedFields); s.has('openai-key') ? s.delete('openai-key') : s.add('openai-key'); revealedFields = s; }}>{revealedFields.has('openai-key') ? 'Hide' : 'Show'}</button>
+              </div>
            </div>
            <div class="field">
              <label for="openai-model">Model</label>
@@ -106,31 +216,47 @@
          {:else if localSettings.ai_provider === 'anthropic'}
            <div class="field">
              <label for="anthropic-key">Anthropic API Key</label>
-              <input id="anthropic-key" type="password" bind:value={localSettings.anthropic_api_key} placeholder="sk-ant-..." />
+              <div class="input-reveal">
+                <input id="anthropic-key" type={revealedFields.has('anthropic-key') ? 'text' : 'password'} bind:value={localSettings.anthropic_api_key} placeholder="sk-ant-..." />
+                <button type="button" class="reveal-btn" onclick={() => { const s = new Set(revealedFields); s.has('anthropic-key') ? s.delete('anthropic-key') : s.add('anthropic-key'); revealedFields = s; }}>{revealedFields.has('anthropic-key') ? 'Hide' : 'Show'}</button>
+              </div>
            </div>
            <div class="field">
              <label for="anthropic-model">Model</label>
              <input id="anthropic-model" type="text" bind:value={localSettings.anthropic_model} />
            </div>
          {:else if localSettings.ai_provider === 'litellm'}
+            <div class="field">
+              <label for="litellm-base">API Base URL</label>
+              <input id="litellm-base" type="text" bind:value={localSettings.litellm_api_base} placeholder="https://your-litellm-proxy.example.com" />
+            </div>
+            <div class="field">
+              <label for="litellm-key">API Key</label>
+              <div class="input-reveal">
+                <input id="litellm-key" type={revealedFields.has('litellm-key') ? 'text' : 'password'} bind:value={localSettings.litellm_api_key} placeholder="sk-..." />
+                <button type="button" class="reveal-btn" onclick={() => { const s = new Set(revealedFields); s.has('litellm-key') ? s.delete('litellm-key') : s.add('litellm-key'); revealedFields = s; }}>{revealedFields.has('litellm-key') ? 'Hide' : 'Show'}</button>
+              </div>
+            </div>
            <div class="field">
              <label for="litellm-model">Model</label>
              <input id="litellm-model" type="text" bind:value={localSettings.litellm_model} placeholder="provider/model-name" />
            </div>
          {/if}
-        {:else}
-          <div class="field">
-            <label for="llama-binary">llama-server Binary Path</label>
-            <input id="llama-binary" type="text" bind:value={localSettings.local_binary_path} placeholder="llama-server" />
+        {:else if activeTab === 'debug'}
+          <div class="field checkbox">
+            <label>
+              <input
+                type="checkbox"
+                checked={localSettings.devtools_enabled}
+                onchange={async (e) => {
+                  localSettings.devtools_enabled = (e.target as HTMLInputElement).checked;
+                  await invoke('toggle_devtools', { open: localSettings.devtools_enabled });
+                }}
+              />
+              Enable Developer Tools
+            </label>
+            <p class="hint">Opens the browser inspector for debugging. Changes take effect immediately.</p>
          </div>
-          <div class="field">
-            <label for="llama-model">GGUF Model Path</label>
-            <input id="llama-model" type="text" bind:value={localSettings.local_model_path} placeholder="~/.voicetonotes/models/model.gguf" />
-          </div>
-          <p class="hint">
-            Place GGUF model files in ~/.voicetonotes/models/ for auto-detection.
-            The local AI server uses the OpenAI-compatible API from llama.cpp.
-          </p>
        {/if}
      </div>

@@ -220,11 +346,36 @@
    color: #aaa;
    margin-bottom: 0.3rem;
  }
+  .input-reveal {
+    display: flex;
+    gap: 0;
+  }
+  .input-reveal input {
+    flex: 1;
+    border-top-right-radius: 0;
+    border-bottom-right-radius: 0;
+  }
+  .reveal-btn {
+    background: #0f3460;
+    border: 1px solid #4a5568;
+    border-left: none;
+    color: #aaa;
+    padding: 0.5rem 0.6rem;
+    border-radius: 0 4px 4px 0;
+    cursor: pointer;
+    font-size: 0.75rem;
+    white-space: nowrap;
+  }
+  .reveal-btn:hover {
+    color: #e0e0e0;
+    background: #1a4a7a;
+  }
  .field input,
  .field select {
    width: 100%;
    background: #1a1a2e;
    color: #e0e0e0;
+    color-scheme: dark;
    border: 1px solid #4a5568;
    border-radius: 4px;
    padding: 0.5rem;
@@ -252,6 +403,79 @@
    color: #666;
    line-height: 1.4;
  }
+  .info-box {
+    background: rgba(233, 69, 96, 0.05);
+    border: 1px solid #2a3a5e;
+    border-radius: 6px;
+    padding: 0.75rem 1rem;
+    margin-bottom: 1rem;
+    font-size: 0.8rem;
+    color: #b0b0b0;
+    line-height: 1.5;
+  }
+  .info-box p {
+    margin: 0 0 0.5rem;
+  }
+  .info-box p:last-child {
+    margin-bottom: 0;
+  }
+  .info-box .info-title {
+    color: #e0e0e0;
+    font-weight: 600;
+    font-size: 0.8rem;
+  }
+  .info-box ol {
+    margin: 0.25rem 0 0.5rem;
+    padding-left: 1.25rem;
+  }
+  .info-box li {
+    margin-bottom: 0.25rem;
+  }
+  .info-box strong {
+    color: #e0e0e0;
+  }
+  .ext-link {
+    color: #e94560;
+    cursor: pointer;
+    text-decoration: underline;
+  }
+  .ext-link:hover {
+    color: #ff6b81;
+  }
+  .info-box ul {
+    margin: 0.25rem 0;
+    padding-left: 1.25rem;
+  }
+  .btn-download {
+    background: #0f3460;
+    border: 1px solid #4a5568;
+    color: #e0e0e0;
+    padding: 0.5rem 1rem;
+    border-radius: 6px;
+    cursor: pointer;
+    font-size: 0.85rem;
+    width: 100%;
+    margin-bottom: 0.5rem;
+  }
+  .btn-download:hover:not(:disabled) {
+    background: #1a4a7a;
+    border-color: #e94560;
+  }
+  .btn-download:disabled {
+    opacity: 0.6;
+    cursor: not-allowed;
+  }
+  .status-success {
+    color: #4ecdc4;
+    font-size: 0.8rem;
+    margin: 0.25rem 0;
+  }
+  .status-error {
+    color: #e94560;
+    font-size: 0.8rem;
+    margin: 0.25rem 0;
+    word-break: break-word;
+  }
  .modal-footer {
    display: flex;
    justify-content: flex-end;
--- a/src/lib/components/SidecarSetup.svelte
+++ b/src/lib/components/SidecarSetup.svelte
@@ -0,0 +1,320 @@
+<script lang="ts">
+  import { invoke } from '@tauri-apps/api/core';
+  import { listen } from '@tauri-apps/api/event';
+  import type { UnlistenFn } from '@tauri-apps/api/event';
+  import { onMount } from 'svelte';
+
+  interface Props {
+    onComplete: () => void;
+  }
+
+  let { onComplete }: Props = $props();
+
+  let variant = $state<'cpu' | 'cuda'>('cpu');
+  let downloading = $state(false);
+  let downloadProgress = $state({ downloaded: 0, total: 0, percent: 0 });
+  let error = $state('');
+  let success = $state(false);
+
+  let unlisten: UnlistenFn | null = null;
+
+  onMount(() => {
+    return () => {
+      unlisten?.();
+    };
+  });
+
+  function formatBytes(bytes: number): string {
+    // Whole KB below 1 MiB, whole MB below 1 GiB, otherwise GB with one decimal place.
+    const mib = 1024 * 1024, gib = 1024 * 1024 * 1024;
+    return bytes < mib ? `${(bytes / 1024).toFixed(0)} KB` : bytes < gib ? `${(bytes / mib).toFixed(0)} MB` : `${(bytes / gib).toFixed(1)} GB`;
+  }
+
+  // Downloads the selected sidecar variant, mirroring 'sidecar-download-progress' events
+  // into downloadProgress; calls onComplete shortly after success, sets `error` on failure.
+  async function startDownload() {
+    downloading = true;
+    error = '';
+    success = false;
+    // Start every attempt (including Retry) from an empty bar, not the previous attempt's numbers.
+    downloadProgress = { downloaded: 0, total: 0, percent: 0 };
+    try {
+      // Subscribing is inside the try so a rejected listen() is reported via `error` and the
+      // finally block still clears `downloading`, instead of stranding the progress view.
+      unlisten = await listen<{ downloaded: number; total: number; percent: number }>(
+        'sidecar-download-progress',
+        (event) => { downloadProgress = event.payload; },
+      );
+      await invoke('download_sidecar', { variant });
+      success = true;
+      // Brief pause so the user sees "Setup complete!" before the screen goes away
+      setTimeout(() => onComplete(), 800);
+    } catch (err) {
+      error = String(err);
+    } finally {
+      downloading = false;
+      unlisten?.();
+      unlisten = null;
+    }
+  }
+</script>
+
+<div class="setup-overlay">
+  <div class="setup-card">
+    <h1 class="app-title">Voice to Notes</h1>
+    <h2 class="setup-heading">First-Time Setup</h2>
+    <p class="setup-description">
+      Voice to Notes needs to download its AI engine to transcribe audio.
+    </p>
+
+    {#if !downloading && !success}
+      <div class="variant-options">
+        <label class="variant-option" class:selected={variant === 'cpu'}>
+          <input type="radio" name="variant" value="cpu" bind:group={variant} />
+          <div class="variant-info">
+            <span class="variant-label">Standard (CPU)</span>
+            <span class="variant-desc">Works on all computers (~500 MB download)</span>
+          </div>
+        </label>
+        <label class="variant-option" class:selected={variant === 'cuda'}>
+          <input type="radio" name="variant" value="cuda" bind:group={variant} />
+          <div class="variant-info">
+            <span class="variant-label">GPU Accelerated (CUDA)</span>
+            <span class="variant-desc">Faster transcription with NVIDIA GPU (~2 GB download)</span>
+          </div>
+        </label>
+      </div>
+
+      {#if error}
+        <div class="error-box">
+          <p class="error-text">{error}</p>
+          <button class="btn-retry" onclick={startDownload}>Retry</button>
+        </div>
+      {:else}
+        <button class="btn-download" onclick={startDownload}>
+          Download &amp; Install
+        </button>
+      {/if}
+    {:else if downloading}
+      <div class="progress-section">
+        <div class="progress-bar-track">
+          <div class="progress-bar-fill" style="width: {downloadProgress.percent}%"></div>
+        </div>
+        <p class="progress-text">
+          {downloadProgress.percent}% — {formatBytes(downloadProgress.downloaded)} / {formatBytes(downloadProgress.total)}
+        </p>
+        <p class="progress-hint">Downloading {variant === 'cuda' ? 'GPU' : 'CPU'} engine...</p>
+      </div>
+    {:else if success}
+      <div class="success-section">
+        <div class="success-icon">&#10003;</div>
+        <p class="success-text">Setup complete!</p>
+      </div>
+    {/if}
+  </div>
+</div>
+
+<style>
+  .setup-overlay {
+    position: fixed;
+    inset: 0;
+    background: #0a0a23;
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    z-index: 10000;
+  }
+
+  .setup-card {
+    background: #16213e;
+    border: 1px solid #2a3a5e;
+    border-radius: 12px;
+    padding: 2.5rem 3rem;
+    max-width: 480px;
+    width: 90vw;
+    color: #e0e0e0;
+    box-shadow: 0 8px 32px rgba(0, 0, 0, 0.5);
+    text-align: center;
+  }
+
+  .app-title {
+    font-size: 1.8rem;
+    margin: 0 0 0.25rem;
+    color: #e94560;
+    font-weight: 700;
+  }
+
+  .setup-heading {
+    font-size: 1.1rem;
+    margin: 0 0 0.75rem;
+    color: #e0e0e0;
+    font-weight: 500;
+  }
+
+  .setup-description {
+    font-size: 0.9rem;
+    color: #b0b0b0;
+    margin: 0 0 1.5rem;
+    line-height: 1.5;
+  }
+
+  .variant-options {
+    display: flex;
+    flex-direction: column;
+    gap: 0.75rem;
+    margin-bottom: 1.5rem;
+    text-align: left;
+  }
+
+  .variant-option {
+    display: flex;
+    align-items: flex-start;
+    gap: 0.75rem;
+    padding: 0.85rem 1rem;
+    border: 1px solid #2a3a5e;
+    border-radius: 8px;
+    cursor: pointer;
+    transition: border-color 0.15s, background 0.15s;
+  }
+
+  .variant-option:hover {
+    border-color: #4a5568;
+    background: rgba(255, 255, 255, 0.02);
+  }
+
+  .variant-option.selected {
+    border-color: #e94560;
+    background: rgba(233, 69, 96, 0.08);
+  }
+
+  .variant-option input[type='radio'] {
+    margin-top: 0.2rem;
+    accent-color: #e94560;
+    flex-shrink: 0;
+  }
+
+  .variant-info {
+    display: flex;
+    flex-direction: column;
+    gap: 0.2rem;
+  }
+
+  .variant-label {
+    font-size: 0.9rem;
+    font-weight: 500;
+    color: #e0e0e0;
+  }
+
+  .variant-desc {
+    font-size: 0.78rem;
+    color: #888;
+  }
+
+  .btn-download {
+    background: #e94560;
+    border: none;
+    color: white;
+    padding: 0.7rem 1.5rem;
+    border-radius: 6px;
+    cursor: pointer;
+    font-size: 0.9rem;
+    font-weight: 500;
+    width: 100%;
+    transition: background 0.15s;
+  }
+
+  .btn-download:hover {
+    background: #d63851;
+  }
+
+  .progress-section {
+    margin-top: 0.5rem;
+  }
+
+  .progress-bar-track {
+    width: 100%;
+    height: 8px;
+    background: #1a1a2e;
+    border-radius: 4px;
+    overflow: hidden;
+    border: 1px solid #2a3a5e;
+  }
+
+  .progress-bar-fill {
+    height: 100%;
+    background: #e94560;
+    border-radius: 4px;
+    transition: width 0.3s ease;
+  }
+
+  .progress-text {
+    margin: 0.75rem 0 0;
+    font-size: 0.85rem;
+    color: #e0e0e0;
+    font-variant-numeric: tabular-nums;
+  }
+
+  .progress-hint {
+    margin: 0.35rem 0 0;
+    font-size: 0.78rem;
+    color: #888;
+  }
+
+  .error-box {
+    background: rgba(233, 69, 96, 0.1);
+    border: 1px solid rgba(233, 69, 96, 0.3);
+    border-radius: 8px;
+    padding: 1rem;
+  }
+
+  .error-text {
+    color: #e94560;
+    font-size: 0.85rem;
+    margin: 0 0 0.75rem;
+    word-break: break-word;
+    line-height: 1.4;
+  }
+
+  .btn-retry {
+    background: #e94560;
+    border: none;
+    color: white;
+    padding: 0.5rem 1.25rem;
+    border-radius: 6px;
+    cursor: pointer;
+    font-size: 0.85rem;
+    font-weight: 500;
+  }
+
+  .btn-retry:hover {
+    background: #d63851;
+  }
+
+  .success-section {
+    display: flex;
+    flex-direction: column;
+    align-items: center;
+    gap: 0.5rem;
+    padding: 1rem 0;
+  }
+
+  .success-icon {
+    width: 48px;
+    height: 48px;
+    border-radius: 50%;
+    background: rgba(78, 205, 196, 0.15);
+    color: #4ecdc4;
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    font-size: 1.5rem;
+    font-weight: 700;
+  }
+
+  .success-text {
+    color: #4ecdc4;
+    font-size: 1rem;
+    margin: 0;
+    font-weight: 500;
+  }
+</style>
--- a/src/lib/components/SpeakerManager.svelte
+++ b/src/lib/components/SpeakerManager.svelte
@@ -1,5 +1,6 @@
 <script lang="ts">
  import { speakers } from '$lib/stores/transcript';
+  import { settings } from '$lib/stores/settings';
  import type { Speaker } from '$lib/types/transcript';

  let editingSpeakerId = $state<string | null>(null);
@@ -34,7 +35,14 @@
 <div class="speaker-manager">
  <h3>Speakers</h3>
  {#if $speakers.length === 0}
-    <p class="empty-hint">No speakers detected yet</p>
+    <p class="empty-hint">No speakers detected</p>
+    {#if $settings.skip_diarization}
+      <p class="setup-hint">Speaker detection is disabled. Enable it in Settings &gt; Speakers.</p>
+    {:else if !$settings.hf_token}
+      <p class="setup-hint">Speaker detection requires a HuggingFace token. Configure it in Settings &gt; Speakers.</p>
+    {:else}
+      <p class="setup-hint">Speaker detection ran but found no distinct speakers, or the model may need to be downloaded. Check Settings &gt; Speakers.</p>
+    {/if}
  {:else}
    <ul class="speaker-list">
      {#each $speakers as speaker (speaker.id)}
@@ -78,6 +86,19 @@
  .empty-hint {
    color: #666;
    font-size: 0.875rem;
+    margin-bottom: 0.25rem;
+  }
+  .setup-hint {
+    color: #555;
+    font-size: 0.75rem;
+    line-height: 1.4;
+  }
+  .setup-hint code {
+    background: rgba(233, 69, 96, 0.15);
+    color: #e94560;
+    padding: 0.1rem 0.3rem;
+    border-radius: 3px;
+    font-size: 0.7rem;
  }
  .speaker-list {
    list-style: none;
--- a/src/lib/components/TranscriptEditor.svelte
+++ b/src/lib/components/TranscriptEditor.svelte
@@ -60,12 +60,14 @@
  function finishEditing(segmentId: string) {
    const trimmed = editText.trim();
    if (trimmed) {
-      // Update the segment text in the store
      segments.update(segs => segs.map(s => {
        if (s.id !== segmentId) return s;
+        const newWordTexts = trimmed.split(/\s+/);
+        const newWords = redistributeWords(s, newWordTexts);
        return {
          ...s,
          text: trimmed,
+          words: newWords,
          original_text: s.original_text ?? s.text,
          is_edited: true,
          edited_at: new Date().toISOString(),
@@ -76,6 +78,106 @@
    editingSegmentId = null;
  }

+  /**
+   * Rebuild a segment's word list (with timings) after its text was edited.
+   * Same word count: each old word keeps its timing and takes the new text.
+   * Otherwise walk greedily: case-insensitive matches keep their timing, a run of
+   * new words covering one old word's length splits its time range evenly, and
+   * unmatched words fill the gap between their neighbours.
+   * @param segment edited segment (words, start_ms, end_ms and id are read)
+   * @returns one Word per entry of newWordTexts, in the same order
+   */
+  function redistributeWords(segment: Segment, newWordTexts: string[]): Word[] {
+    const oldWords = segment.words;
+
+    // Same word count — preserve per-word timing (spelling fixes)
+    if (newWordTexts.length === oldWords.length) {
+      return oldWords.map((w, i) => ({ ...w, word: newWordTexts[i] }));
+    }
+
+    // Word counts differ: compare case-insensitively and consume old words
+    // one at a time as new words are matched against them.
+    const oldTexts = oldWords.map(w => w.word.toLowerCase());
+    const newTexts = newWordTexts.map(w => w.toLowerCase());
+
+    // Walk both lists, greedily matching old words to new words
+    const result: Word[] = [];
+    let oldIdx = 0;
+    let newIdx = 0;
+
+    while (newIdx < newTexts.length) {
+      if (oldIdx < oldTexts.length && oldTexts[oldIdx] === newTexts[newIdx]) {
+        // Exact match — keep original timing
+        result.push({ ...oldWords[oldIdx], word: newWordTexts[newIdx], word_index: newIdx });
+        oldIdx++;
+        newIdx++;
+      } else if (oldIdx < oldTexts.length) {
+        // Check if old word was split into multiple new words.
+        // E.g., "gonna" → "going to": count how many upcoming new words (at
+        // most 5) it takes for their combined length to reach the old word's
+        // length. This is a length heuristic only; characters are not compared.
+        let splitCount = 0;
+        let combined = '';
+        for (let k = newIdx; k < newTexts.length && k - newIdx < 5; k++) {
+          combined += newTexts[k];
+          if (combined.length >= oldTexts[oldIdx].length) {
+            splitCount = k - newIdx + 1;
+            break;
+          }
+        }
+
+        if (splitCount > 1) {
+          // Split: give each new word an equal share of the old word's time range
+          const ow = oldWords[oldIdx];
+          const totalDuration = ow.end_ms - ow.start_ms;
+          const fraction = 1 / splitCount;
+          for (let k = 0; k < splitCount; k++) {
+            result.push({
+              id: `${segment.id}-word-${newIdx + k}`,
+              segment_id: segment.id,
+              word: newWordTexts[newIdx + k],
+              start_ms: Math.round(ow.start_ms + totalDuration * fraction * k),
+              end_ms: Math.round(ow.start_ms + totalDuration * fraction * (k + 1)),
+              confidence: ow.confidence,
+              word_index: newIdx + k,
+            });
+          }
+          oldIdx++;
+          newIdx += splitCount;
+        } else {
+          // No match found — fill the gap before the pending old word (never negative)
+          const prevEnd = result.length > 0 ? result[result.length - 1].end_ms : segment.start_ms;
+          const nextStart = oldWords[oldIdx].start_ms;
+          result.push({
+            id: `${segment.id}-word-${newIdx}`,
+            segment_id: segment.id,
+            word: newWordTexts[newIdx],
+            start_ms: prevEnd,
+            end_ms: Math.max(prevEnd, nextStart),
+            confidence: 1.0,
+            word_index: newIdx,
+          });
+          newIdx++;
+        }
+      } else {
+        // No more old words — use end of segment
+        const prevEnd = result.length > 0 ? result[result.length - 1].end_ms : segment.start_ms;
+        result.push({
+          id: `${segment.id}-word-${newIdx}`,
+          segment_id: segment.id,
+          word: newWordTexts[newIdx],
+          start_ms: prevEnd,
+          end_ms: segment.end_ms,
+          confidence: 1.0,
+          word_index: newIdx,
+        });
+        newIdx++;
+      }
+    }
+
+    return result;
+  }
+
  function handleEditKeydown(e: KeyboardEvent, segmentId: string) {
    if (e.key === 'Escape') {
      editingSegmentId = null;
@@ -170,7 +272,9 @@
 <style>
  .transcript-editor {
    flex: 1;
+    min-width: 0;
    overflow-y: auto;
+    overflow-x: hidden;
    padding: 1rem;
    background: #16213e;
    border-radius: 8px;
@@ -217,6 +321,9 @@
  .segment-text {
    line-height: 1.6;
    padding-left: 0.75rem;
+    white-space: pre-wrap;
+    word-wrap: break-word;
+    overflow-wrap: break-word;
  }
  .word {
    cursor: pointer;
--- a/src/lib/components/WaveformPlayer.svelte
+++ b/src/lib/components/WaveformPlayer.svelte
@@ -12,6 +12,8 @@

  let container: HTMLDivElement;
  let wavesurfer: WaveSurfer | null = $state(null);
+  let isReady = $state(false);
+  let isLoading = $state(false);
  let currentTime = $state('0:00');
  let totalTime = $state('0:00');

@@ -31,6 +33,7 @@
      barWidth: 2,
      barGap: 1,
      barRadius: 2,
+      backend: 'WebAudio',
    });

    wavesurfer.on('timeupdate', (time: number) => {
@@ -39,6 +42,8 @@
    });

    wavesurfer.on('ready', () => {
+      isReady = true;
+      isLoading = false;
      const dur = wavesurfer!.getDuration();
      durationMs.set(Math.round(dur * 1000));
      totalTime = formatTime(dur);
@@ -48,8 +53,12 @@
    wavesurfer.on('pause', () => isPlaying.set(false));
    wavesurfer.on('finish', () => isPlaying.set(false));

+    wavesurfer.on('loading', () => {
+      isReady = false;
+    });
+
    if (audioUrl) {
-      wavesurfer.load(audioUrl);
+      loadAudio(audioUrl);
    }
  });

@@ -57,20 +66,21 @@
    wavesurfer?.destroy();
  });

-  /** Toggle play/pause. Exposed for keyboard shortcuts. */
+  /** Toggle play/pause from current position. Exposed for keyboard shortcuts. */
  export function togglePlayPause() {
-    wavesurfer?.playPause();
+    if (!wavesurfer || !isReady) return;
+    wavesurfer.playPause();
  }

  function skipBack() {
-    if (wavesurfer) {
+    if (wavesurfer && isReady) {
      const time = Math.max(0, wavesurfer.getCurrentTime() - 5);
      wavesurfer.setTime(time);
    }
  }

  function skipForward() {
-    if (wavesurfer) {
+    if (wavesurfer && isReady) {
      const time = Math.min(wavesurfer.getDuration(), wavesurfer.getCurrentTime() + 5);
      wavesurfer.setTime(time);
    }
@@ -78,16 +88,17 @@

  /** Seek to a specific time in milliseconds. Called from transcript click-to-seek. */
  export function seekTo(timeMs: number) {
-    if (wavesurfer) {
-      wavesurfer.setTime(timeMs / 1000);
-      if (!wavesurfer.isPlaying()) {
-        wavesurfer.play();
-      }
+    if (!wavesurfer || !isReady) {
+      console.warn('[voice-to-notes] seekTo ignored — audio not ready yet');
+      return;
    }
+    wavesurfer.setTime(timeMs / 1000);
  }

  /** Load a new audio file. */
  export function loadAudio(url: string) {
+    isReady = false;
+    isLoading = true;
    wavesurfer?.load(url);
  }
 </script>
@@ -95,11 +106,17 @@
 <div class="waveform-player">
  <div class="waveform-container" bind:this={container}></div>
  <div class="controls">
-    <button class="control-btn" onclick={skipBack} title="Back 5s">⏪</button>
-    <button class="control-btn play-btn" onclick={togglePlayPause} title="Play/Pause">
-      {#if $isPlaying}⏸{:else}▶{/if}
+    <button class="control-btn" onclick={skipBack} title="Back 5s" disabled={!isReady}>⏪</button>
+    <button class="control-btn play-btn" onclick={togglePlayPause} title="Play/Pause" disabled={!isReady}>
+      {#if !isReady}
+        ⏳
+      {:else if $isPlaying}
+        ⏸
+      {:else}
+        ▶
+      {/if}
    </button>
-    <button class="control-btn" onclick={skipForward} title="Forward 5s">⏩</button>
+    <button class="control-btn" onclick={skipForward} title="Forward 5s" disabled={!isReady}>⏩</button>
    <span class="time">{currentTime} / {totalTime}</span>
  </div>
 </div>
@@ -129,9 +146,13 @@
    cursor: pointer;
    font-size: 1rem;
  }
-  .control-btn:hover {
+  .control-btn:hover:not(:disabled) {
    background: #1a4a7a;
  }
+  .control-btn:disabled {
+    opacity: 0.4;
+    cursor: not-allowed;
+  }
  .play-btn {
    padding: 0.4rem 1rem;
    font-size: 1.2rem;
--- a/src/lib/stores/settings.ts
+++ b/src/lib/stores/settings.ts
@@ -8,12 +8,17 @@ export interface AppSettings {
  openai_model: string;
  anthropic_model: string;
  litellm_model: string;
-  local_model_path: string;
-  local_binary_path: string;
+  litellm_api_key: string;
+  litellm_api_base: string;
+  ollama_url: string;
+  ollama_model: string;
  transcription_model: string;
  transcription_device: string;
  transcription_language: string;
  skip_diarization: boolean;
+  hf_token: string;
+  num_speakers: number | null;
+  devtools_enabled: boolean;
 }

 const defaults: AppSettings = {
@@ -23,12 +28,17 @@ const defaults: AppSettings = {
  openai_model: 'gpt-4o-mini',
  anthropic_model: 'claude-sonnet-4-6',
  litellm_model: 'gpt-4o-mini',
-  local_model_path: '',
-  local_binary_path: 'llama-server',
+  litellm_api_key: '',
+  litellm_api_base: '',
+  ollama_url: 'http://localhost:11434',
+  ollama_model: 'llama3.2',
  transcription_model: 'base',
  transcription_device: 'cpu',
  transcription_language: '',
  skip_diarization: false,
+  hf_token: '',
+  num_speakers: null,
+  devtools_enabled: false,
 };

 export const settings = writable<AppSettings>({ ...defaults });
@@ -45,4 +55,20 @@ export async function loadSettings(): Promise<void> {
 export async function saveSettings(s: AppSettings): Promise<void> {
  settings.set(s);
  await invoke('save_settings', { settings: s });
+
+  // Configure the AI provider in the Python sidecar
+  const configMap: Record<string, Record<string, string>> = {
+    openai: { api_key: s.openai_api_key, model: s.openai_model },
+    anthropic: { api_key: s.anthropic_api_key, model: s.anthropic_model },
+    litellm: { api_key: s.litellm_api_key, api_base: s.litellm_api_base, model: s.litellm_model },
+    local: { model: s.ollama_model, base_url: s.ollama_url + '/v1' },
+  };
+  const config = configMap[s.ai_provider];
+  if (config) {
+    try {
+      await invoke('ai_configure', { provider: s.ai_provider, config });
+    } catch {
+      // Sidecar may not be running yet — provider will be configured on first use
+    }
+  }
 }
--- a/src/routes/+page.svelte
+++ b/src/routes/+page.svelte
@@ -8,17 +8,72 @@
  import AIChatPanel from '$lib/components/AIChatPanel.svelte';
  import ProgressOverlay from '$lib/components/ProgressOverlay.svelte';
  import SettingsModal from '$lib/components/SettingsModal.svelte';
+  import SidecarSetup from '$lib/components/SidecarSetup.svelte';
  import { segments, speakers } from '$lib/stores/transcript';
  import { settings, loadSettings } from '$lib/stores/settings';
  import type { Segment, Speaker } from '$lib/types/transcript';
-  import { onMount } from 'svelte';
+  import { onMount, tick } from 'svelte';

+  let appReady = $state(false);
  let waveformPlayer: WaveformPlayer;
  let audioUrl = $state('');
  let showSettings = $state(false);

+  // Sidecar state
+  let sidecarReady = $state(false);
+  let sidecarChecked = $state(false);
+
+  // Sidecar update state
+  let sidecarUpdate = $state<{ current_version: string; latest_version: string } | null>(null);
+  let showUpdateDownload = $state(false);
+  let updateDismissed = $state(false);
+
+  // Project management state
+  let currentProjectPath = $state<string | null>(null);
+  let currentProjectName = $state('');
+  let audioFilePath = $state('');
+
+  async function checkSidecar() {
+    try {
+      const ready = await invoke<boolean>('check_sidecar');
+      sidecarReady = ready;
+    } catch {
+      sidecarReady = false;
+    }
+    sidecarChecked = true;
+  }
+
+  async function checkSidecarUpdate() {
+    try {
+      const update = await invoke<{ current_version: string; latest_version: string } | null>('check_sidecar_update');
+      sidecarUpdate = update;
+    } catch {
+      // Silently ignore update check failures
+    }
+  }
+
+  function handleSidecarSetupComplete() {
+    sidecarReady = true;
+    checkSidecarUpdate();
+  }
+
+  function handleUpdateComplete() {
+    showUpdateDownload = false;
+    sidecarUpdate = null;
+  }
+
  onMount(() => {
-    loadSettings();
+    loadSettings().then(() => {
+      // Restore devtools state from settings
+      if ($settings.devtools_enabled) {
+        invoke('toggle_devtools', { open: true });
+      }
+    });
+    checkSidecar().then(() => {
+      if (sidecarReady) {
+        checkSidecarUpdate();
+      }
+    });

    // Global keyboard shortcuts
    function handleKeyDown(e: KeyboardEvent) {
@@ -43,8 +98,8 @@

    // Close export dropdown on outside click
    function handleClickOutside(e: MouseEvent) {
+      const target = e.target as HTMLElement;
      if (showExportMenu) {
-        const target = e.target as HTMLElement;
        if (!target.closest('.export-dropdown')) {
          showExportMenu = false;
        }
@@ -54,6 +109,8 @@
    document.addEventListener('keydown', handleKeyDown);
    document.addEventListener('click', handleClickOutside);

+    appReady = true;
+
    return () => {
      document.removeEventListener('keydown', handleKeyDown);
      document.removeEventListener('click', handleClickOutside);
@@ -67,10 +124,136 @@
  // Speaker color palette for auto-assignment
  const speakerColors = ['#e94560', '#4ecdc4', '#ffe66d', '#a8e6cf', '#ff8b94', '#c7ceea', '#ffd93d', '#6bcb77'];

+  // Ask for a .vtn destination, then write the current segments, speakers and audio path to it.
+  async function saveProject() {
+    const defaultName = currentProjectName || 'Untitled';
+    const outputPath = await save({
+      defaultPath: `${defaultName}.vtn`,
+      filters: [{ name: 'Voice to Notes Project', extensions: ['vtn'] }],
+    });
+    if (!outputPath) return;
+    const projectData = {
+      version: 1,
+      name: outputPath.split(/[\\/]/).pop()?.replace(/\.vtn$/i, '') || defaultName, // strip only the trailing extension
+      audio_file: audioFilePath,
+      created_at: new Date().toISOString(),
+      segments: $segments.map(seg => {
+        const speaker = $speakers.find(s => s.id === seg.speaker_id);
+        return {
+          text: seg.text,
+          start_ms: seg.start_ms,
+          end_ms: seg.end_ms,
+          speaker: speaker?.label ?? null,
+          is_edited: seg.is_edited,
+          words: seg.words.map(w => ({
+            word: w.word,
+            start_ms: w.start_ms,
+            end_ms: w.end_ms,
+            confidence: w.confidence ?? 0,
+          })),
+        };
+      }),
+      speakers: $speakers.map(s => ({
+        label: s.label,
+        display_name: s.display_name,
+        color: s.color || '#e94560',
+      })),
+    };
+
+    try {
+      await invoke('save_project_file', { path: outputPath, project: projectData });
+      currentProjectPath = outputPath;
+      currentProjectName = projectData.name;
+    } catch (err) {
+      console.error('Failed to save project:', err);
+      alert(`Failed to save: ${err}`);
+    }
+  }
+
+  async function openProject() {
+    const filePath = await open({
+      filters: [{ name: 'Voice to Notes Project', extensions: ['vtn'] }],
+      multiple: false,
+    });
+    if (!filePath) return;
+
+    try {
+      const project = await invoke<{
+        version: number;
+        name: string;
+        audio_file: string;
+        segments: Array<{
+          text: string;
+          start_ms: number;
+          end_ms: number;
+          speaker: string | null;
+          is_edited: boolean;
+          words: Array<{ word: string; start_ms: number; end_ms: number; confidence: number }>;
+        }>;
+        speakers: Array<{ label: string; display_name: string | null; color: string }>;
+      }>('load_project_file', { path: filePath });
+
+      // Rebuild speakers
+      const newSpeakers: Speaker[] = project.speakers.map((s, idx) => ({
+        id: `speaker-${idx}`,
+        project_id: '',
+        label: s.label,
+        display_name: s.display_name,
+        color: s.color,
+      }));
+      speakers.set(newSpeakers);
+
+      const speakerLookup = new Map(newSpeakers.map(s => [s.label, s.id]));
+
+      // Rebuild segments
+      const newSegments: Segment[] = project.segments.map((seg, idx) => ({
+        id: `seg-${idx}`,
+        project_id: '',
+        media_file_id: '',
+        speaker_id: seg.speaker ? (speakerLookup.get(seg.speaker) ?? null) : null,
+        start_ms: seg.start_ms,
+        end_ms: seg.end_ms,
+        text: seg.text,
+        original_text: null,
+        confidence: null,
+        is_edited: seg.is_edited,
+        edited_at: null,
+        segment_index: idx,
+        words: seg.words.map((w, widx) => ({
+          id: `word-${idx}-${widx}`,
+          segment_id: `seg-${idx}`,
+          word: w.word,
+          start_ms: w.start_ms,
+          end_ms: w.end_ms,
+          confidence: w.confidence,
+          word_index: widx,
+        })),
+      }));
+      segments.set(newSegments);
+
+      // Load audio
+      audioFilePath = project.audio_file;
+      audioUrl = convertFileSrc(project.audio_file);
+      waveformPlayer?.loadAudio(audioUrl);
+
+      currentProjectPath = filePath as string;
+      currentProjectName = project.name;
+    } catch (err) {
+      console.error('Failed to load project:', err);
+      alert(`Failed to load project: ${err}`);
+    }
+  }
+
  function handleWordClick(timeMs: number) {
+    console.log('[voice-to-notes] Word clicked, seeking to', timeMs, 'ms');
    waveformPlayer?.seekTo(timeMs);
  }

+  function handleTextEdit(segmentId: string, newText: string) {
+    // In-memory store is already updated by TranscriptEditor.
+    // Changes persist when user saves the project file.
+  }
+
  async function handleFileImport() {
    const filePath = await open({
      multiple: false,
@@ -82,28 +265,99 @@
    });
    if (!filePath) return;

-    // Convert file path to asset URL for wavesurfer
+    // Track the original file path and convert to asset URL for wavesurfer
+    audioFilePath = filePath;
    audioUrl = convertFileSrc(filePath);
    waveformPlayer?.loadAudio(audioUrl);

+    // Clear previous results
+    segments.set([]);
+    speakers.set([]);
+
    // Start pipeline (transcription + diarization)
    isTranscribing = true;
    transcriptionProgress = 0;
    transcriptionStage = 'Starting...';
    transcriptionMessage = 'Initializing pipeline...';

+    // Flush DOM so the progress overlay renders before the blocking invoke
+    await tick();
+
    // Listen for progress events from the sidecar
    const unlisten = await listen<{
      percent: number;
      stage: string;
      message: string;
    }>('pipeline-progress', (event) => {
+      console.log('[voice-to-notes] Progress event:', event.payload);
      const { percent, stage, message } = event.payload;
      if (typeof percent === 'number') transcriptionProgress = percent;
      if (typeof stage === 'string') transcriptionStage = stage;
      if (typeof message === 'string') transcriptionMessage = message;
    });

+    const unlistenSegment = await listen<{
+      index: number;
+      text: string;
+      start_ms: number;
+      end_ms: number;
+      words: Array<{ word: string; start_ms: number; end_ms: number; confidence: number }>;
+    }>('pipeline-segment', (event) => {
+      const seg = event.payload;
+      const newSeg: Segment = {
+        id: `seg-${seg.index}`,
+        project_id: '',
+        media_file_id: '',
+        speaker_id: null,
+        start_ms: seg.start_ms,
+        end_ms: seg.end_ms,
+        text: seg.text,
+        original_text: null,
+        confidence: null,
+        is_edited: false,
+        edited_at: null,
+        segment_index: seg.index,
+        words: seg.words.map((w, widx) => ({
+          id: `word-${seg.index}-${widx}`,
+          segment_id: `seg-${seg.index}`,
+          word: w.word,
+          start_ms: w.start_ms,
+          end_ms: w.end_ms,
+          confidence: w.confidence,
+          word_index: widx,
+        })),
+      };
+      segments.update(segs => [...segs, newSeg]);
+    });
+
+    const unlistenSpeaker = await listen<{
+      updates: Array<{ index: number; speaker: string }>;
+    }>('pipeline-speaker-update', (event) => {
+      const { updates } = event.payload;
+      // Build speakers from unique labels
+      const uniqueLabels = [...new Set(updates.map(u => u.speaker))].sort();
+      const newSpeakers: Speaker[] = uniqueLabels.map((label, idx) => ({
+        id: `speaker-${idx}`,
+        project_id: '',
+        label,
+        display_name: null,
+        color: speakerColors[idx % speakerColors.length],
+      }));
+      speakers.set(newSpeakers);
+
+      // Update existing segments with speaker assignments
+      const speakerLookup = new Map(newSpeakers.map(s => [s.label, s.id]));
+      segments.update(segs =>
+        segs.map((seg, i) => {
+          const update = updates.find(u => u.index === i);
+          if (update) {
+            return { ...seg, speaker_id: speakerLookup.get(update.speaker) ?? null };
+          }
+          return seg;
+        })
+      );
+    });
+
    try {
      const result = await invoke<{
        segments: Array<{
@@ -128,6 +382,8 @@
        device: $settings.transcription_device || undefined,
        language: $settings.transcription_language || undefined,
        skipDiarization: $settings.skip_diarization || undefined,
+        hfToken: $settings.hf_token || undefined,
+        numSpeakers: $settings.num_speakers && $settings.num_speakers > 0 ? $settings.num_speakers : undefined,
      });

      // Create speaker entries from pipeline result
@@ -169,11 +425,18 @@
      }));

      segments.set(newSegments);
+
+      // Set project name from audio file name (user can save explicitly)
+      const fileName = filePath.split(/[\\/]/).pop() || 'Untitled';
+      currentProjectName = fileName.replace(/\.[^.]+$/, '');
+      currentProjectPath = null;
    } catch (err) {
      console.error('Pipeline failed:', err);
      alert(`Pipeline failed: ${err}`);
    } finally {
      unlisten();
+      unlistenSegment();
+      unlistenSpeaker();
      isTranscribing = false;
    }
  }
@@ -229,56 +492,94 @@
  }
 </script>

-<div class="app-header">
-  <h1>Voice to Notes</h1>
-  <div class="header-actions">
-    <button class="import-btn" onclick={handleFileImport}>
-      Import Audio/Video
-    </button>
-    <button class="settings-btn" onclick={() => showSettings = true} title="Settings">
-      Settings
-    </button>
-    {#if $segments.length > 0}
-      <div class="export-dropdown">
-        <button class="export-btn" onclick={() => showExportMenu = !showExportMenu}>
-          Export
+{#if !appReady || !sidecarChecked}
+  <div class="splash-screen">
+    <h1 class="splash-title">Voice to Notes</h1>
+    <p class="splash-subtitle">Loading...</p>
+    <div class="splash-spinner"></div>
+  </div>
+{:else if sidecarChecked && !sidecarReady && !showUpdateDownload}
+  <SidecarSetup onComplete={handleSidecarSetupComplete} />
+{:else if showUpdateDownload}
+  <SidecarSetup onComplete={handleUpdateComplete} />
+{:else}
+  <div class="app-shell">
+  {#if sidecarUpdate && !updateDismissed}
+    <div class="update-banner">
+      <span class="update-text">
+        Sidecar update available (v{sidecarUpdate.current_version} &rarr; v{sidecarUpdate.latest_version})
+      </span>
+      <button class="update-btn" onclick={() => showUpdateDownload = true}>
+        Update
+      </button>
+      <button class="update-dismiss" onclick={() => updateDismissed = true} title="Dismiss">
+        &times;
+      </button>
+    </div>
+  {/if}
+  <div class="app-header">
+    <div class="header-actions">
+      <button class="settings-btn" onclick={openProject} disabled={isTranscribing}>
+        Open Project
+      </button>
+      {#if $segments.length > 0}
+        <button class="settings-btn" onclick={saveProject}>
+          Save Project
        </button>
-        {#if showExportMenu}
-          <div class="export-menu">
-            {#each exportFormats as fmt}
-              <button class="export-option" onclick={() => handleExport(fmt.format, fmt.ext, fmt.name)}>
-                {fmt.name} (.{fmt.ext})
-              </button>
-            {/each}
-          </div>
+      {/if}
+      <button class="import-btn" onclick={handleFileImport} disabled={isTranscribing}>
+        {#if isTranscribing}
+          Processing...
+        {:else}
+          Import Audio/Video
        {/if}
-      </div>
-    {/if}
+      </button>
+      <button class="settings-btn" onclick={() => showSettings = true} title="Settings">
+        Settings
+      </button>
+      {#if $segments.length > 0}
+        <div class="export-dropdown">
+          <button class="export-btn" onclick={() => showExportMenu = !showExportMenu}>
+            Export
+          </button>
+          {#if showExportMenu}
+            <div class="export-menu">
+              {#each exportFormats as fmt}
+                <button class="export-option" onclick={() => handleExport(fmt.format, fmt.ext, fmt.name)}>
+                  {fmt.name} (.{fmt.ext})
+                </button>
+              {/each}
+            </div>
+          {/if}
+        </div>
+      {/if}
+    </div>
  </div>
-</div>

-<div class="workspace">
-  <div class="main-content">
-    <WaveformPlayer bind:this={waveformPlayer} {audioUrl} />
-    <TranscriptEditor onWordClick={handleWordClick} />
+  <div class="workspace">
+    <div class="main-content">
+      <WaveformPlayer bind:this={waveformPlayer} {audioUrl} />
+      <TranscriptEditor onWordClick={handleWordClick} onTextEdit={handleTextEdit} />
+    </div>
+    <div class="sidebar-right">
+      <SpeakerManager />
+      <AIChatPanel />
+    </div>
  </div>
-  <div class="sidebar-right">
-    <SpeakerManager />
-    <AIChatPanel />
  </div>
-</div>

-<ProgressOverlay
-  visible={isTranscribing}
-  percent={transcriptionProgress}
-  stage={transcriptionStage}
-  message={transcriptionMessage}
-/>
+  <ProgressOverlay
+    visible={isTranscribing}
+    percent={transcriptionProgress}
+    stage={transcriptionStage}
+    message={transcriptionMessage}
+  />

-<SettingsModal
-  visible={showSettings}
-  onClose={() => showSettings = false}
-/>
+  <SettingsModal
+    visible={showSettings}
+    onClose={() => showSettings = false}
+  />
+{/if}

 <style>
  .app-header {
@@ -289,10 +590,6 @@
    background: #0f3460;
    color: #e0e0e0;
  }
-  h1 {
-    font-size: 1.25rem;
-    margin: 0;
-  }
  .import-btn {
    background: #e94560;
    border: none;
@@ -303,9 +600,18 @@
    font-size: 0.875rem;
    font-weight: 500;
  }
-  .import-btn:hover {
+  .import-btn:hover:not(:disabled) {
    background: #d63851;
  }
+  .import-btn:disabled {
+    opacity: 0.7;
+    cursor: not-allowed;
+    animation: pulse 1.5s ease-in-out infinite;
+  }
+  @keyframes pulse {
+    0%, 100% { opacity: 0.7; }
+    50% { opacity: 1; }
+  }
  .header-actions {
    display: flex;
    gap: 0.5rem;
@@ -320,10 +626,14 @@
    cursor: pointer;
    font-size: 0.875rem;
  }
-  .settings-btn:hover {
+  .settings-btn:hover:not(:disabled) {
    background: rgba(255,255,255,0.05);
    border-color: #e94560;
  }
+  .settings-btn:disabled {
+    opacity: 0.5;
+    cursor: not-allowed;
+  }
  .export-dropdown {
    position: relative;
  }
@@ -366,11 +676,19 @@
  .export-option:hover {
    background: rgba(233, 69, 96, 0.2);
  }
+  .app-shell {
+    display: flex;
+    flex-direction: column;
+    height: 100vh;
+    overflow: hidden;
+  }
  .workspace {
    display: flex;
    gap: 1rem;
    padding: 1rem;
-    height: calc(100vh - 3.5rem);
+    flex: 1;
+    min-height: 0;
+    overflow: hidden;
    background: #0a0a23;
  }
  .main-content {
@@ -379,6 +697,8 @@
    flex-direction: column;
    gap: 1rem;
    min-width: 0;
+    min-height: 0;
+    overflow-y: auto;
  }
  .sidebar-right {
    width: 300px;
@@ -386,5 +706,79 @@
    flex-direction: column;
    gap: 1rem;
    flex-shrink: 0;
+    min-height: 0;
+    overflow-y: auto;
+  }
+  .splash-screen {
+    display: flex;
+    flex-direction: column;
+    align-items: center;
+    justify-content: center;
+    height: 100vh;
+    background: #0a0a23;
+    color: #e0e0e0;
+    gap: 1rem;
+  }
+  .splash-title {
+    font-size: 2rem;
+    margin: 0;
+    color: #e94560;
+  }
+  .splash-subtitle {
+    font-size: 1rem;
+    color: #888;
+    margin: 0;
+  }
+  .splash-spinner {
+    width: 32px;
+    height: 32px;
+    border: 3px solid #2a3a5e;
+    border-top-color: #e94560;
+    border-radius: 50%;
+    animation: spin 0.8s linear infinite;
+  }
+  @keyframes spin {
+    to { transform: rotate(360deg); }
+  }
+
+  /* Sidecar update banner */
+  .update-banner {
+    display: flex;
+    align-items: center;
+    gap: 0.75rem;
+    padding: 0.5rem 1rem;
+    background: rgba(78, 205, 196, 0.1);
+    border-bottom: 1px solid rgba(78, 205, 196, 0.25);
+    color: #e0e0e0;
+    font-size: 0.85rem;
+  }
+  .update-text {
+    flex: 1;
+    color: #b0b0b0;
+  }
+  .update-btn {
+    background: #4ecdc4;
+    border: none;
+    color: #0a0a23;
+    padding: 0.3rem 0.85rem;
+    border-radius: 4px;
+    cursor: pointer;
+    font-size: 0.8rem;
+    font-weight: 600;
+  }
+  .update-btn:hover {
+    background: #3dbdb5;
+  }
+  .update-dismiss {
+    background: none;
+    border: none;
+    color: #888;
+    font-size: 1.1rem;
+    cursor: pointer;
+    padding: 0.1rem 0.3rem;
+    line-height: 1;
+  }
+  .update-dismiss:hover {
+    color: #e0e0e0;
  }
 </style>