2 Commits

Author SHA1 Message Date
Claude
eb9ec687cb Use uv for Python management in CI and build script
- CI: install uv via astral-sh/setup-uv, use uv to install Python
  and run the build script (replaces setup-python which fails on
  self-hosted macOS runners)
- build_sidecar.py: auto-detects uv and uses it for venv creation
  and package installation (much faster), falls back to standard
  venv + pip when uv is not available

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-20 22:56:13 -07:00
Claude
d297540053 Skip setup-python on macOS, use system Python instead
setup-python's internal install script hardcodes /Users/runner which
fails on self-hosted runners without sudo. macOS ships with Python 3
so we use it directly and skip the action entirely.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-20 22:47:05 -07:00
35 changed files with 353 additions and 2551 deletions

View File

@@ -1,402 +0,0 @@
name: Build Sidecars
on:
push:
branches: [main]
paths: ['python/**']
workflow_dispatch:
jobs:
bump-sidecar-version:
name: Bump sidecar version and tag
if: "!contains(github.event.head_commit.message, '[skip ci]')"
runs-on: ubuntu-latest
outputs:
version: ${{ steps.bump.outputs.version }}
tag: ${{ steps.bump.outputs.tag }}
has_changes: ${{ steps.check_changes.outputs.has_changes }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 2
- name: Check for python changes
id: check_changes
run: |
# If triggered by workflow_dispatch, always build
if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
echo "has_changes=true" >> $GITHUB_OUTPUT
exit 0
fi
# Check if any python/ files changed in this commit
CHANGED=$(git diff --name-only HEAD~1 HEAD -- python/ 2>/dev/null || echo "")
if [ -n "$CHANGED" ]; then
echo "has_changes=true" >> $GITHUB_OUTPUT
echo "Python changes detected: $CHANGED"
else
echo "has_changes=false" >> $GITHUB_OUTPUT
echo "No python/ changes detected, skipping sidecar build"
fi
- name: Configure git
if: steps.check_changes.outputs.has_changes == 'true'
run: |
git config user.name "Gitea Actions"
git config user.email "actions@gitea.local"
- name: Bump sidecar patch version
if: steps.check_changes.outputs.has_changes == 'true'
id: bump
run: |
# Read current version from python/pyproject.toml
CURRENT=$(grep '^version = ' python/pyproject.toml | head -1 | sed 's/version = "\(.*\)"/\1/')
echo "Current sidecar version: ${CURRENT}"
# Increment patch number
MAJOR=$(echo "${CURRENT}" | cut -d. -f1)
MINOR=$(echo "${CURRENT}" | cut -d. -f2)
PATCH=$(echo "${CURRENT}" | cut -d. -f3)
NEW_PATCH=$((PATCH + 1))
NEW_VERSION="${MAJOR}.${MINOR}.${NEW_PATCH}"
echo "New sidecar version: ${NEW_VERSION}"
# Update python/pyproject.toml
sed -i "s/^version = \"${CURRENT}\"/version = \"${NEW_VERSION}\"/" python/pyproject.toml
echo "version=${NEW_VERSION}" >> $GITHUB_OUTPUT
echo "tag=sidecar-v${NEW_VERSION}" >> $GITHUB_OUTPUT
- name: Commit and tag
if: steps.check_changes.outputs.has_changes == 'true'
env:
BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
run: |
NEW_VERSION="${{ steps.bump.outputs.version }}"
TAG="${{ steps.bump.outputs.tag }}"
git add python/pyproject.toml
git commit -m "chore: bump sidecar version to ${NEW_VERSION} [skip ci]"
git tag "${TAG}"
# Push using token for authentication (rebase in case another workflow pushed first)
REMOTE_URL=$(git remote get-url origin | sed "s|://|://gitea-actions:${BUILD_TOKEN}@|")
git pull --rebase "${REMOTE_URL}" main || true
git push "${REMOTE_URL}" HEAD:main
git push "${REMOTE_URL}" "${TAG}"
- name: Create Gitea release
if: steps.check_changes.outputs.has_changes == 'true'
env:
BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
run: |
REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
TAG="${{ steps.bump.outputs.tag }}"
VERSION="${{ steps.bump.outputs.version }}"
RELEASE_NAME="Sidecar v${VERSION}"
curl -s -X POST \
-H "Authorization: token ${BUILD_TOKEN}" \
-H "Content-Type: application/json" \
-d "{\"tag_name\": \"${TAG}\", \"name\": \"${RELEASE_NAME}\", \"body\": \"Automated sidecar build.\", \"draft\": false, \"prerelease\": false}" \
"${REPO_API}/releases"
echo "Created release: ${RELEASE_NAME}"
build-sidecar-linux:
name: Build Sidecar (Linux)
needs: bump-sidecar-version
if: needs.bump-sidecar-version.outputs.has_changes == 'true'
runs-on: ubuntu-latest
env:
PYTHON_VERSION: "3.11"
steps:
- uses: actions/checkout@v4
with:
ref: ${{ needs.bump-sidecar-version.outputs.tag }}
- name: Install uv
run: |
if command -v uv &> /dev/null; then
echo "uv already installed: $(uv --version)"
else
curl -LsSf https://astral.sh/uv/install.sh | sh
echo "$HOME/.local/bin" >> $GITHUB_PATH
fi
- name: Install ffmpeg
run: sudo apt-get update && sudo apt-get install -y ffmpeg
- name: Set up Python
run: uv python install ${{ env.PYTHON_VERSION }}
- name: Build sidecar (CUDA)
working-directory: python
run: uv run --python ${{ env.PYTHON_VERSION }} python build_sidecar.py --with-cuda
- name: Package sidecar (CUDA)
run: |
cd python/dist/voice-to-notes-sidecar && zip -r ../../../sidecar-linux-x86_64-cuda.zip .
- name: Build sidecar (CPU)
working-directory: python
run: |
rm -rf dist/voice-to-notes-sidecar
uv run --python ${{ env.PYTHON_VERSION }} python build_sidecar.py --cpu-only
- name: Package sidecar (CPU)
run: |
cd python/dist/voice-to-notes-sidecar && zip -r ../../../sidecar-linux-x86_64-cpu.zip .
- name: Upload to sidecar release
env:
BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
run: |
sudo apt-get install -y jq
REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
TAG="${{ needs.bump-sidecar-version.outputs.tag }}"
# Find the sidecar release by tag (retry up to 30 times with 10s delay)
echo "Waiting for sidecar release ${TAG} to be available..."
for i in $(seq 1 30); do
RELEASE_JSON=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
"${REPO_API}/releases/tags/${TAG}")
RELEASE_ID=$(echo "$RELEASE_JSON" | jq -r '.id // empty')
if [ -n "${RELEASE_ID}" ] && [ "${RELEASE_ID}" != "null" ]; then
echo "Found sidecar release: ${TAG} (ID: ${RELEASE_ID})"
break
fi
echo "Attempt ${i}/30: Release not ready yet, retrying in 10s..."
sleep 10
done
if [ -z "${RELEASE_ID}" ] || [ "${RELEASE_ID}" = "null" ]; then
echo "ERROR: Failed to find sidecar release for tag ${TAG} after 30 attempts."
exit 1
fi
for file in sidecar-*.zip; do
filename=$(basename "$file")
encoded_name=$(echo "$filename" | sed 's/ /%20/g')
echo "Uploading ${filename} ($(du -h "$file" | cut -f1))..."
ASSET_ID=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
"${REPO_API}/releases/${RELEASE_ID}/assets" | jq -r ".[] | select(.name == \"${filename}\") | .id // empty")
if [ -n "${ASSET_ID}" ]; then
curl -s -X DELETE -H "Authorization: token ${BUILD_TOKEN}" \
"${REPO_API}/releases/${RELEASE_ID}/assets/${ASSET_ID}"
fi
HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X POST \
-H "Authorization: token ${BUILD_TOKEN}" \
-H "Content-Type: application/octet-stream" \
-T "$file" \
"${REPO_API}/releases/${RELEASE_ID}/assets?name=${encoded_name}")
echo "Upload response: HTTP ${HTTP_CODE}"
done
build-sidecar-windows:
name: Build Sidecar (Windows)
needs: bump-sidecar-version
if: needs.bump-sidecar-version.outputs.has_changes == 'true'
runs-on: windows-latest
env:
PYTHON_VERSION: "3.11"
steps:
- uses: actions/checkout@v4
with:
ref: ${{ needs.bump-sidecar-version.outputs.tag }}
- name: Install uv
shell: powershell
run: |
if (Get-Command uv -ErrorAction SilentlyContinue) {
Write-Host "uv already installed: $(uv --version)"
} else {
irm https://astral.sh/uv/install.ps1 | iex
echo "$env:USERPROFILE\.local\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
}
- name: Install ffmpeg
shell: powershell
run: choco install ffmpeg -y
- name: Set up Python
shell: powershell
run: uv python install ${{ env.PYTHON_VERSION }}
- name: Install 7-Zip
shell: powershell
run: |
if (-not (Get-Command 7z -ErrorAction SilentlyContinue)) {
choco install 7zip -y
}
- name: Build sidecar (CUDA)
shell: powershell
working-directory: python
run: uv run --python ${{ env.PYTHON_VERSION }} python build_sidecar.py --with-cuda
- name: Package sidecar (CUDA)
shell: powershell
run: |
7z a -tzip -mx=5 sidecar-windows-x86_64-cuda.zip .\python\dist\voice-to-notes-sidecar\*
- name: Build sidecar (CPU)
shell: powershell
working-directory: python
run: |
Remove-Item -Recurse -Force dist\voice-to-notes-sidecar
uv run --python ${{ env.PYTHON_VERSION }} python build_sidecar.py --cpu-only
- name: Package sidecar (CPU)
shell: powershell
run: |
7z a -tzip -mx=5 sidecar-windows-x86_64-cpu.zip .\python\dist\voice-to-notes-sidecar\*
- name: Upload to sidecar release
shell: powershell
env:
BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
run: |
$REPO_API = "${{ github.server_url }}/api/v1/repos/${{ github.repository }}"
$Headers = @{ "Authorization" = "token $env:BUILD_TOKEN" }
$TAG = "${{ needs.bump-sidecar-version.outputs.tag }}"
# Find the sidecar release by tag (retry up to 30 times with 10s delay)
Write-Host "Waiting for sidecar release ${TAG} to be available..."
$RELEASE_ID = $null
for ($i = 1; $i -le 30; $i++) {
try {
$release = Invoke-RestMethod -Uri "${REPO_API}/releases/tags/${TAG}" -Headers $Headers -ErrorAction Stop
$RELEASE_ID = $release.id
if ($RELEASE_ID) {
Write-Host "Found sidecar release: ${TAG} (ID: ${RELEASE_ID})"
break
}
} catch {
# Release not ready yet
}
Write-Host "Attempt ${i}/30: Release not ready yet, retrying in 10s..."
Start-Sleep -Seconds 10
}
if (-not $RELEASE_ID) {
Write-Host "ERROR: Failed to find sidecar release for tag ${TAG} after 30 attempts."
exit 1
}
Get-ChildItem -Path . -Filter "sidecar-*.zip" | ForEach-Object {
$filename = $_.Name
$encodedName = [System.Uri]::EscapeDataString($filename)
$size = [math]::Round($_.Length / 1MB, 1)
Write-Host "Uploading ${filename} (${size} MB)..."
try {
$assets = Invoke-RestMethod -Uri "${REPO_API}/releases/${RELEASE_ID}/assets" -Headers $Headers
$existing = $assets | Where-Object { $_.name -eq $filename }
if ($existing) {
Invoke-RestMethod -Uri "${REPO_API}/releases/${RELEASE_ID}/assets/$($existing.id)" -Method Delete -Headers $Headers
}
} catch {}
$uploadUrl = "${REPO_API}/releases/${RELEASE_ID}/assets?name=${encodedName}"
$result = curl.exe --fail --silent --show-error `
-X POST `
-H "Authorization: token $env:BUILD_TOKEN" `
-H "Content-Type: application/octet-stream" `
-T "$($_.FullName)" `
"$uploadUrl" 2>&1
if ($LASTEXITCODE -eq 0) {
Write-Host "Upload successful: ${filename}"
} else {
Write-Host "WARNING: Upload failed for ${filename}: ${result}"
}
}
build-sidecar-macos:
name: Build Sidecar (macOS)
needs: bump-sidecar-version
if: needs.bump-sidecar-version.outputs.has_changes == 'true'
runs-on: macos-latest
env:
PYTHON_VERSION: "3.11"
steps:
- uses: actions/checkout@v4
with:
ref: ${{ needs.bump-sidecar-version.outputs.tag }}
- name: Install uv
run: |
if command -v uv &> /dev/null; then
echo "uv already installed: $(uv --version)"
else
curl -LsSf https://astral.sh/uv/install.sh | sh
echo "$HOME/.local/bin" >> $GITHUB_PATH
fi
- name: Install ffmpeg
run: brew install ffmpeg
- name: Set up Python
run: uv python install ${{ env.PYTHON_VERSION }}
- name: Build sidecar (CPU)
working-directory: python
run: uv run --python ${{ env.PYTHON_VERSION }} python build_sidecar.py --cpu-only
- name: Package sidecar (CPU)
run: |
cd python/dist/voice-to-notes-sidecar && zip -r ../../../sidecar-macos-aarch64-cpu.zip .
- name: Upload to sidecar release
env:
BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
run: |
which jq || brew install jq
REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
TAG="${{ needs.bump-sidecar-version.outputs.tag }}"
# Find the sidecar release by tag (retry up to 30 times with 10s delay)
echo "Waiting for sidecar release ${TAG} to be available..."
for i in $(seq 1 30); do
RELEASE_JSON=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
"${REPO_API}/releases/tags/${TAG}")
RELEASE_ID=$(echo "$RELEASE_JSON" | jq -r '.id // empty')
if [ -n "${RELEASE_ID}" ] && [ "${RELEASE_ID}" != "null" ]; then
echo "Found sidecar release: ${TAG} (ID: ${RELEASE_ID})"
break
fi
echo "Attempt ${i}/30: Release not ready yet, retrying in 10s..."
sleep 10
done
if [ -z "${RELEASE_ID}" ] || [ "${RELEASE_ID}" = "null" ]; then
echo "ERROR: Failed to find sidecar release for tag ${TAG} after 30 attempts."
exit 1
fi
for file in sidecar-*.zip; do
filename=$(basename "$file")
encoded_name=$(echo "$filename" | sed 's/ /%20/g')
echo "Uploading ${filename} ($(du -h "$file" | cut -f1))..."
ASSET_ID=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
"${REPO_API}/releases/${RELEASE_ID}/assets" | jq -r ".[] | select(.name == \"${filename}\") | .id // empty")
if [ -n "${ASSET_ID}" ]; then
curl -s -X DELETE -H "Authorization: token ${BUILD_TOKEN}" \
"${REPO_API}/releases/${RELEASE_ID}/assets/${ASSET_ID}"
fi
HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X POST \
-H "Authorization: token ${BUILD_TOKEN}" \
-H "Content-Type: application/octet-stream" \
-T "$file" \
"${REPO_API}/releases/${RELEASE_ID}/assets?name=${encoded_name}")
echo "Upload response: HTTP ${HTTP_CODE}"
done

191
.gitea/workflows/build.yml Normal file
View File

@@ -0,0 +1,191 @@
name: Build & Release
on:
push:
branches: [main]
tags: ["v*"]
pull_request:
branches: [main]
env:
PYTHON_VERSION: "3.11"
NODE_VERSION: "20"
jobs:
build-sidecar:
name: Build sidecar (${{ matrix.target }})
runs-on: ${{ matrix.runner }}
strategy:
fail-fast: false
matrix:
include:
- runner: ubuntu-latest
target: x86_64-unknown-linux-gnu
platform: linux
- runner: windows-latest
target: x86_64-pc-windows-msvc
platform: windows
- runner: macos-latest
target: aarch64-apple-darwin
platform: macos
steps:
- uses: actions/checkout@v4
- name: Install uv
uses: astral-sh/setup-uv@v4
- name: Set up Python
run: uv python install ${{ env.PYTHON_VERSION }}
- name: Build sidecar
working-directory: python
run: uv run --python ${{ env.PYTHON_VERSION }} python build_sidecar.py --cpu-only
- name: Upload sidecar artifact
uses: actions/upload-artifact@v3
with:
name: sidecar-${{ matrix.target }}
path: python/dist/voice-to-notes-sidecar/
retention-days: 7
build-tauri:
name: Build app (${{ matrix.target }})
needs: build-sidecar
runs-on: ${{ matrix.runner }}
strategy:
fail-fast: false
matrix:
include:
- runner: ubuntu-latest
target: x86_64-unknown-linux-gnu
platform: linux
- runner: windows-latest
target: x86_64-pc-windows-msvc
platform: windows
- runner: macos-latest
target: aarch64-apple-darwin
platform: macos
steps:
- uses: actions/checkout@v4
- name: Set up Node.js
uses: actions/setup-node@v4
with:
node-version: ${{ env.NODE_VERSION }}
- name: Install Rust stable
uses: dtolnay/rust-toolchain@stable
- name: Install system dependencies (Linux)
if: matrix.platform == 'linux'
run: |
sudo apt-get update
sudo apt-get install -y libgtk-3-dev libwebkit2gtk-4.1-dev libappindicator3-dev librsvg2-dev patchelf
- name: Install system dependencies (macOS)
if: matrix.platform == 'macos'
run: |
brew install --quiet create-dmg || true
- name: Download sidecar artifact
uses: actions/download-artifact@v3
with:
name: sidecar-${{ matrix.target }}
path: src-tauri/binaries/
- name: Make sidecar executable (Unix)
if: matrix.platform != 'windows'
run: chmod +x src-tauri/binaries/voice-to-notes-sidecar-${{ matrix.target }}
- name: Install npm dependencies
run: npm ci
- name: Build Tauri app
run: npm run tauri build
env:
TAURI_SIGNING_PRIVATE_KEY: ${{ secrets.TAURI_SIGNING_PRIVATE_KEY }}
TAURI_CONFIG: '{"bundle":{"externalBin":["binaries/voice-to-notes-sidecar"]}}'
- name: Upload app artifacts (Linux)
if: matrix.platform == 'linux'
uses: actions/upload-artifact@v3
with:
name: app-${{ matrix.target }}
path: |
src-tauri/target/release/bundle/deb/*.deb
src-tauri/target/release/bundle/appimage/*.AppImage
retention-days: 30
- name: Upload app artifacts (Windows)
if: matrix.platform == 'windows'
uses: actions/upload-artifact@v3
with:
name: app-${{ matrix.target }}
path: |
src-tauri/target/release/bundle/msi/*.msi
src-tauri/target/release/bundle/nsis/*.exe
retention-days: 30
- name: Upload app artifacts (macOS)
if: matrix.platform == 'macos'
uses: actions/upload-artifact@v3
with:
name: app-${{ matrix.target }}
path: |
src-tauri/target/release/bundle/dmg/*.dmg
src-tauri/target/release/bundle/macos/*.app
retention-days: 30
release:
name: Create Release
needs: build-tauri
if: github.ref == 'refs/heads/main'
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Install required tools
run: |
sudo apt-get update
sudo apt-get install -y jq curl
- name: Download all app artifacts
uses: actions/download-artifact@v3
with:
path: artifacts/
- name: Generate release tag
id: tag
run: echo "tag=build-$(date +%Y%m%d-%H%M%S)" >> $GITHUB_OUTPUT
- name: Create release
env:
BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
TAG: ${{ steps.tag.outputs.tag }}
run: |
# Create the release
RELEASE_ID=$(curl -s -X POST \
-H "Authorization: token ${BUILD_TOKEN}" \
-H "Content-Type: application/json" \
-d "{\"tag_name\": \"${TAG}\", \"name\": \"Voice to Notes ${TAG}\", \"body\": \"Automated build from main branch.\", \"draft\": false, \"prerelease\": true}" \
"${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}/releases" | jq -r '.id')
echo "Release ID: ${RELEASE_ID}"
if [ "${RELEASE_ID}" = "null" ] || [ -z "${RELEASE_ID}" ]; then
echo "ERROR: Failed to create release. Check BUILD_TOKEN permissions."
exit 1
fi
# Upload all artifacts
find artifacts/ -type f \( -name "*.deb" -o -name "*.AppImage" -o -name "*.msi" -o -name "*.exe" -o -name "*.dmg" \) | while read file; do
filename=$(basename "$file")
echo "Uploading ${filename}..."
curl -s -X POST \
-H "Authorization: token ${BUILD_TOKEN}" \
-H "Content-Type: application/octet-stream" \
--data-binary "@${file}" \
"${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}/releases/${RELEASE_ID}/assets?name=${filename}"
done

View File

@@ -1,65 +0,0 @@
name: Cleanup Old Releases
on:
# Run after release and sidecar workflows complete
schedule:
- cron: '0 6 * * *' # Daily at 6am UTC
workflow_dispatch:
jobs:
cleanup:
name: Remove old releases
runs-on: ubuntu-latest
env:
KEEP_COUNT: 5
steps:
- name: Cleanup old app releases
env:
BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
run: |
REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
# Get all releases, sorted newest first (API default)
RELEASES=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
"${REPO_API}/releases?limit=50")
# Separate app releases (v*) and sidecar releases (sidecar-v*)
APP_IDS=$(echo "$RELEASES" | jq -r '[.[] | select(.tag_name | startswith("v") and (startswith("sidecar") | not)) | .id] | .[]')
SIDECAR_IDS=$(echo "$RELEASES" | jq -r '[.[] | select(.tag_name | startswith("sidecar-v")) | .id] | .[]')
# Delete app releases beyond KEEP_COUNT
COUNT=0
for ID in $APP_IDS; do
COUNT=$((COUNT + 1))
if [ $COUNT -le ${{ env.KEEP_COUNT }} ]; then
continue
fi
TAG=$(echo "$RELEASES" | jq -r ".[] | select(.id == $ID) | .tag_name")
echo "Deleting app release $ID ($TAG)..."
curl -s -o /dev/null -w "HTTP %{http_code}\n" -X DELETE \
-H "Authorization: token ${BUILD_TOKEN}" \
"${REPO_API}/releases/$ID"
# Also delete the tag
curl -s -o /dev/null -X DELETE \
-H "Authorization: token ${BUILD_TOKEN}" \
"${REPO_API}/tags/$TAG"
done
# Delete sidecar releases beyond KEEP_COUNT
COUNT=0
for ID in $SIDECAR_IDS; do
COUNT=$((COUNT + 1))
if [ $COUNT -le ${{ env.KEEP_COUNT }} ]; then
continue
fi
TAG=$(echo "$RELEASES" | jq -r ".[] | select(.id == $ID) | .tag_name")
echo "Deleting sidecar release $ID ($TAG)..."
curl -s -o /dev/null -w "HTTP %{http_code}\n" -X DELETE \
-H "Authorization: token ${BUILD_TOKEN}" \
"${REPO_API}/releases/$ID"
curl -s -o /dev/null -X DELETE \
-H "Authorization: token ${BUILD_TOKEN}" \
"${REPO_API}/tags/$TAG"
done
echo "Cleanup complete. Kept latest ${{ env.KEEP_COUNT }} of each type."

View File

@@ -1,305 +0,0 @@
name: Release
on:
push:
branches: [main]
jobs:
bump-version:
name: Bump version and tag
# Skip if this is a version-bump commit (avoid infinite loop)
if: "!contains(github.event.head_commit.message, '[skip ci]')"
runs-on: ubuntu-latest
outputs:
new_version: ${{ steps.bump.outputs.new_version }}
tag: ${{ steps.bump.outputs.tag }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Configure git
run: |
git config user.name "Gitea Actions"
git config user.email "actions@gitea.local"
- name: Bump patch version
id: bump
run: |
# Read current version from package.json
CURRENT=$(grep '"version"' package.json | head -1 | sed 's/.*"version": *"\([^"]*\)".*/\1/')
echo "Current version: ${CURRENT}"
# Increment patch number
MAJOR=$(echo "${CURRENT}" | cut -d. -f1)
MINOR=$(echo "${CURRENT}" | cut -d. -f2)
PATCH=$(echo "${CURRENT}" | cut -d. -f3)
NEW_PATCH=$((PATCH + 1))
NEW_VERSION="${MAJOR}.${MINOR}.${NEW_PATCH}"
echo "New version: ${NEW_VERSION}"
# Update package.json
sed -i "s/\"version\": \"${CURRENT}\"/\"version\": \"${NEW_VERSION}\"/" package.json
# Update src-tauri/tauri.conf.json
sed -i "s/\"version\": \"${CURRENT}\"/\"version\": \"${NEW_VERSION}\"/" src-tauri/tauri.conf.json
# Update src-tauri/Cargo.toml (match version = "x.y.z" in [package] section)
sed -i "s/^version = \"${CURRENT}\"/version = \"${NEW_VERSION}\"/" src-tauri/Cargo.toml
echo "new_version=${NEW_VERSION}" >> $GITHUB_OUTPUT
echo "tag=v${NEW_VERSION}" >> $GITHUB_OUTPUT
- name: Commit and tag
env:
BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
run: |
NEW_VERSION="${{ steps.bump.outputs.new_version }}"
git add package.json src-tauri/tauri.conf.json src-tauri/Cargo.toml
git commit -m "chore: bump version to ${NEW_VERSION} [skip ci]"
git tag "v${NEW_VERSION}"
# Push using token for authentication (rebase in case another workflow pushed first)
REMOTE_URL=$(git remote get-url origin | sed "s|://|://gitea-actions:${BUILD_TOKEN}@|")
git pull --rebase "${REMOTE_URL}" main || true
git push "${REMOTE_URL}" HEAD:main
git push "${REMOTE_URL}" "v${NEW_VERSION}"
- name: Create Gitea release
env:
BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
run: |
REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
TAG="${{ steps.bump.outputs.tag }}"
RELEASE_NAME="Voice to Notes ${TAG}"
curl -s -X POST \
-H "Authorization: token ${BUILD_TOKEN}" \
-H "Content-Type: application/json" \
-d "{\"tag_name\": \"${TAG}\", \"name\": \"${RELEASE_NAME}\", \"body\": \"Automated build.\", \"draft\": false, \"prerelease\": false}" \
"${REPO_API}/releases"
echo "Created release: ${RELEASE_NAME}"
# ── Platform builds (run after version bump) ──
build-linux:
name: Build App (Linux)
needs: bump-version
runs-on: ubuntu-latest
env:
NODE_VERSION: "20"
steps:
- uses: actions/checkout@v4
with:
ref: ${{ needs.bump-version.outputs.tag }}
- name: Set up Node.js
uses: actions/setup-node@v4
with:
node-version: ${{ env.NODE_VERSION }}
- name: Install Rust stable
run: |
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable
echo "$HOME/.cargo/bin" >> $GITHUB_PATH
- name: Install system dependencies
run: |
sudo apt-get update
sudo apt-get install -y libgtk-3-dev libwebkit2gtk-4.1-dev libappindicator3-dev librsvg2-dev patchelf xdg-utils rpm
- name: Install npm dependencies
run: npm ci
- name: Build Tauri app
run: npm run tauri build
- name: Upload to release
env:
BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
run: |
sudo apt-get install -y jq
REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
TAG="${{ needs.bump-version.outputs.tag }}"
echo "Release tag: ${TAG}"
RELEASE_ID=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
"${REPO_API}/releases/tags/${TAG}" | jq -r '.id // empty')
if [ -z "${RELEASE_ID}" ] || [ "${RELEASE_ID}" = "null" ]; then
echo "ERROR: Failed to find release for tag ${TAG}."
exit 1
fi
echo "Release ID: ${RELEASE_ID}"
find src-tauri/target/release/bundle -type f \( -name "*.deb" -o -name "*.rpm" \) | while IFS= read -r file; do
filename=$(basename "$file")
encoded_name=$(echo "$filename" | sed 's/ /%20/g')
echo "Uploading ${filename} ($(du -h "$file" | cut -f1))..."
ASSET_ID=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
"${REPO_API}/releases/${RELEASE_ID}/assets" | jq -r ".[] | select(.name == \"${filename}\") | .id // empty")
if [ -n "${ASSET_ID}" ]; then
curl -s -X DELETE -H "Authorization: token ${BUILD_TOKEN}" \
"${REPO_API}/releases/${RELEASE_ID}/assets/${ASSET_ID}"
fi
HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X POST \
-H "Authorization: token ${BUILD_TOKEN}" \
-H "Content-Type: application/octet-stream" \
-T "$file" \
"${REPO_API}/releases/${RELEASE_ID}/assets?name=${encoded_name}")
echo "Upload response: HTTP ${HTTP_CODE}"
done
build-windows:
name: Build App (Windows)
needs: bump-version
runs-on: windows-latest
env:
NODE_VERSION: "20"
steps:
- uses: actions/checkout@v4
with:
ref: ${{ needs.bump-version.outputs.tag }}
- name: Set up Node.js
uses: actions/setup-node@v4
with:
node-version: ${{ env.NODE_VERSION }}
- name: Install Rust stable
shell: powershell
run: |
if (Get-Command rustup -ErrorAction SilentlyContinue) {
rustup default stable
} else {
Invoke-WebRequest -Uri https://win.rustup.rs/x86_64 -OutFile rustup-init.exe
.\rustup-init.exe -y --default-toolchain stable
echo "$env:USERPROFILE\.cargo\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
}
- name: Install npm dependencies
shell: powershell
run: npm ci
- name: Build Tauri app
shell: powershell
run: npm run tauri build
- name: Upload to release
shell: powershell
env:
BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
run: |
$REPO_API = "${{ github.server_url }}/api/v1/repos/${{ github.repository }}"
$Headers = @{ "Authorization" = "token $env:BUILD_TOKEN" }
$TAG = "${{ needs.bump-version.outputs.tag }}"
Write-Host "Release tag: ${TAG}"
$release = Invoke-RestMethod -Uri "${REPO_API}/releases/tags/${TAG}" -Headers $Headers -ErrorAction Stop
$RELEASE_ID = $release.id
Write-Host "Release ID: ${RELEASE_ID}"
Get-ChildItem -Path src-tauri\target\release\bundle -Recurse -Include *.msi,*-setup.exe | ForEach-Object {
$filename = $_.Name
$encodedName = [System.Uri]::EscapeDataString($filename)
$size = [math]::Round($_.Length / 1MB, 1)
Write-Host "Uploading ${filename} (${size} MB)..."
try {
$assets = Invoke-RestMethod -Uri "${REPO_API}/releases/${RELEASE_ID}/assets" -Headers $Headers
$existing = $assets | Where-Object { $_.name -eq $filename }
if ($existing) {
Invoke-RestMethod -Uri "${REPO_API}/releases/${RELEASE_ID}/assets/$($existing.id)" -Method Delete -Headers $Headers
}
} catch {}
# Use curl for streaming upload (Invoke-RestMethod fails on large files)
$uploadUrl = "${REPO_API}/releases/${RELEASE_ID}/assets?name=${encodedName}"
$result = curl.exe --fail --silent --show-error `
-X POST `
-H "Authorization: token $env:BUILD_TOKEN" `
-H "Content-Type: application/octet-stream" `
-T "$($_.FullName)" `
"$uploadUrl" 2>&1
if ($LASTEXITCODE -eq 0) {
Write-Host "Upload successful: ${filename}"
} else {
Write-Host "WARNING: Upload failed for ${filename}: ${result}"
}
}
build-macos:
name: Build App (macOS)
needs: bump-version
runs-on: macos-latest
env:
NODE_VERSION: "20"
steps:
- uses: actions/checkout@v4
with:
ref: ${{ needs.bump-version.outputs.tag }}
- name: Set up Node.js
uses: actions/setup-node@v4
with:
node-version: ${{ env.NODE_VERSION }}
- name: Install Rust stable
run: |
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable
echo "$HOME/.cargo/bin" >> $GITHUB_PATH
- name: Install system dependencies
run: brew install --quiet create-dmg || true
- name: Install npm dependencies
run: npm ci
- name: Build Tauri app
run: npm run tauri build
- name: Upload to release
env:
BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
run: |
which jq || brew install jq
REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
TAG="${{ needs.bump-version.outputs.tag }}"
echo "Release tag: ${TAG}"
RELEASE_ID=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
"${REPO_API}/releases/tags/${TAG}" | jq -r '.id // empty')
if [ -z "${RELEASE_ID}" ] || [ "${RELEASE_ID}" = "null" ]; then
echo "ERROR: Failed to find release for tag ${TAG}."
exit 1
fi
echo "Release ID: ${RELEASE_ID}"
find src-tauri/target/release/bundle -type f -name "*.dmg" | while IFS= read -r file; do
filename=$(basename "$file")
encoded_name=$(echo "$filename" | sed 's/ /%20/g')
echo "Uploading ${filename} ($(du -h "$file" | cut -f1))..."
ASSET_ID=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
"${REPO_API}/releases/${RELEASE_ID}/assets" | jq -r ".[] | select(.name == \"${filename}\") | .id // empty")
if [ -n "${ASSET_ID}" ]; then
curl -s -X DELETE -H "Authorization: token ${BUILD_TOKEN}" \
"${REPO_API}/releases/${RELEASE_ID}/assets/${ASSET_ID}"
fi
HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X POST \
-H "Authorization: token ${BUILD_TOKEN}" \
-H "Content-Type: application/octet-stream" \
-T "$file" \
"${REPO_API}/releases/${RELEASE_ID}/assets?name=${encoded_name}")
echo "Upload response: HTTP ${HTTP_CODE}"
done

1
.gitignore vendored
View File

@@ -50,6 +50,5 @@ Thumbs.db
# Sidecar build artifacts # Sidecar build artifacts
src-tauri/binaries/* src-tauri/binaries/*
!src-tauri/binaries/.gitkeep !src-tauri/binaries/.gitkeep
src-tauri/sidecar.zip
python/dist/ python/dist/
python/build/ python/build/

View File

@@ -1,140 +0,0 @@
# Contributing to Voice to Notes
Thank you for your interest in contributing! This guide covers how to set up the project for development and submit changes.
## Development Setup
### Prerequisites
- **Node.js 20+** and npm
- **Rust** (stable toolchain)
- **Python 3.11+** with [uv](https://docs.astral.sh/uv/) (recommended) or pip
- **System libraries (Linux only):**
```bash
sudo apt install libgtk-3-dev libwebkit2gtk-4.1-dev libappindicator3-dev librsvg2-dev patchelf xdg-utils
```
### Clone and Install
```bash
git clone https://repo.anhonesthost.net/MacroPad/voice-to-notes.git
cd voice-to-notes
# Frontend
npm install
# Python sidecar
cd python && pip install -e ".[dev]" && cd ..
```
### Running in Dev Mode
```bash
npm run tauri:dev
```
This runs the Svelte dev server + Tauri with hot-reload. The Python sidecar runs from your system Python (no PyInstaller needed in dev mode).
### Building
```bash
# Build the Python sidecar (frozen binary)
cd python && python build_sidecar.py --cpu-only && cd ..
# Build the full app
npm run tauri build
```
## Project Structure
```
src/ # Svelte 5 frontend
lib/components/ # Reusable UI components
lib/stores/ # Svelte stores (app state)
routes/ # SvelteKit pages
src-tauri/ # Rust backend (Tauri v2)
src/sidecar/ # Python sidecar lifecycle (download, extract, IPC)
src/commands/ # Tauri command handlers
src/db/ # SQLite database layer
python/ # Python ML sidecar
voice_to_notes/ # Main package
services/ # Transcription, diarization, AI, export
ipc/ # JSON-line IPC protocol
hardware/ # GPU/CPU detection
.gitea/workflows/ # CI/CD pipelines
docs/ # Documentation
```
## How It Works
The app has three layers:
1. **Frontend (Svelte)** — UI, audio playback (wavesurfer.js), transcript editing (TipTap)
2. **Backend (Rust/Tauri)** — Desktop integration, file access, SQLite, sidecar process management
3. **Sidecar (Python)** — ML inference (faster-whisper, pyannote.audio), AI chat, export
Rust and Python communicate via **JSON-line IPC** over stdin/stdout pipes. Each request has an `id`, `type`, and `payload`. The Python sidecar runs as a child process managed by `SidecarManager` in Rust.
## Conventions
### Rust
- Follow standard Rust conventions
- Run `cargo fmt` and `cargo clippy` before committing
- Tauri commands go in `src-tauri/src/commands/`
### Python
- Python 3.11+, type hints everywhere
- Use `ruff` for linting: `ruff check python/`
- Tests with pytest: `cd python && pytest`
- IPC messages: JSON-line format with `id`, `type`, `payload` fields
### TypeScript / Svelte
- Svelte 5 runes (`$state`, `$derived`, `$effect`)
- Strict TypeScript
- Components in `src/lib/components/`
- State in `src/lib/stores/`
### General
- All timestamps in milliseconds (integer)
- UUIDs as primary keys in the database
- Don't bundle API keys or secrets — those are user-configured
## Submitting Changes
1. Fork the repository
2. Create a feature branch: `git checkout -b my-feature`
3. Make your changes
4. Test locally with `npm run tauri:dev`
5. Run linters: `cargo fmt && cargo clippy`, `ruff check python/`
6. Commit with a clear message describing the change
7. Open a Pull Request against `main`
## CI/CD
Pushes to `main` automatically:
- Bump the app version and create a release (`release.yml`)
- Build app installers for all platforms
Changes to `python/` also trigger sidecar builds (`build-sidecar.yml`).
## Areas for Contribution
- UI/UX improvements
- New export formats
- Additional AI provider integrations
- Performance optimizations
- Accessibility improvements
- Documentation and translations
- Bug reports and testing on different platforms
## Reporting Issues
Open an issue on the [repository](https://repo.anhonesthost.net/MacroPad/voice-to-notes/issues) with:
- Steps to reproduce
- Expected vs actual behavior
- Platform and version info
- Sidecar logs (`%LOCALAPPDATA%\com.voicetonotes.app\sidecar.log` on Windows)
## License
By contributing, you agree that your contributions will be licensed under the [MIT License](LICENSE).

113
README.md
View File

@@ -1,55 +1,32 @@
# Voice to Notes # Voice to Notes
A desktop application that transcribes audio and video recordings with speaker identification, synchronized playback, and AI-powered analysis. Export to SRT, WebVTT, ASS captions, plain text, or Markdown. A desktop application that transcribes audio/video recordings with speaker identification, producing editable transcriptions with synchronized audio playback.
## Features ## Features
- **Speech-to-Text** — Accurate transcription via faster-whisper with word-level timestamps. Supports 99 languages. - **Speech-to-Text Transcription** — Accurate transcription via faster-whisper (Whisper models) with word-level timestamps
- **Speaker Identification** — Detect and label speakers using pyannote.audio. Rename speakers for clean exports. - **Speaker Identification (Diarization)** — Detect and distinguish between speakers using pyannote.audio
- **GPU Acceleration** — CUDA support for NVIDIA GPUs (Windows/Linux). Falls back to CPU automatically. - **Synchronized Playback** — Click any word to seek to that point in the audio (Web Audio API for instant playback)
- **Synchronized Playback** — Click any word to seek. Waveform visualization via wavesurfer.js. - **AI Integration** — Ask questions about your transcript via OpenAI, Anthropic, or any OpenAI-compatible API (LiteLLM proxies, Ollama, vLLM)
- **AI Chat** — Ask questions about your transcript. Works with Ollama (local), OpenAI, Anthropic, or any OpenAI-compatible API. - **Export Formats** — SRT, WebVTT, ASS captions, plain text, and Markdown with speaker labels
- **Export** — SRT, WebVTT, ASS, plain text, Markdown — all with speaker labels. - **Cross-Platform** — Builds for Linux, Windows, and macOS (Apple Silicon)
- **Cross-Platform** — Linux, Windows, macOS (Apple Silicon).
## Quick Start
1. Download the installer from [Releases](https://repo.anhonesthost.net/MacroPad/voice-to-notes/releases)
2. On first launch, choose **CPU** or **CUDA** sidecar (the AI engine downloads separately, ~500MB2GB)
3. Import an audio/video file and click **Transcribe**
See the full [User Guide](docs/USER_GUIDE.md) for detailed setup and usage instructions.
## Platform Support ## Platform Support
| Platform | Architecture | Installers | | Platform | Architecture | Status |
|----------|-------------|------------| |----------|-------------|--------|
| Linux | x86_64 | .deb, .rpm | | Linux | x86_64 | Supported |
| Windows | x86_64 | .msi, .exe (NSIS) | | Windows | x86_64 | Supported |
| macOS | ARM (Apple Silicon) | .dmg | | macOS | ARM (Apple Silicon) | Supported |
## Architecture
The app is split into two independently versioned components:
- **App** (v0.2.x) — Tauri desktop shell with Svelte frontend. Small installer (~50MB).
- **Sidecar** (v1.x) — Python ML engine (faster-whisper, pyannote.audio). Downloaded on first launch. CPU (~500MB) or CUDA (~2GB) variants.
This separation means app UI updates don't require re-downloading the sidecar, and sidecar updates don't require reinstalling the app.
## Tech Stack ## Tech Stack
| Component | Technology | - **Desktop shell:** Tauri v2 (Rust backend + Svelte 5 / TypeScript frontend)
|-----------|-----------| - **ML pipeline:** Python sidecar (faster-whisper, pyannote.audio) — frozen via PyInstaller for distribution
| Desktop shell | Tauri v2 (Rust + Svelte 5 / TypeScript) | - **Audio playback:** wavesurfer.js with Web Audio API backend
| Transcription | faster-whisper (CTranslate2) | - **AI providers:** OpenAI, Anthropic, OpenAI-compatible endpoints (local or remote)
| Speaker ID | pyannote.audio 3.1 | - **Local AI:** Bundled llama-server (llama.cpp)
| Audio UI | wavesurfer.js | - **Caption export:** pysubs2
| Transcript editor | TipTap (ProseMirror) |
| AI (local) | Ollama (any model) |
| AI (cloud) | OpenAI, Anthropic, OpenAI-compatible |
| Caption export | pysubs2 |
| Database | SQLite (rusqlite) |
## Development ## Development
@@ -57,8 +34,8 @@ This separation means app UI updates don't require re-downloading the sidecar, a
- Node.js 20+ - Node.js 20+
- Rust (stable) - Rust (stable)
- Python 3.11+ with uv or pip - Python 3.11+ with ML dependencies
- Linux: `libgtk-3-dev`, `libwebkit2gtk-4.1-dev`, `libappindicator3-dev`, `librsvg2-dev` - System: `libgtk-3-dev`, `libwebkit2gtk-4.1-dev` (Linux)
### Getting Started ### Getting Started
@@ -67,63 +44,49 @@ This separation means app UI updates don't require re-downloading the sidecar, a
npm install npm install
# Install Python sidecar dependencies # Install Python sidecar dependencies
cd python && pip install -e ".[dev]" && cd .. cd python && pip install -e . && cd ..
# Run in dev mode (uses system Python for the sidecar) # Run in dev mode (uses system Python for the sidecar)
npm run tauri:dev npm run tauri:dev
``` ```
### Building ### Building for Distribution
```bash ```bash
# Build the frozen Python sidecar (CPU-only) # Build the frozen Python sidecar
cd python && python build_sidecar.py --cpu-only && cd .. npm run sidecar:build
# Build with CUDA support # Build the Tauri app (requires sidecar in src-tauri/binaries/)
cd python && python build_sidecar.py --with-cuda && cd ..
# Build the Tauri app
npm run tauri build npm run tauri build
``` ```
### CI/CD ### CI/CD
Two Gitea Actions workflows in `.gitea/workflows/`: Gitea Actions workflows are in `.gitea/workflows/`. The build pipeline:
**`release.yml`** — Triggers on push to main: 1. **Build sidecar**PyInstaller-frozen Python binary per platform (CPU-only PyTorch)
1. Bumps app version (patch), creates git tag and Gitea release 2. **Build Tauri app** — Bundles the sidecar via `externalBin`, produces .deb/.AppImage (Linux), .msi (Windows), .dmg (macOS)
2. Builds lightweight app installers for all platforms (no sidecar bundled)
**`build-sidecar.yml`** — Triggers on changes to `python/` or manual dispatch:
1. Bumps sidecar version, creates `sidecar-v*` tag and release
2. Builds CPU + CUDA variants for Linux/Windows, CPU for macOS
3. Uploads as separate release assets
#### Required Secrets #### Required Secrets
| Secret | Purpose | | Secret | Purpose | Required? |
|--------|---------| |--------|---------|-----------|
| `BUILD_TOKEN` | Gitea API token for creating releases and pushing tags | | `TAURI_SIGNING_PRIVATE_KEY` | Signs Tauri update bundles | Optional (for auto-updates) |
No other secrets are needed for building. AI provider API keys and HuggingFace tokens are configured by end users in the app's Settings.
### Project Structure ### Project Structure
``` ```
src/ # Svelte 5 frontend src/ # Svelte 5 frontend
lib/components/ # UI components (waveform, transcript editor, settings, etc.) src-tauri/ # Rust backend (Tauri commands, sidecar manager, SQLite)
lib/stores/ # Svelte stores (settings, transcript state) python/ # Python sidecar (transcription, diarization, AI)
routes/ # SvelteKit pages voice_to_notes/ # Python package
src-tauri/ # Rust backend
src/sidecar/ # Sidecar process manager (download, extract, IPC)
src/commands/ # Tauri command handlers
nsis-hooks.nsh # Windows uninstall cleanup
python/ # Python sidecar
voice_to_notes/ # Python package (transcription, diarization, AI, export)
build_sidecar.py # PyInstaller build script build_sidecar.py # PyInstaller build script
voice_to_notes.spec # PyInstaller spec voice_to_notes.spec # PyInstaller spec
.gitea/workflows/ # CI/CD (release.yml, build-sidecar.yml) .gitea/workflows/ # Gitea Actions CI/CD
docs/ # Documentation
``` ```
## License ## License
[MIT](LICENSE) MIT

View File

@@ -1,203 +0,0 @@
# Voice to Notes — User Guide
## Getting Started
### Installation
Download the installer for your platform from the [Releases](https://repo.anhonesthost.net/MacroPad/voice-to-notes/releases) page:
- **Windows:** `.msi` or `-setup.exe`
- **Linux:** `.deb` or `.rpm`
- **macOS:** `.dmg`
### First-Time Setup
On first launch, Voice to Notes will prompt you to download its AI engine (the "sidecar"):
1. Choose **Standard (CPU)** (~500 MB) or **GPU Accelerated (CUDA)** (~2 GB)
- Choose CUDA if you have an NVIDIA GPU for significantly faster transcription
- CPU works on all computers
2. Click **Download & Install** and wait for the download to complete
3. The app will proceed to the main interface once the sidecar is ready
The sidecar only needs to be downloaded once. Updates are detected automatically on launch.
---
## Basic Workflow
### 1. Import Audio
- Click **Import Audio** or press **Ctrl+O** (Cmd+O on Mac)
- Supported formats: MP3, WAV, FLAC, OGG, M4A, AAC, WMA, MP4, MKV, AVI, MOV, WebM
### 2. Transcribe
After importing, click **Transcribe** to start the transcription pipeline:
- **Transcription:** Converts speech to text with word-level timestamps
- **Speaker Detection:** Identifies different speakers (if configured — see [Speaker Detection](#speaker-detection))
- A progress bar shows the current stage and percentage
### 3. Review and Edit
- The **waveform** displays at the top — click anywhere to seek
- The **transcript** shows below with speaker labels and timestamps
- **Click any word** in the transcript to jump to that point in the audio
- The current word highlights during playback
- **Edit text** directly in the transcript — word timings are preserved
### 4. Export
Click **Export** and choose a format:
| Format | Extension | Best For |
|--------|-----------|----------|
| SRT | `.srt` | Video subtitles (most compatible) |
| WebVTT | `.vtt` | Web video players, HTML5 |
| ASS/SSA | `.ass` | Styled subtitles with speaker colors |
| Plain Text | `.txt` | Reading, sharing, pasting |
| Markdown | `.md` | Documentation, notes |
All formats include speaker labels when speaker detection is enabled.
### 5. Save Project
- **Ctrl+S** (Cmd+S) saves the current project as a `.vtn` file
- This preserves the full transcript, speaker assignments, and edits
- Reopen later to continue editing or re-export
---
## Playback Controls
| Action | Shortcut |
|--------|----------|
| Play / Pause | **Space** |
| Skip back 5s | **Left Arrow** |
| Skip forward 5s | **Right Arrow** |
| Seek to word | Click any word in the transcript |
| Import audio | **Ctrl+O** / **Cmd+O** |
| Open settings | **Ctrl+,** / **Cmd+,** |
---
## Speaker Detection
Speaker detection (diarization) identifies who is speaking at each point in the audio. It requires a one-time setup:
### Setup
1. Go to **Settings > Speakers**
2. Create a free account at [huggingface.co](https://huggingface.co/join)
3. Accept the license on **all three** model pages:
- [pyannote/speaker-diarization-3.1](https://huggingface.co/pyannote/speaker-diarization-3.1)
- [pyannote/segmentation-3.0](https://huggingface.co/pyannote/segmentation-3.0)
- [pyannote/speaker-diarization-community-1](https://huggingface.co/pyannote/speaker-diarization-community-1)
4. Create a token at [huggingface.co/settings/tokens](https://huggingface.co/settings/tokens) (read access is sufficient)
5. Paste the token in Settings and click **Test & Download Model**
### Speaker Options
- **Number of speakers:** Set to auto-detect or specify a fixed number for faster results
- **Skip speaker detection:** Check this to only transcribe without identifying speakers
### Managing Speakers
After transcription, speakers appear as "Speaker 1", "Speaker 2", etc. in the left sidebar. Double-click a speaker name to rename it — the new name appears throughout the transcript and in exports.
---
## AI Chat
The AI chat panel lets you ask questions about your transcript. The AI sees the full transcript with speaker labels as context.
Example prompts:
- "Summarize this conversation"
- "What were the key action items?"
- "What did Speaker 1 say about the budget?"
### Setting Up Ollama (Local AI)
[Ollama](https://ollama.com) runs AI models locally on your computer — no API keys or internet required.
1. **Install Ollama:**
- Download from [ollama.com](https://ollama.com)
- Or on Linux: `curl -fsSL https://ollama.com/install.sh | sh`
2. **Pull a model:**
```bash
ollama pull llama3.2
```
Other good options: `mistral`, `gemma2`, `phi3`
3. **Configure in Voice to Notes:**
- Go to **Settings > AI Provider**
- Select **Ollama**
- URL: `http://localhost:11434` (default, usually no change needed)
- Model: `llama3.2` (or whichever model you pulled)
4. **Use:** Open the AI chat panel (right sidebar) and start asking questions
### Cloud AI Providers
If you prefer cloud-based AI:
**OpenAI:**
- Select **OpenAI** in Settings > AI Provider
- Enter your API key from [platform.openai.com/api-keys](https://platform.openai.com/api-keys)
- Default model: `gpt-4o-mini`
**Anthropic:**
- Select **Anthropic** in Settings > AI Provider
- Enter your API key from [console.anthropic.com](https://console.anthropic.com)
- Default model: `claude-sonnet-4-6`
**OpenAI Compatible:**
- For any provider with an OpenAI-compatible API (vLLM, LiteLLM, etc.)
- Enter the API base URL, key, and model name
---
## Settings Reference
### Transcription
| Setting | Options | Default |
|---------|---------|---------|
| Whisper Model | tiny, base, small, medium, large-v3 | base |
| Device | CPU, CUDA | CPU |
| Language | Auto-detect, or specify (en, es, fr, etc.) | Auto-detect |
**Model recommendations:**
- **tiny/base:** Fast, good for clear audio with one speaker
- **small:** Best balance of speed and accuracy
- **medium:** Better accuracy, noticeably slower
- **large-v3:** Best accuracy, requires 8GB+ VRAM (GPU) or 16GB+ RAM (CPU)
### Debug
- **Enable Developer Tools:** Opens the browser inspector for debugging
---
## Troubleshooting
### Transcription is slow
- Use a smaller model (tiny or base)
- If you have an NVIDIA GPU, select CUDA in Settings > Transcription > Device
- Ensure you downloaded the CUDA sidecar during setup
### Speaker detection not working
- Verify your HuggingFace token in Settings > Speakers
- Click "Test & Download Model" to re-download
- Make sure you accepted the license on all three model pages
### Audio won't play / No waveform
- Check that the audio file still exists at its original location
- Try re-importing the file
- Supported formats: MP3, WAV, FLAC, OGG, M4A, AAC, WMA
### App shows "Setting up Voice to Notes"
- This is the first-launch sidecar download — it only happens once
- If it fails, check your internet connection and click Retry

4
package-lock.json generated
View File

@@ -1,12 +1,12 @@
{ {
"name": "voice-to-notes", "name": "voice-to-notes",
"version": "0.2.10", "version": "0.1.0",
"lockfileVersion": 3, "lockfileVersion": 3,
"requires": true, "requires": true,
"packages": { "packages": {
"": { "": {
"name": "voice-to-notes", "name": "voice-to-notes",
"version": "0.2.10", "version": "0.1.0",
"license": "MIT", "license": "MIT",
"dependencies": { "dependencies": {
"@tauri-apps/api": "^2", "@tauri-apps/api": "^2",

View File

@@ -1,6 +1,6 @@
{ {
"name": "voice-to-notes", "name": "voice-to-notes",
"version": "0.2.28", "version": "0.1.0",
"description": "Desktop app for transcribing audio/video with speaker identification", "description": "Desktop app for transcribing audio/video with speaker identification",
"type": "module", "type": "module",
"scripts": { "scripts": {

View File

@@ -92,42 +92,34 @@ def create_venv_and_install(cpu_only: bool) -> Path:
# Determine python path inside venv # Determine python path inside venv
if sys.platform == "win32": if sys.platform == "win32":
python = str(venv_dir / "Scripts" / "python.exe") python = str(venv_dir / "Scripts" / "python")
else: else:
python = str(venv_dir / "bin" / "python") python = str(venv_dir / "bin" / "python")
def pip_install(*args: str) -> None: def pkg_install(*args: str) -> None:
"""Install packages. Pass package names and flags only, not 'install'."""
if use_uv: if use_uv:
# Use --python with the venv directory (not the python binary) for uv subprocess.run(["uv", "pip", "install", "--python", python, *args], check=True)
subprocess.run(
["uv", "pip", "install", "--python", str(venv_dir), *args],
check=True,
)
else: else:
subprocess.run([python, "-m", "pip", "install", *args], check=True) subprocess.run([python, "-m", "pip", *args], check=True)
if not use_uv: if not use_uv:
# Upgrade pip (uv doesn't need this) # Upgrade pip (uv doesn't need this)
pip_install("--upgrade", "pip", "setuptools", "wheel") pkg_install("install", "--upgrade", "pip", "setuptools", "wheel")
# Install torch (CPU-only to avoid bundling ~2GB of CUDA libs) # Install torch (CPU-only to avoid bundling ~2GB of CUDA libs)
if cpu_only: if cpu_only:
print("[build] Installing PyTorch (CPU-only)") print("[build] Installing PyTorch (CPU-only)")
pip_install( pkg_install(
"torch", "torchaudio", "install", "torch", "torchaudio",
"--index-url", "https://download.pytorch.org/whl/cpu", "--index-url", "https://download.pytorch.org/whl/cpu",
) )
else: else:
print("[build] Installing PyTorch (CUDA 12.6)") print("[build] Installing PyTorch (default, may include CUDA)")
pip_install( pkg_install("install", "torch", "torchaudio")
"torch", "torchaudio",
"--index-url", "https://download.pytorch.org/whl/cu126",
)
# Install project and dev deps (includes pyinstaller) # Install project and dev deps (includes pyinstaller)
print("[build] Installing project dependencies") print("[build] Installing project dependencies")
pip_install("-e", f"{SCRIPT_DIR}[dev]") pkg_install("install", "-e", f"{SCRIPT_DIR}[dev]")
return Path(python) return Path(python)
@@ -239,9 +231,10 @@ def main() -> None:
python = create_venv_and_install(cpu_only) python = create_venv_and_install(cpu_only)
output_dir = run_pyinstaller(python) output_dir = run_pyinstaller(python)
download_ffmpeg(output_dir) download_ffmpeg(output_dir)
rename_binary(output_dir, target_triple)
print(f"\n[build] Done! Sidecar built at: {output_dir}") print(f"\n[build] Done! Sidecar built at: {output_dir}")
print(f"[build] Copy directory to src-tauri/sidecar/ for Tauri resource bundling") print(f"[build] Copy contents to src-tauri/binaries/ for Tauri bundling")
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project] [project]
name = "voice-to-notes" name = "voice-to-notes"
version = "1.0.11" version = "0.1.0"
description = "Python sidecar for Voice to Notes — transcription, diarization, and AI services" description = "Python sidecar for Voice to Notes — transcription, diarization, and AI services"
requires-python = ">=3.11" requires-python = ">=3.11"
license = "MIT" license = "MIT"
@@ -15,7 +15,6 @@ dependencies = [
"pysubs2>=1.7.0", "pysubs2>=1.7.0",
"openai>=1.0.0", "openai>=1.0.0",
"anthropic>=0.20.0", "anthropic>=0.20.0",
"soundfile>=0.12.0",
] ]
[project.optional-dependencies] [project.optional-dependencies]

View File

@@ -12,17 +12,15 @@ faster_whisper_datas, faster_whisper_binaries, faster_whisper_hiddenimports = co
"faster_whisper" "faster_whisper"
) )
pyannote_datas, pyannote_binaries, pyannote_hiddenimports = collect_all("pyannote") pyannote_datas, pyannote_binaries, pyannote_hiddenimports = collect_all("pyannote")
soundfile_datas, soundfile_binaries, soundfile_hiddenimports = collect_all("soundfile")
a = Analysis( a = Analysis(
["voice_to_notes/main.py"], ["voice_to_notes/main.py"],
pathex=[], pathex=[],
binaries=ctranslate2_binaries + faster_whisper_binaries + pyannote_binaries + soundfile_binaries, binaries=ctranslate2_binaries + faster_whisper_binaries + pyannote_binaries,
datas=ctranslate2_datas + faster_whisper_datas + pyannote_datas + soundfile_datas, datas=ctranslate2_datas + faster_whisper_datas + pyannote_datas,
hiddenimports=[ hiddenimports=[
"torch", "torch",
"torchaudio", "torchaudio",
"soundfile",
"huggingface_hub", "huggingface_hub",
"pysubs2", "pysubs2",
"openai", "openai",
@@ -31,22 +29,11 @@ a = Analysis(
] ]
+ ctranslate2_hiddenimports + ctranslate2_hiddenimports
+ faster_whisper_hiddenimports + faster_whisper_hiddenimports
+ pyannote_hiddenimports + pyannote_hiddenimports,
+ soundfile_hiddenimports,
hookspath=[], hookspath=[],
hooksconfig={}, hooksconfig={},
runtime_hooks=[], runtime_hooks=[],
excludes=[ excludes=["tkinter", "test", "unittest", "pip", "setuptools"],
"tkinter", "test", "pip", "setuptools",
# ctranslate2.converters imports torch at module level and causes
# circular import crashes under PyInstaller. These modules are only
# needed for model format conversion, never for inference.
"ctranslate2.converters",
# torchcodec is partially bundled by PyInstaller but non-functional
# (missing FFmpeg shared libs). Excluding it forces pyannote.audio
# to fall back to torchaudio for audio decoding.
"torchcodec",
],
win_no_prefer_redirects=False, win_no_prefer_redirects=False,
win_private_assemblies=False, win_private_assemblies=False,
cipher=block_cipher, cipher=block_cipher,

View File

@@ -105,23 +105,14 @@ def detect_hardware() -> HardwareInfo:
# RAM info (cross-platform) # RAM info (cross-platform)
info.ram_mb = _detect_ram_mb() info.ram_mb = _detect_ram_mb()
# CUDA detection — verify runtime libraries actually work, not just torch detection # CUDA detection
try: try:
import torch import torch
if torch.cuda.is_available(): if torch.cuda.is_available():
# Test that CUDA runtime libraries are actually loadable
try:
torch.zeros(1, device="cuda")
info.has_cuda = True info.has_cuda = True
info.cuda_device_name = torch.cuda.get_device_name(0) info.cuda_device_name = torch.cuda.get_device_name(0)
info.vram_mb = torch.cuda.get_device_properties(0).total_mem // (1024 * 1024) info.vram_mb = torch.cuda.get_device_properties(0).total_mem // (1024 * 1024)
except Exception as e:
print(
f"[sidecar] CUDA detected but runtime unavailable: {e}. Using CPU.",
file=sys.stderr,
flush=True,
)
except ImportError: except ImportError:
print("[sidecar] torch not available, GPU detection skipped", file=sys.stderr, flush=True) print("[sidecar] torch not available, GPU detection skipped", file=sys.stderr, flush=True)

View File

@@ -41,15 +41,11 @@ def ping_handler(msg: IPCMessage) -> IPCMessage:
def make_transcribe_handler() -> HandlerFunc: def make_transcribe_handler() -> HandlerFunc:
"""Create a transcription handler with a persistent TranscribeService.""" """Create a transcription handler with a persistent TranscribeService."""
service = None from voice_to_notes.services.transcribe import TranscribeService, result_to_payload
service = TranscribeService()
def handler(msg: IPCMessage) -> IPCMessage: def handler(msg: IPCMessage) -> IPCMessage:
nonlocal service
if service is None:
from voice_to_notes.services.transcribe import TranscribeService
service = TranscribeService()
from voice_to_notes.services.transcribe import result_to_payload
payload = msg.payload payload = msg.payload
result = service.transcribe( result = service.transcribe(
request_id=msg.id, request_id=msg.id,
@@ -70,15 +66,11 @@ def make_transcribe_handler() -> HandlerFunc:
def make_diarize_handler() -> HandlerFunc: def make_diarize_handler() -> HandlerFunc:
"""Create a diarization handler with a persistent DiarizeService.""" """Create a diarization handler with a persistent DiarizeService."""
service = None from voice_to_notes.services.diarize import DiarizeService, diarization_to_payload
service = DiarizeService()
def handler(msg: IPCMessage) -> IPCMessage: def handler(msg: IPCMessage) -> IPCMessage:
nonlocal service
if service is None:
from voice_to_notes.services.diarize import DiarizeService
service = DiarizeService()
from voice_to_notes.services.diarize import diarization_to_payload
payload = msg.payload payload = msg.payload
result = service.diarize( result = service.diarize(
request_id=msg.id, request_id=msg.id,
@@ -171,15 +163,11 @@ def make_diarize_download_handler() -> HandlerFunc:
def make_pipeline_handler() -> HandlerFunc: def make_pipeline_handler() -> HandlerFunc:
"""Create a full pipeline handler (transcribe + diarize + merge).""" """Create a full pipeline handler (transcribe + diarize + merge)."""
service = None from voice_to_notes.services.pipeline import PipelineService, pipeline_result_to_payload
service = PipelineService()
def handler(msg: IPCMessage) -> IPCMessage: def handler(msg: IPCMessage) -> IPCMessage:
nonlocal service
if service is None:
from voice_to_notes.services.pipeline import PipelineService
service = PipelineService()
from voice_to_notes.services.pipeline import pipeline_result_to_payload
payload = msg.payload payload = msg.payload
result = service.run( result = service.run(
request_id=msg.id, request_id=msg.id,
@@ -205,15 +193,11 @@ def make_pipeline_handler() -> HandlerFunc:
def make_export_handler() -> HandlerFunc: def make_export_handler() -> HandlerFunc:
"""Create an export handler.""" """Create an export handler."""
service = None from voice_to_notes.services.export import ExportService, make_export_request
service = ExportService()
def handler(msg: IPCMessage) -> IPCMessage: def handler(msg: IPCMessage) -> IPCMessage:
nonlocal service
if service is None:
from voice_to_notes.services.export import ExportService
service = ExportService()
from voice_to_notes.services.export import make_export_request
request = make_export_request(msg.payload) request = make_export_request(msg.payload)
output_path = service.export(request) output_path = service.export(request)
return IPCMessage( return IPCMessage(
@@ -227,14 +211,11 @@ def make_export_handler() -> HandlerFunc:
def make_ai_chat_handler() -> HandlerFunc: def make_ai_chat_handler() -> HandlerFunc:
"""Create an AI chat handler with persistent AIProviderService.""" """Create an AI chat handler with persistent AIProviderService."""
service = None
def handler(msg: IPCMessage) -> IPCMessage:
nonlocal service
if service is None:
from voice_to_notes.services.ai_provider import create_default_service from voice_to_notes.services.ai_provider import create_default_service
service = create_default_service() service = create_default_service()
def handler(msg: IPCMessage) -> IPCMessage:
payload = msg.payload payload = msg.payload
action = payload.get("action", "chat") action = payload.get("action", "chat")
@@ -254,15 +235,15 @@ def make_ai_chat_handler() -> HandlerFunc:
) )
if action == "configure": if action == "configure":
# Re-create a provider with custom settings and set it active # Re-create a provider with custom settings
provider_name = payload.get("provider", "") provider_name = payload.get("provider", "")
config = payload.get("config", {}) config = payload.get("config", {})
if provider_name == "local": if provider_name == "local":
from voice_to_notes.providers.local_provider import LocalProvider from voice_to_notes.providers.local_provider import LocalProvider
service.register_provider("local", LocalProvider( service.register_provider("local", LocalProvider(
base_url=config.get("base_url", "http://localhost:11434/v1"), base_url=config.get("base_url", "http://localhost:8080"),
model=config.get("model", "llama3.2"), model=config.get("model", "local"),
)) ))
elif provider_name == "openai": elif provider_name == "openai":
from voice_to_notes.providers.openai_provider import OpenAIProvider from voice_to_notes.providers.openai_provider import OpenAIProvider
@@ -286,10 +267,6 @@ def make_ai_chat_handler() -> HandlerFunc:
api_key=config.get("api_key"), api_key=config.get("api_key"),
api_base=config.get("api_base"), api_base=config.get("api_base"),
)) ))
# Set the configured provider as active
print(f"[sidecar] Configured AI provider: {provider_name} with config: {config}", file=sys.stderr, flush=True)
if provider_name in ("local", "openai", "anthropic", "litellm"):
service.set_active(provider_name)
return IPCMessage( return IPCMessage(
id=msg.id, id=msg.id,
type="ai.configured", type="ai.configured",

View File

@@ -5,7 +5,6 @@ from __future__ import annotations
import signal import signal
import sys import sys
# CRITICAL: Capture real stdout for IPC *before* importing any ML libraries # CRITICAL: Capture real stdout for IPC *before* importing any ML libraries
# that might print to stdout and corrupt the JSON-line protocol. # that might print to stdout and corrupt the JSON-line protocol.
from voice_to_notes.ipc.protocol import init_ipc from voice_to_notes.ipc.protocol import init_ipc

View File

@@ -1,4 +1,4 @@
"""Local AI provider — Ollama or any OpenAI-compatible API.""" """Local AI provider — bundled llama-server (OpenAI-compatible API)."""
from __future__ import annotations from __future__ import annotations
@@ -9,9 +9,9 @@ from voice_to_notes.providers.base import AIProvider
class LocalProvider(AIProvider): class LocalProvider(AIProvider):
"""Connects to Ollama or any OpenAI-compatible API server.""" """Connects to bundled llama-server via its OpenAI-compatible API."""
def __init__(self, base_url: str = "http://localhost:11434/v1", model: str = "llama3.2") -> None: def __init__(self, base_url: str = "http://localhost:8080", model: str = "local") -> None:
self._base_url = base_url.rstrip("/") self._base_url = base_url.rstrip("/")
self._model = model self._model = model
self._client: Any = None self._client: Any = None
@@ -24,8 +24,8 @@ class LocalProvider(AIProvider):
from openai import OpenAI from openai import OpenAI
self._client = OpenAI( self._client = OpenAI(
base_url=self._base_url, base_url=f"{self._base_url}/v1",
api_key="ollama", # Ollama doesn't require a real key api_key="not-needed", # llama-server doesn't require an API key
) )
except ImportError: except ImportError:
raise RuntimeError( raise RuntimeError(
@@ -47,9 +47,7 @@ class LocalProvider(AIProvider):
try: try:
import urllib.request import urllib.request
# Check base URL without /v1 suffix for Ollama root endpoint req = urllib.request.Request(f"{self._base_url}/health", method="GET")
root_url = self._base_url.replace("/v1", "")
req = urllib.request.Request(root_url, method="GET")
with urllib.request.urlopen(req, timeout=2) as resp: with urllib.request.urlopen(req, timeout=2) as resp:
return resp.status == 200 return resp.status == 200
except Exception: except Exception:
@@ -57,4 +55,4 @@ class LocalProvider(AIProvider):
@property @property
def name(self) -> str: def name(self) -> str:
return "Ollama" return "Local (llama-server)"

View File

@@ -20,72 +20,6 @@ from voice_to_notes.utils.ffmpeg import get_ffmpeg_path
from voice_to_notes.ipc.messages import progress_message from voice_to_notes.ipc.messages import progress_message
from voice_to_notes.ipc.protocol import write_message from voice_to_notes.ipc.protocol import write_message
_patched = False
def _patch_pyannote_audio() -> None:
"""Monkey-patch pyannote.audio.core.io.Audio to use torchaudio.
pyannote.audio has a bug where AudioDecoder (from torchcodec) is used
unconditionally even when torchcodec is not installed, causing NameError.
This replaces the Audio.__call__ method with a torchaudio-based version.
"""
global _patched
if _patched:
return
_patched = True
try:
import numpy as np
import soundfile as sf
import torch
from pyannote.audio.core.io import Audio
def _sf_load(audio_path: str) -> tuple:
"""Load audio via soundfile, return (channels, samples) tensor + sample_rate."""
data, sample_rate = sf.read(str(audio_path), dtype="float32")
waveform = torch.from_numpy(np.array(data))
if waveform.ndim == 1:
waveform = waveform.unsqueeze(0)
else:
waveform = waveform.T
return waveform, sample_rate
def _soundfile_call(self, file: dict) -> tuple:
"""Replacement for Audio.__call__."""
return _sf_load(file["audio"])
def _soundfile_crop(self, file: dict, segment, **kwargs) -> tuple:
"""Replacement for Audio.crop — load full file then slice.
Pads short segments with zeros to match the expected duration,
which pyannote requires for batched embedding extraction.
"""
duration = kwargs.get("duration", None)
waveform, sample_rate = _sf_load(file["audio"])
# Convert segment (seconds) to sample indices
start_sample = int(segment.start * sample_rate)
end_sample = int(segment.end * sample_rate)
# Clamp to bounds
start_sample = max(0, start_sample)
end_sample = min(waveform.shape[-1], end_sample)
cropped = waveform[:, start_sample:end_sample]
# Pad to expected duration if needed (pyannote batches require uniform size)
if duration is not None:
expected_samples = int(duration * sample_rate)
else:
expected_samples = int((segment.end - segment.start) * sample_rate)
if cropped.shape[-1] < expected_samples:
pad = torch.zeros(cropped.shape[0], expected_samples - cropped.shape[-1])
cropped = torch.cat([cropped, pad], dim=-1)
return cropped, sample_rate
Audio.__call__ = _soundfile_call # type: ignore[assignment]
Audio.crop = _soundfile_crop # type: ignore[assignment]
print("[sidecar] Patched pyannote Audio to use soundfile", file=sys.stderr, flush=True)
except Exception as e:
print(f"[sidecar] Warning: Could not patch pyannote Audio: {e}", file=sys.stderr, flush=True)
def _ensure_wav(file_path: str) -> tuple[str, str | None]: def _ensure_wav(file_path: str) -> tuple[str, str | None]:
"""Convert audio to 16kHz mono WAV if needed. """Convert audio to 16kHz mono WAV if needed.
@@ -179,7 +113,6 @@ class DiarizeService:
] ]
last_error: Exception | None = None last_error: Exception | None = None
_patch_pyannote_audio()
for model_name in models: for model_name in models:
try: try:
from pyannote.audio import Pipeline from pyannote.audio import Pipeline

View File

@@ -77,28 +77,11 @@ class TranscribeService:
file=sys.stderr, file=sys.stderr,
flush=True, flush=True,
) )
try:
self._model = WhisperModel( self._model = WhisperModel(
model_name, model_name,
device=device, device=device,
compute_type=compute_type, compute_type=compute_type,
) )
except Exception as e:
if device != "cpu":
print(
f"[sidecar] Failed to load on {device}: {e}. Falling back to CPU.",
file=sys.stderr,
flush=True,
)
device = "cpu"
compute_type = "int8"
self._model = WhisperModel(
model_name,
device=device,
compute_type=compute_type,
)
else:
raise
self._current_model_name = model_name self._current_model_name = model_name
self._current_device = device self._current_device = device
self._current_compute_type = compute_type self._current_compute_type = compute_type

52
src-tauri/Cargo.lock generated
View File

@@ -59,15 +59,6 @@ version = "1.0.102"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c"
[[package]]
name = "arbitrary"
version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1"
dependencies = [
"derive_arbitrary",
]
[[package]] [[package]]
name = "async-broadcast" name = "async-broadcast"
version = "0.7.2" version = "0.7.2"
@@ -664,17 +655,6 @@ dependencies = [
"serde_core", "serde_core",
] ]
[[package]]
name = "derive_arbitrary"
version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e567bd82dcff979e4b03460c307b3cdc9e96fde3d73bed1496d2bc75d9dd62a"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.117",
]
[[package]] [[package]]
name = "derive_more" name = "derive_more"
version = "0.99.20" version = "0.99.20"
@@ -4382,7 +4362,7 @@ checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
[[package]] [[package]]
name = "voice-to-notes" name = "voice-to-notes"
version = "0.2.2" version = "0.1.0"
dependencies = [ dependencies = [
"chrono", "chrono",
"rusqlite", "rusqlite",
@@ -4394,7 +4374,6 @@ dependencies = [
"tauri-plugin-opener", "tauri-plugin-opener",
"thiserror 1.0.69", "thiserror 1.0.69",
"uuid", "uuid",
"zip",
] ]
[[package]] [[package]]
@@ -5433,41 +5412,12 @@ dependencies = [
"syn 2.0.117", "syn 2.0.117",
] ]
[[package]]
name = "zip"
version = "2.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fabe6324e908f85a1c52063ce7aa26b68dcb7eb6dbc83a2d148403c9bc3eba50"
dependencies = [
"arbitrary",
"crc32fast",
"crossbeam-utils",
"displaydoc",
"flate2",
"indexmap 2.13.0",
"memchr",
"thiserror 2.0.18",
"zopfli",
]
[[package]] [[package]]
name = "zmij" name = "zmij"
version = "1.0.21" version = "1.0.21"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa"
[[package]]
name = "zopfli"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f05cd8797d63865425ff89b5c4a48804f35ba0ce8d125800027ad6017d2b5249"
dependencies = [
"bumpalo",
"crc32fast",
"log",
"simd-adler32",
]
[[package]] [[package]]
name = "zvariant" name = "zvariant"
version = "5.10.0" version = "5.10.0"

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "voice-to-notes" name = "voice-to-notes"
version = "0.2.28" version = "0.1.0"
description = "Voice to Notes — desktop transcription with speaker identification" description = "Voice to Notes — desktop transcription with speaker identification"
authors = ["Voice to Notes Contributors"] authors = ["Voice to Notes Contributors"]
license = "MIT" license = "MIT"
@@ -14,16 +14,12 @@ crate-type = ["staticlib", "cdylib", "rlib"]
tauri-build = { version = "2", features = [] } tauri-build = { version = "2", features = [] }
[dependencies] [dependencies]
tauri = { version = "2", features = ["protocol-asset", "devtools"] } tauri = { version = "2", features = ["protocol-asset"] }
tauri-plugin-opener = "2" tauri-plugin-opener = "2"
serde = { version = "1", features = ["derive"] } serde = { version = "1", features = ["derive"] }
serde_json = "1" serde_json = "1"
rusqlite = { version = "0.31", features = ["bundled"] } rusqlite = { version = "0.31", features = ["bundled"] }
uuid = { version = "1", features = ["v4", "serde"] } uuid = { version = "1", features = ["v4", "serde"] }
zip = { version = "2", default-features = false, features = ["deflate"] }
thiserror = "1" thiserror = "1"
chrono = { version = "0.4", features = ["serde"] } chrono = { version = "0.4", features = ["serde"] }
tauri-plugin-dialog = "2.6.0" tauri-plugin-dialog = "2.6.0"
reqwest = { version = "0.12", features = ["stream", "json"] }
futures-util = "0.3"
bytes = "1"

View File

@@ -1,21 +1,3 @@
fn main() { fn main() {
// Ensure sidecar.zip exists so tauri-build doesn't fail.
// CI replaces this placeholder with the real PyInstaller sidecar archive.
let zip_path = std::path::Path::new("sidecar.zip");
if !zip_path.exists() {
// Minimal valid zip (empty archive): end-of-central-directory record
let empty_zip: [u8; 22] = [
0x50, 0x4b, 0x05, 0x06, // EOCD signature
0x00, 0x00, // disk number
0x00, 0x00, // disk with central dir
0x00, 0x00, // entries on this disk
0x00, 0x00, // total entries
0x00, 0x00, 0x00, 0x00, // central dir size
0x00, 0x00, 0x00, 0x00, // central dir offset
0x00, 0x00, // comment length
];
std::fs::write(zip_path, empty_zip).expect("Failed to create placeholder sidecar.zip");
}
tauri_build::build() tauri_build::build()
} }

View File

@@ -1,11 +0,0 @@
; NSIS uninstall hook for Voice to Notes
; Removes the sidecar data directory (extracted sidecar binaries + logs)
; but preserves user data in $PROFILE\.voicetonotes (database, settings, models)
!macro NSIS_HOOK_POSTUNINSTALL
; Remove the Tauri app_local_data_dir which contains:
; - Extracted sidecar directories (voice-to-notes-sidecar/)
; - sidecar.log
; Path: %LOCALAPPDATA%\com.voicetonotes.app
RMDir /r "$LOCALAPPDATA\com.voicetonotes.app"
!macroend

View File

@@ -2,6 +2,5 @@ pub mod ai;
pub mod export; pub mod export;
pub mod project; pub mod project;
pub mod settings; pub mod settings;
pub mod sidecar;
pub mod system; pub mod system;
pub mod transcribe; pub mod transcribe;

View File

@@ -32,16 +32,3 @@ pub fn save_settings(settings: Value) -> Result<(), String> {
fs::write(&path, json).map_err(|e| format!("Cannot write settings: {e}"))?; fs::write(&path, json).map_err(|e| format!("Cannot write settings: {e}"))?;
Ok(()) Ok(())
} }
/// Toggle devtools on the main window.
#[tauri::command]
pub fn toggle_devtools(app: tauri::AppHandle, open: bool) {
use tauri::Manager;
if let Some(window) = app.get_webview_window("main") {
if open {
window.open_devtools();
} else {
window.close_devtools();
}
}
}

View File

@@ -1,252 +0,0 @@
use futures_util::StreamExt;
use serde::Serialize;
use std::io::Write;
use tauri::{AppHandle, Emitter};
use crate::sidecar::{SidecarManager, DATA_DIR};
const REPO_API: &str = "https://repo.anhonesthost.net/api/v1/repos/MacroPad/voice-to-notes";
#[derive(Serialize, Clone)]
struct DownloadProgress {
downloaded: u64,
total: u64,
percent: u8,
}
#[derive(Serialize)]
pub struct UpdateInfo {
pub current_version: String,
pub latest_version: String,
}
/// Read the locally installed sidecar version from `sidecar-version.txt`.
/// Returns `None` if the file doesn't exist or can't be read.
fn read_local_sidecar_version() -> Option<String> {
let data_dir = DATA_DIR.get()?;
let version_file = data_dir.join("sidecar-version.txt");
std::fs::read_to_string(version_file)
.ok()
.map(|v| v.trim().to_string())
.filter(|v| !v.is_empty())
}
/// Write the sidecar version to `sidecar-version.txt` after a successful download.
fn write_local_sidecar_version(version: &str) -> Result<(), String> {
let data_dir = DATA_DIR.get().ok_or("App data directory not initialized")?;
let version_file = data_dir.join("sidecar-version.txt");
std::fs::write(&version_file, version)
.map_err(|e| format!("Failed to write sidecar version file: {}", e))
}
/// Fetch releases from the Gitea API and find the latest sidecar release
/// (one whose tag_name starts with "sidecar-v").
async fn fetch_latest_sidecar_release(
client: &reqwest::Client,
) -> Result<serde_json::Value, String> {
let releases_url = format!("{}/releases?limit=20", REPO_API);
let resp = client
.get(&releases_url)
.header("Accept", "application/json")
.send()
.await
.map_err(|e| format!("Failed to fetch releases: {}", e))?;
if !resp.status().is_success() {
return Err(format!("Failed to fetch releases: HTTP {}", resp.status()));
}
let releases = resp
.json::<Vec<serde_json::Value>>()
.await
.map_err(|e| format!("Failed to parse releases JSON: {}", e))?;
releases
.into_iter()
.find(|r| {
r["tag_name"]
.as_str()
.map_or(false, |t| t.starts_with("sidecar-v"))
})
.ok_or_else(|| "No sidecar release found".to_string())
}
/// Extract the version string from a sidecar tag name (e.g. "sidecar-v1.0.1" -> "1.0.1").
fn version_from_sidecar_tag(tag: &str) -> &str {
tag.strip_prefix("sidecar-v").unwrap_or(tag)
}
/// Check if the sidecar binary exists for the currently installed version.
#[tauri::command]
pub fn check_sidecar() -> bool {
let data_dir = match DATA_DIR.get() {
Some(d) => d,
None => return false,
};
let version = match read_local_sidecar_version() {
Some(v) => v,
None => return false,
};
let binary_name = if cfg!(target_os = "windows") {
"voice-to-notes-sidecar.exe"
} else {
"voice-to-notes-sidecar"
};
let extract_dir = data_dir.join(format!("sidecar-{}", version));
extract_dir.join(binary_name).exists()
}
/// Determine the current platform name for asset downloads.
fn platform_os() -> &'static str {
if cfg!(target_os = "windows") {
"windows"
} else if cfg!(target_os = "macos") {
"macos"
} else {
"linux"
}
}
/// Determine the current architecture name for asset downloads.
fn platform_arch() -> &'static str {
if cfg!(target_arch = "aarch64") {
"aarch64"
} else {
"x86_64"
}
}
/// Download the sidecar binary for the given variant (cpu or cuda).
#[tauri::command]
pub async fn download_sidecar(app: AppHandle, variant: String) -> Result<(), String> {
let data_dir = DATA_DIR.get().ok_or("App data directory not initialized")?;
let os = platform_os();
let arch = platform_arch();
let asset_name = format!("sidecar-{}-{}-{}.zip", os, arch, variant);
// Fetch the latest sidecar release from Gitea API
let client = reqwest::Client::new();
let sidecar_release = fetch_latest_sidecar_release(&client).await?;
let tag = sidecar_release["tag_name"]
.as_str()
.ok_or("No tag_name in sidecar release")?;
let sidecar_version = version_from_sidecar_tag(tag).to_string();
// Find the matching asset
let assets = sidecar_release["assets"]
.as_array()
.ok_or("No assets found in sidecar release")?;
let download_url = assets
.iter()
.find(|a| a["name"].as_str() == Some(&asset_name))
.and_then(|a| a["browser_download_url"].as_str())
.ok_or_else(|| {
format!(
"Asset '{}' not found in sidecar release {}",
asset_name, tag
)
})?
.to_string();
// Stream download with progress events
let response: reqwest::Response = client
.get(&download_url)
.send()
.await
.map_err(|e| format!("Failed to start download: {}", e))?;
if !response.status().is_success() {
return Err(format!("Download failed: HTTP {}", response.status()));
}
let total: u64 = response.content_length().unwrap_or(0);
let mut downloaded: u64 = 0;
let mut stream = response.bytes_stream();
let zip_path = data_dir.join("sidecar.zip");
let mut file = std::fs::File::create(&zip_path)
.map_err(|e| format!("Failed to create zip file: {}", e))?;
while let Some(chunk) = stream.next().await {
let chunk: bytes::Bytes = chunk.map_err(|e| format!("Download stream error: {}", e))?;
file.write_all(&chunk)
.map_err(|e| format!("Failed to write chunk: {}", e))?;
downloaded += chunk.len() as u64;
let percent = if total > 0 {
(downloaded * 100 / total) as u8
} else {
0
};
let _ = app.emit(
"sidecar-download-progress",
DownloadProgress {
downloaded,
total,
percent,
},
);
}
// Extract the downloaded zip
let extract_dir = data_dir.join(format!("sidecar-{}", sidecar_version));
SidecarManager::extract_zip(&zip_path, &extract_dir)?;
// Make the binary executable on Unix
#[cfg(unix)]
{
use std::os::unix::fs::PermissionsExt;
let binary_path = extract_dir.join("voice-to-notes-sidecar");
if let Ok(meta) = std::fs::metadata(&binary_path) {
let mut perms = meta.permissions();
perms.set_mode(0o755);
let _ = std::fs::set_permissions(&binary_path, perms);
}
}
// Write the sidecar version file
write_local_sidecar_version(&sidecar_version)?;
// Clean up the zip file and old sidecar versions
let _ = std::fs::remove_file(&zip_path);
SidecarManager::cleanup_old_sidecars(data_dir, &sidecar_version);
Ok(())
}
/// Check if a sidecar update is available.
#[tauri::command]
pub async fn check_sidecar_update() -> Result<Option<UpdateInfo>, String> {
// If sidecar doesn't exist yet, return None (first launch handled separately)
if !check_sidecar() {
return Ok(None);
}
let current_version = match read_local_sidecar_version() {
Some(v) => v,
None => return Ok(None),
};
// Fetch latest sidecar release from Gitea API
let client = reqwest::Client::new();
let sidecar_release = fetch_latest_sidecar_release(&client).await?;
let latest_tag = sidecar_release["tag_name"]
.as_str()
.ok_or("No tag_name in sidecar release")?;
let latest_version = version_from_sidecar_tag(latest_tag);
if latest_version != current_version {
Ok(Some(UpdateInfo {
current_version,
latest_version: latest_version.to_string(),
}))
} else {
Ok(None)
}
}

View File

@@ -60,18 +60,3 @@ pub fn llama_list_models() -> Value {
pub fn get_data_dir() -> String { pub fn get_data_dir() -> String {
LlamaManager::data_dir().to_string_lossy().to_string() LlamaManager::data_dir().to_string_lossy().to_string()
} }
/// Log a message from the frontend to a file for debugging.
#[tauri::command]
pub fn log_frontend(level: String, message: String) {
use std::io::Write;
let log_path = LlamaManager::data_dir().join("frontend.log");
if let Ok(mut file) = std::fs::OpenOptions::new()
.create(true)
.append(true)
.open(&log_path)
{
let timestamp = chrono::Local::now().format("%Y-%m-%d %H:%M:%S");
let _ = writeln!(file, "[{timestamp}] [{level}] {message}");
}
}

View File

@@ -13,11 +13,8 @@ use commands::project::{
create_project, delete_project, get_project, list_projects, load_project_file, create_project, delete_project, get_project, list_projects, load_project_file,
load_project_transcript, save_project_file, save_project_transcript, update_segment, load_project_transcript, save_project_file, save_project_transcript, update_segment,
}; };
use commands::settings::{load_settings, save_settings, toggle_devtools}; use commands::settings::{load_settings, save_settings};
use commands::sidecar::{check_sidecar, check_sidecar_update, download_sidecar}; use commands::system::{get_data_dir, llama_list_models, llama_start, llama_status, llama_stop};
use commands::system::{
get_data_dir, llama_list_models, llama_start, llama_status, llama_stop, log_frontend,
};
use commands::transcribe::{download_diarize_model, run_pipeline, transcribe_file}; use commands::transcribe::{download_diarize_model, run_pipeline, transcribe_file};
use state::AppState; use state::AppState;
@@ -30,14 +27,6 @@ pub fn run() {
.plugin(tauri_plugin_dialog::init()) .plugin(tauri_plugin_dialog::init())
.manage(app_state) .manage(app_state)
.setup(|app| { .setup(|app| {
// Tell the sidecar manager where Tauri placed bundled resources
// and where to extract the sidecar archive
if let (Ok(resource_dir), Ok(data_dir)) =
(app.path().resource_dir(), app.path().app_local_data_dir())
{
sidecar::init_dirs(resource_dir, data_dir);
}
// Set the webview background to match the app's dark theme // Set the webview background to match the app's dark theme
if let Some(window) = app.get_webview_window("main") { if let Some(window) = app.get_webview_window("main") {
let _ = window.set_background_color(Some(Color(10, 10, 35, 255))); let _ = window.set_background_color(Some(Color(10, 10, 35, 255)));
@@ -68,11 +57,6 @@ pub fn run() {
get_data_dir, get_data_dir,
load_settings, load_settings,
save_settings, save_settings,
check_sidecar,
download_sidecar,
check_sidecar_update,
log_frontend,
toggle_devtools,
]) ])
.run(tauri::generate_context!()) .run(tauri::generate_context!())
.expect("error while running tauri application"); .expect("error while running tauri application");

View File

@@ -2,27 +2,11 @@ pub mod ipc;
pub mod messages; pub mod messages;
use std::io::{BufRead, BufReader, Write}; use std::io::{BufRead, BufReader, Write};
use std::path::{Path, PathBuf};
use std::process::{Child, ChildStdin, Command, Stdio}; use std::process::{Child, ChildStdin, Command, Stdio};
use std::sync::{Mutex, OnceLock}; use std::sync::{Mutex, OnceLock};
#[cfg(target_os = "windows")]
use std::os::windows::process::CommandExt;
use crate::sidecar::messages::IPCMessage; use crate::sidecar::messages::IPCMessage;
/// Resource directory set by the Tauri app during setup.
static RESOURCE_DIR: OnceLock<PathBuf> = OnceLock::new();
/// App data directory for extracting the sidecar archive.
pub(crate) static DATA_DIR: OnceLock<PathBuf> = OnceLock::new();
/// Initialize directories for sidecar resolution.
/// Must be called from the Tauri setup before any sidecar operations.
pub fn init_dirs(resource_dir: PathBuf, data_dir: PathBuf) {
RESOURCE_DIR.set(resource_dir).ok();
DATA_DIR.set(data_dir).ok();
}
/// Get the global sidecar manager singleton. /// Get the global sidecar manager singleton.
pub fn sidecar() -> &'static SidecarManager { pub fn sidecar() -> &'static SidecarManager {
static INSTANCE: OnceLock<SidecarManager> = OnceLock::new(); static INSTANCE: OnceLock<SidecarManager> = OnceLock::new();
@@ -56,193 +40,38 @@ impl SidecarManager {
cfg!(debug_assertions) || std::env::var("VOICE_TO_NOTES_DEV").is_ok() cfg!(debug_assertions) || std::env::var("VOICE_TO_NOTES_DEV").is_ok()
} }
/// Read the locally installed sidecar version from `sidecar-version.txt`.
fn read_sidecar_version() -> Result<String, String> {
let data_dir = DATA_DIR.get().ok_or("App data directory not initialized")?;
let version_file = data_dir.join("sidecar-version.txt");
std::fs::read_to_string(&version_file)
.map_err(|_| {
"Sidecar not installed: sidecar-version.txt not found. Please download the sidecar."
.to_string()
})
.map(|v| v.trim().to_string())
.and_then(|v| {
if v.is_empty() {
Err(
"Sidecar version file is empty. Please re-download the sidecar."
.to_string(),
)
} else {
Ok(v)
}
})
}
/// Resolve the frozen sidecar binary path (production mode). /// Resolve the frozen sidecar binary path (production mode).
/// fn resolve_sidecar_path() -> Result<std::path::PathBuf, String> {
/// Reads the installed sidecar version from `sidecar-version.txt` and let exe = std::env::current_exe().map_err(|e| format!("Cannot get current exe: {e}"))?;
/// looks for the binary in the corresponding `sidecar-{version}` directory. let exe_dir = exe
/// If the version file doesn't exist, the sidecar hasn't been downloaded yet. .parent()
fn resolve_sidecar_path() -> Result<PathBuf, String> { .ok_or_else(|| "Cannot get exe parent directory".to_string())?;
let binary_name = if cfg!(target_os = "windows") { let binary_name = if cfg!(target_os = "windows") {
"voice-to-notes-sidecar.exe" "voice-to-notes-sidecar.exe"
} else { } else {
"voice-to-notes-sidecar" "voice-to-notes-sidecar"
}; };
let data_dir = DATA_DIR.get().ok_or("App data directory not initialized")?; // Tauri places externalBin next to the app binary
let current_version = Self::read_sidecar_version()?; let path = exe_dir.join(binary_name);
let extract_dir = data_dir.join(format!("sidecar-{}", current_version));
let binary_path = extract_dir.join(binary_name);
// Already extracted — use it directly
if binary_path.exists() {
Self::cleanup_old_sidecars(data_dir, &current_version);
return Ok(binary_path);
}
// Find sidecar.zip in resource dir or next to exe
let zip_path = Self::find_sidecar_zip()?;
Self::extract_zip(&zip_path, &extract_dir)?;
if !binary_path.exists() {
return Err(format!(
"Sidecar binary not found after extraction at {}",
binary_path.display()
));
}
// Make executable on Unix
#[cfg(unix)]
{
use std::os::unix::fs::PermissionsExt;
if let Ok(meta) = std::fs::metadata(&binary_path) {
let mut perms = meta.permissions();
perms.set_mode(0o755);
let _ = std::fs::set_permissions(&binary_path, perms);
}
}
Self::cleanup_old_sidecars(data_dir, &current_version);
Ok(binary_path)
}
/// Locate the bundled sidecar.zip archive.
fn find_sidecar_zip() -> Result<PathBuf, String> {
let mut candidates: Vec<PathBuf> = Vec::new();
if let Some(resource_dir) = RESOURCE_DIR.get() {
candidates.push(resource_dir.join("sidecar.zip"));
}
if let Ok(exe) = std::env::current_exe() {
if let Some(exe_dir) = exe.parent() {
candidates.push(exe_dir.join("sidecar.zip"));
}
}
for path in &candidates {
if path.exists() { if path.exists() {
return Ok(path.clone()); return Ok(path);
} }
// Also check inside a subdirectory (onedir PyInstaller output)
let subdir_path = exe_dir.join("voice-to-notes-sidecar").join(binary_name);
if subdir_path.exists() {
return Ok(subdir_path);
} }
Err(format!( Err(format!(
"Sidecar archive not found. Checked:\n{}", "Sidecar binary not found. Looked for:\n {}\n {}",
candidates path.display(),
.iter() subdir_path.display(),
.map(|p| format!(" {}", p.display()))
.collect::<Vec<_>>()
.join("\n"),
)) ))
} }
/// Extract a zip archive to the given directory.
pub(crate) fn extract_zip(zip_path: &Path, dest: &Path) -> Result<(), String> {
eprintln!(
"[sidecar-rs] Extracting sidecar from {} to {}",
zip_path.display(),
dest.display()
);
// Clean destination so we don't mix old and new files
if dest.exists() {
std::fs::remove_dir_all(dest)
.map_err(|e| format!("Failed to clean extraction dir: {e}"))?;
}
std::fs::create_dir_all(dest)
.map_err(|e| format!("Failed to create extraction dir: {e}"))?;
let file =
std::fs::File::open(zip_path).map_err(|e| format!("Cannot open sidecar zip: {e}"))?;
let mut archive =
zip::ZipArchive::new(file).map_err(|e| format!("Invalid sidecar zip: {e}"))?;
for i in 0..archive.len() {
let mut entry = archive
.by_index(i)
.map_err(|e| format!("Zip entry error: {e}"))?;
let name = entry.name().to_string();
let outpath = dest.join(&name);
if entry.is_dir() {
std::fs::create_dir_all(&outpath)
.map_err(|e| format!("Cannot create dir {}: {e}", outpath.display()))?;
} else {
if let Some(parent) = outpath.parent() {
std::fs::create_dir_all(parent)
.map_err(|e| format!("Cannot create dir {}: {e}", parent.display()))?;
}
let mut outfile = std::fs::File::create(&outpath)
.map_err(|e| format!("Cannot create {}: {e}", outpath.display()))?;
std::io::copy(&mut entry, &mut outfile)
.map_err(|e| format!("Write error for {}: {e}", name))?;
}
}
eprintln!("[sidecar-rs] Sidecar extracted successfully");
Ok(())
}
/// Remove old sidecar-* directories that don't match the current version.
/// Called after the current version's sidecar is confirmed ready.
pub(crate) fn cleanup_old_sidecars(data_dir: &Path, current_version: &str) {
let current_dir_name = format!("sidecar-{}", current_version);
let entries = match std::fs::read_dir(data_dir) {
Ok(entries) => entries,
Err(e) => {
eprintln!("[sidecar-rs] Cannot read data dir for cleanup: {e}");
return;
}
};
for entry in entries.flatten() {
let name = entry.file_name();
let name_str = name.to_string_lossy();
if !name_str.starts_with("sidecar-") {
continue;
}
if *name_str == current_dir_name {
continue;
}
if entry.path().is_dir() {
eprintln!(
"[sidecar-rs] Removing old sidecar: {}",
entry.path().display()
);
if let Err(e) = std::fs::remove_dir_all(entry.path()) {
eprintln!(
"[sidecar-rs] Failed to remove {}: {e}",
entry.path().display()
);
}
}
}
}
/// Find a working Python command for the current platform. /// Find a working Python command for the current platform.
fn find_python_command() -> &'static str { fn find_python_command() -> &'static str {
if cfg!(target_os = "windows") { if cfg!(target_os = "windows") {
@@ -285,8 +114,15 @@ impl SidecarManager {
if Self::is_dev_mode() { if Self::is_dev_mode() {
self.start_python_dev() self.start_python_dev()
} else { } else {
let path = Self::resolve_sidecar_path()?; match Self::resolve_sidecar_path() {
self.start_binary(&path) Ok(path) => self.start_binary(&path),
Err(e) => {
eprintln!(
"[sidecar-rs] Frozen binary not found ({e}), falling back to dev mode"
);
self.start_python_dev()
}
}
} }
} }
@@ -295,33 +131,10 @@ impl SidecarManager {
self.stop().ok(); self.stop().ok();
eprintln!("[sidecar-rs] Starting frozen sidecar: {}", path.display()); eprintln!("[sidecar-rs] Starting frozen sidecar: {}", path.display());
// Log sidecar stderr to a file for diagnostics let child = Command::new(path)
let stderr_cfg = if let Some(data_dir) = DATA_DIR.get() { .stdin(Stdio::piped())
let _ = std::fs::create_dir_all(data_dir);
let log_path = data_dir.join("sidecar.log");
eprintln!("[sidecar-rs] Sidecar stderr → {}", log_path.display());
match std::fs::File::create(&log_path) {
Ok(f) => Stdio::from(f),
Err(e) => {
eprintln!("[sidecar-rs] Failed to create sidecar.log: {e}");
Stdio::inherit()
}
}
} else {
eprintln!("[sidecar-rs] DATA_DIR not set, sidecar stderr will not be logged");
Stdio::inherit()
};
let mut cmd = Command::new(path);
cmd.stdin(Stdio::piped())
.stdout(Stdio::piped()) .stdout(Stdio::piped())
.stderr(stderr_cfg); .stderr(Stdio::inherit())
// Hide the console window on Windows (CREATE_NO_WINDOW = 0x08000000)
#[cfg(target_os = "windows")]
cmd.creation_flags(0x08000000);
let child = cmd
.spawn() .spawn()
.map_err(|e| format!("Failed to start sidecar binary: {e}"))?; .map_err(|e| format!("Failed to start sidecar binary: {e}"))?;
@@ -387,22 +200,7 @@ impl SidecarManager {
.read_line(&mut line) .read_line(&mut line)
.map_err(|e| format!("Read error: {e}"))?; .map_err(|e| format!("Read error: {e}"))?;
if bytes == 0 { if bytes == 0 {
// Try to get the exit code for diagnostics return Err("Sidecar closed stdout before sending ready".to_string());
let exit_info = {
let mut proc = self.process.lock().map_err(|e| e.to_string())?;
if let Some(ref mut child) = *proc {
match child.try_wait() {
Ok(Some(status)) => format!(" (exit status: {status})"),
_ => String::new(),
}
} else {
String::new()
}
};
return Err(format!(
"Sidecar closed stdout before sending ready{exit_info}. \
The Python sidecar may have crashed on startup — check app logs for details."
));
} }
let trimmed = line.trim(); let trimmed = line.trim();
if trimmed.is_empty() { if trimmed.is_empty() {
@@ -432,46 +230,11 @@ impl SidecarManager {
/// Send a message and receive the response, calling a callback for intermediate messages. /// Send a message and receive the response, calling a callback for intermediate messages.
/// Intermediate messages include progress, pipeline.segment, and pipeline.speaker_update. /// Intermediate messages include progress, pipeline.segment, and pipeline.speaker_update.
///
/// If the sidecar has crashed (broken pipe), automatically restarts it and retries once.
pub fn send_and_receive_with_progress<F>( pub fn send_and_receive_with_progress<F>(
&self, &self,
msg: &IPCMessage, msg: &IPCMessage,
on_intermediate: F, on_intermediate: F,
) -> Result<IPCMessage, String> ) -> Result<IPCMessage, String>
where
F: Fn(&IPCMessage),
{
match self.send_and_receive_inner(msg, &on_intermediate) {
Ok(response) => Ok(response),
Err(e)
if e.contains("Write error")
|| e.contains("closed stdout")
|| e.contains("not available") =>
{
eprintln!("[sidecar-rs] Sidecar communication failed ({e}), restarting...");
self.cleanup_handles();
// Stop any zombie process
{
let mut proc = self.process.lock().map_err(|e| e.to_string())?;
if let Some(ref mut child) = proc.take() {
let _ = child.kill();
let _ = child.wait();
}
}
self.ensure_running()?;
self.send_and_receive_inner(msg, &on_intermediate)
}
Err(e) => Err(e),
}
}
/// Inner implementation of send_and_receive.
fn send_and_receive_inner<F>(
&self,
msg: &IPCMessage,
on_intermediate: &F,
) -> Result<IPCMessage, String>
where where
F: Fn(&IPCMessage), F: Fn(&IPCMessage),
{ {
@@ -557,39 +320,8 @@ impl SidecarManager {
} }
pub fn is_running(&self) -> bool { pub fn is_running(&self) -> bool {
let mut proc = match self.process.lock() { let proc = self.process.lock().ok();
Ok(p) => p, proc.map_or(false, |p| p.is_some())
Err(_) => return false,
};
if let Some(ref mut child) = *proc {
// Check if the process has exited
match child.try_wait() {
Ok(Some(_status)) => {
// Process has exited — clean up handles
eprintln!("[sidecar-rs] Sidecar process has exited");
drop(proc);
let _ = self.cleanup_handles();
false
}
Ok(None) => true, // Still running
Err(_) => false,
}
} else {
false
}
}
/// Clean up stdin/stdout/process handles after the sidecar has exited.
fn cleanup_handles(&self) {
if let Ok(mut s) = self.stdin.lock() {
*s = None;
}
if let Ok(mut r) = self.reader.lock() {
*r = None;
}
if let Ok(mut p) = self.process.lock() {
*p = None;
}
} }
} }

View File

@@ -1,7 +1,7 @@
{ {
"$schema": "https://schema.tauri.app/config/2", "$schema": "https://schema.tauri.app/config/2",
"productName": "Voice to Notes", "productName": "Voice to Notes",
"version": "0.2.28", "version": "0.1.0",
"identifier": "com.voicetonotes.app", "identifier": "com.voicetonotes.app",
"build": { "build": {
"beforeDevCommand": "npm run dev", "beforeDevCommand": "npm run dev",
@@ -22,7 +22,7 @@
} }
], ],
"security": { "security": {
"csp": "default-src 'self' http://tauri.localhost; connect-src ipc: http://ipc.localhost http://asset.localhost https://asset.localhost blob:; img-src 'self' asset: http://asset.localhost https://asset.localhost blob:; media-src 'self' asset: http://asset.localhost https://asset.localhost blob:; style-src 'self' 'unsafe-inline'", "csp": "default-src 'self'; img-src 'self' asset: https://asset.localhost; media-src 'self' asset: https://asset.localhost; style-src 'self' 'unsafe-inline'",
"assetProtocol": { "assetProtocol": {
"enable": true, "enable": true,
"scope": ["**"] "scope": ["**"]
@@ -31,7 +31,7 @@
}, },
"bundle": { "bundle": {
"active": true, "active": true,
"targets": ["deb", "rpm", "nsis", "msi", "dmg"], "targets": "all",
"icon": [ "icon": [
"icons/32x32.png", "icons/32x32.png",
"icons/128x128.png", "icons/128x128.png",
@@ -42,18 +42,17 @@
"category": "Utility", "category": "Utility",
"shortDescription": "Transcribe audio/video with speaker identification", "shortDescription": "Transcribe audio/video with speaker identification",
"longDescription": "Voice to Notes is a desktop application that transcribes audio and video recordings with speaker identification, synchronized playback, and AI-powered analysis. Export to SRT, WebVTT, ASS captions, or plain text.", "longDescription": "Voice to Notes is a desktop application that transcribes audio and video recordings with speaker identification, synchronized playback, and AI-powered analysis. Export to SRT, WebVTT, ASS captions, or plain text.",
"resources": [],
"copyright": "Voice to Notes Contributors", "copyright": "Voice to Notes Contributors",
"license": "MIT", "license": "MIT",
"linux": { "linux": {
"deb": { "deb": {
"depends": [] "depends": []
},
"appimage": {
"bundleMediaFramework": true
} }
}, },
"windows": { "windows": {
"nsis": {
"installerHooks": "nsis-hooks.nsh"
},
"wix": { "wix": {
"language": "en-US" "language": "en-US"
} }

View File

@@ -11,7 +11,7 @@
let { visible, onClose }: Props = $props(); let { visible, onClose }: Props = $props();
let localSettings = $state<AppSettings>({ ...$settings }); let localSettings = $state<AppSettings>({ ...$settings });
let activeTab = $state<'transcription' | 'speakers' | 'ai' | 'debug'>('transcription'); let activeTab = $state<'transcription' | 'speakers' | 'ai' | 'local'>('transcription');
let modelStatus = $state<'idle' | 'downloading' | 'success' | 'error'>('idle'); let modelStatus = $state<'idle' | 'downloading' | 'success' | 'error'>('idle');
let modelError = $state(''); let modelError = $state('');
let revealedFields = $state<Set<string>>(new Set()); let revealedFields = $state<Set<string>>(new Set());
@@ -81,8 +81,8 @@
<button class="tab" class:active={activeTab === 'ai'} onclick={() => activeTab = 'ai'}> <button class="tab" class:active={activeTab === 'ai'} onclick={() => activeTab = 'ai'}>
AI Provider AI Provider
</button> </button>
<button class="tab" class:active={activeTab === 'debug'} onclick={() => activeTab = 'debug'}> <button class="tab" class:active={activeTab === 'local'} onclick={() => activeTab = 'local'}>
Debug Local AI
</button> </button>
</div> </div>
@@ -181,27 +181,14 @@
<div class="field"> <div class="field">
<label for="ai-provider">AI Provider</label> <label for="ai-provider">AI Provider</label>
<select id="ai-provider" bind:value={localSettings.ai_provider}> <select id="ai-provider" bind:value={localSettings.ai_provider}>
<option value="local">Ollama</option> <option value="local">Local (llama-server)</option>
<option value="openai">OpenAI</option> <option value="openai">OpenAI</option>
<option value="anthropic">Anthropic</option> <option value="anthropic">Anthropic</option>
<option value="litellm">OpenAI Compatible</option> <option value="litellm">OpenAI Compatible</option>
</select> </select>
</div> </div>
{#if localSettings.ai_provider === 'local'} {#if localSettings.ai_provider === 'openai'}
<div class="field">
<label for="ollama-url">Ollama URL</label>
<input id="ollama-url" type="text" bind:value={localSettings.ollama_url} placeholder="http://localhost:11434" />
</div>
<div class="field">
<label for="ollama-model">Model</label>
<input id="ollama-model" type="text" bind:value={localSettings.ollama_model} placeholder="llama3.2" />
</div>
<p class="hint">
Install Ollama from ollama.com, then pull a model with <code>ollama pull llama3.2</code>.
The app connects via Ollama's OpenAI-compatible API.
</p>
{:else if localSettings.ai_provider === 'openai'}
<div class="field"> <div class="field">
<label for="openai-key">OpenAI API Key</label> <label for="openai-key">OpenAI API Key</label>
<div class="input-reveal"> <div class="input-reveal">
@@ -242,21 +229,19 @@
<input id="litellm-model" type="text" bind:value={localSettings.litellm_model} placeholder="provider/model-name" /> <input id="litellm-model" type="text" bind:value={localSettings.litellm_model} placeholder="provider/model-name" />
</div> </div>
{/if} {/if}
{:else if activeTab === 'debug'} {:else}
<div class="field checkbox"> <div class="field">
<label> <label for="llama-binary">llama-server Binary Path</label>
<input <input id="llama-binary" type="text" bind:value={localSettings.local_binary_path} placeholder="llama-server" />
type="checkbox"
checked={localSettings.devtools_enabled}
onchange={async (e) => {
localSettings.devtools_enabled = (e.target as HTMLInputElement).checked;
await invoke('toggle_devtools', { open: localSettings.devtools_enabled });
}}
/>
Enable Developer Tools
</label>
<p class="hint">Opens the browser inspector for debugging. Changes take effect immediately.</p>
</div> </div>
<div class="field">
<label for="llama-model">GGUF Model Path</label>
<input id="llama-model" type="text" bind:value={localSettings.local_model_path} placeholder="~/.voicetonotes/models/model.gguf" />
</div>
<p class="hint">
Place GGUF model files in ~/.voicetonotes/models/ for auto-detection.
The local AI server uses the OpenAI-compatible API from llama.cpp.
</p>
{/if} {/if}
</div> </div>

View File

@@ -1,320 +0,0 @@
<script lang="ts">
import { invoke } from '@tauri-apps/api/core';
import { listen } from '@tauri-apps/api/event';
import type { UnlistenFn } from '@tauri-apps/api/event';
import { onMount } from 'svelte';
interface Props {
onComplete: () => void;
}
let { onComplete }: Props = $props();
let variant = $state<'cpu' | 'cuda'>('cpu');
let downloading = $state(false);
let downloadProgress = $state({ downloaded: 0, total: 0, percent: 0 });
let error = $state('');
let success = $state(false);
let unlisten: UnlistenFn | null = null;
onMount(() => {
return () => {
unlisten?.();
};
});
function formatBytes(bytes: number): string {
if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(0)} KB`;
if (bytes < 1024 * 1024 * 1024) return `${(bytes / (1024 * 1024)).toFixed(0)} MB`;
return `${(bytes / (1024 * 1024 * 1024)).toFixed(1)} GB`;
}
async function startDownload() {
downloading = true;
error = '';
success = false;
unlisten = await listen<{ downloaded: number; total: number; percent: number }>(
'sidecar-download-progress',
(event) => {
downloadProgress = event.payload;
}
);
try {
await invoke('download_sidecar', { variant });
success = true;
// Brief pause so the user sees "Complete" before the screen goes away
setTimeout(() => {
onComplete();
}, 800);
} catch (err) {
error = String(err);
} finally {
downloading = false;
unlisten?.();
unlisten = null;
}
}
</script>
<div class="setup-overlay">
<div class="setup-card">
<h1 class="app-title">Voice to Notes</h1>
<h2 class="setup-heading">First-Time Setup</h2>
<p class="setup-description">
Voice to Notes needs to download its AI engine to transcribe audio.
</p>
{#if !downloading && !success}
<div class="variant-options">
<label class="variant-option" class:selected={variant === 'cpu'}>
<input type="radio" name="variant" value="cpu" bind:group={variant} />
<div class="variant-info">
<span class="variant-label">Standard (CPU)</span>
<span class="variant-desc">Works on all computers (~500 MB download)</span>
</div>
</label>
<label class="variant-option" class:selected={variant === 'cuda'}>
<input type="radio" name="variant" value="cuda" bind:group={variant} />
<div class="variant-info">
<span class="variant-label">GPU Accelerated (CUDA)</span>
<span class="variant-desc">Faster transcription with NVIDIA GPU (~2 GB download)</span>
</div>
</label>
</div>
{#if error}
<div class="error-box">
<p class="error-text">{error}</p>
<button class="btn-retry" onclick={startDownload}>Retry</button>
</div>
{:else}
<button class="btn-download" onclick={startDownload}>
Download &amp; Install
</button>
{/if}
{:else if downloading}
<div class="progress-section">
<div class="progress-bar-track">
<div class="progress-bar-fill" style="width: {downloadProgress.percent}%"></div>
</div>
<p class="progress-text">
{downloadProgress.percent}% — {formatBytes(downloadProgress.downloaded)} / {formatBytes(downloadProgress.total)}
</p>
<p class="progress-hint">Downloading {variant === 'cuda' ? 'GPU' : 'CPU'} engine...</p>
</div>
{:else if success}
<div class="success-section">
<div class="success-icon">&#10003;</div>
<p class="success-text">Setup complete!</p>
</div>
{/if}
</div>
</div>
<style>
.setup-overlay {
position: fixed;
inset: 0;
background: #0a0a23;
display: flex;
align-items: center;
justify-content: center;
z-index: 10000;
}
.setup-card {
background: #16213e;
border: 1px solid #2a3a5e;
border-radius: 12px;
padding: 2.5rem 3rem;
max-width: 480px;
width: 90vw;
color: #e0e0e0;
box-shadow: 0 8px 32px rgba(0, 0, 0, 0.5);
text-align: center;
}
.app-title {
font-size: 1.8rem;
margin: 0 0 0.25rem;
color: #e94560;
font-weight: 700;
}
.setup-heading {
font-size: 1.1rem;
margin: 0 0 0.75rem;
color: #e0e0e0;
font-weight: 500;
}
.setup-description {
font-size: 0.9rem;
color: #b0b0b0;
margin: 0 0 1.5rem;
line-height: 1.5;
}
.variant-options {
display: flex;
flex-direction: column;
gap: 0.75rem;
margin-bottom: 1.5rem;
text-align: left;
}
.variant-option {
display: flex;
align-items: flex-start;
gap: 0.75rem;
padding: 0.85rem 1rem;
border: 1px solid #2a3a5e;
border-radius: 8px;
cursor: pointer;
transition: border-color 0.15s, background 0.15s;
}
.variant-option:hover {
border-color: #4a5568;
background: rgba(255, 255, 255, 0.02);
}
.variant-option.selected {
border-color: #e94560;
background: rgba(233, 69, 96, 0.08);
}
.variant-option input[type='radio'] {
margin-top: 0.2rem;
accent-color: #e94560;
flex-shrink: 0;
}
.variant-info {
display: flex;
flex-direction: column;
gap: 0.2rem;
}
.variant-label {
font-size: 0.9rem;
font-weight: 500;
color: #e0e0e0;
}
.variant-desc {
font-size: 0.78rem;
color: #888;
}
.btn-download {
background: #e94560;
border: none;
color: white;
padding: 0.7rem 1.5rem;
border-radius: 6px;
cursor: pointer;
font-size: 0.9rem;
font-weight: 500;
width: 100%;
transition: background 0.15s;
}
.btn-download:hover {
background: #d63851;
}
.progress-section {
margin-top: 0.5rem;
}
.progress-bar-track {
width: 100%;
height: 8px;
background: #1a1a2e;
border-radius: 4px;
overflow: hidden;
border: 1px solid #2a3a5e;
}
.progress-bar-fill {
height: 100%;
background: #e94560;
border-radius: 4px;
transition: width 0.3s ease;
}
.progress-text {
margin: 0.75rem 0 0;
font-size: 0.85rem;
color: #e0e0e0;
font-variant-numeric: tabular-nums;
}
.progress-hint {
margin: 0.35rem 0 0;
font-size: 0.78rem;
color: #888;
}
.error-box {
background: rgba(233, 69, 96, 0.1);
border: 1px solid rgba(233, 69, 96, 0.3);
border-radius: 8px;
padding: 1rem;
}
.error-text {
color: #e94560;
font-size: 0.85rem;
margin: 0 0 0.75rem;
word-break: break-word;
line-height: 1.4;
}
.btn-retry {
background: #e94560;
border: none;
color: white;
padding: 0.5rem 1.25rem;
border-radius: 6px;
cursor: pointer;
font-size: 0.85rem;
font-weight: 500;
}
.btn-retry:hover {
background: #d63851;
}
.success-section {
display: flex;
flex-direction: column;
align-items: center;
gap: 0.5rem;
padding: 1rem 0;
}
.success-icon {
width: 48px;
height: 48px;
border-radius: 50%;
background: rgba(78, 205, 196, 0.15);
color: #4ecdc4;
display: flex;
align-items: center;
justify-content: center;
font-size: 1.5rem;
font-weight: 700;
}
.success-text {
color: #4ecdc4;
font-size: 1rem;
margin: 0;
font-weight: 500;
}
</style>

View File

@@ -272,9 +272,7 @@
<style> <style>
.transcript-editor { .transcript-editor {
flex: 1; flex: 1;
min-width: 0;
overflow-y: auto; overflow-y: auto;
overflow-x: hidden;
padding: 1rem; padding: 1rem;
background: #16213e; background: #16213e;
border-radius: 8px; border-radius: 8px;
@@ -321,7 +319,6 @@
.segment-text { .segment-text {
line-height: 1.6; line-height: 1.6;
padding-left: 0.75rem; padding-left: 0.75rem;
white-space: pre-wrap;
word-wrap: break-word; word-wrap: break-word;
overflow-wrap: break-word; overflow-wrap: break-word;
} }

View File

@@ -10,15 +10,14 @@ export interface AppSettings {
litellm_model: string; litellm_model: string;
litellm_api_key: string; litellm_api_key: string;
litellm_api_base: string; litellm_api_base: string;
ollama_url: string; local_model_path: string;
ollama_model: string; local_binary_path: string;
transcription_model: string; transcription_model: string;
transcription_device: string; transcription_device: string;
transcription_language: string; transcription_language: string;
skip_diarization: boolean; skip_diarization: boolean;
hf_token: string; hf_token: string;
num_speakers: number | null; num_speakers: number | null;
devtools_enabled: boolean;
} }
const defaults: AppSettings = { const defaults: AppSettings = {
@@ -30,15 +29,14 @@ const defaults: AppSettings = {
litellm_model: 'gpt-4o-mini', litellm_model: 'gpt-4o-mini',
litellm_api_key: '', litellm_api_key: '',
litellm_api_base: '', litellm_api_base: '',
ollama_url: 'http://localhost:11434', local_model_path: '',
ollama_model: 'llama3.2', local_binary_path: 'llama-server',
transcription_model: 'base', transcription_model: 'base',
transcription_device: 'cpu', transcription_device: 'cpu',
transcription_language: '', transcription_language: '',
skip_diarization: false, skip_diarization: false,
hf_token: '', hf_token: '',
num_speakers: null, num_speakers: null,
devtools_enabled: false,
}; };
export const settings = writable<AppSettings>({ ...defaults }); export const settings = writable<AppSettings>({ ...defaults });
@@ -61,7 +59,7 @@ export async function saveSettings(s: AppSettings): Promise<void> {
openai: { api_key: s.openai_api_key, model: s.openai_model }, openai: { api_key: s.openai_api_key, model: s.openai_model },
anthropic: { api_key: s.anthropic_api_key, model: s.anthropic_model }, anthropic: { api_key: s.anthropic_api_key, model: s.anthropic_model },
litellm: { api_key: s.litellm_api_key, api_base: s.litellm_api_base, model: s.litellm_model }, litellm: { api_key: s.litellm_api_key, api_base: s.litellm_api_base, model: s.litellm_model },
local: { model: s.ollama_model, base_url: s.ollama_url + '/v1' }, local: { model: s.local_model_path, base_url: 'http://localhost:8080' },
}; };
const config = configMap[s.ai_provider]; const config = configMap[s.ai_provider];
if (config) { if (config) {

View File

@@ -8,7 +8,6 @@
import AIChatPanel from '$lib/components/AIChatPanel.svelte'; import AIChatPanel from '$lib/components/AIChatPanel.svelte';
import ProgressOverlay from '$lib/components/ProgressOverlay.svelte'; import ProgressOverlay from '$lib/components/ProgressOverlay.svelte';
import SettingsModal from '$lib/components/SettingsModal.svelte'; import SettingsModal from '$lib/components/SettingsModal.svelte';
import SidecarSetup from '$lib/components/SidecarSetup.svelte';
import { segments, speakers } from '$lib/stores/transcript'; import { segments, speakers } from '$lib/stores/transcript';
import { settings, loadSettings } from '$lib/stores/settings'; import { settings, loadSettings } from '$lib/stores/settings';
import type { Segment, Speaker } from '$lib/types/transcript'; import type { Segment, Speaker } from '$lib/types/transcript';
@@ -19,61 +18,13 @@
let audioUrl = $state(''); let audioUrl = $state('');
let showSettings = $state(false); let showSettings = $state(false);
// Sidecar state
let sidecarReady = $state(false);
let sidecarChecked = $state(false);
// Sidecar update state
let sidecarUpdate = $state<{ current_version: string; latest_version: string } | null>(null);
let showUpdateDownload = $state(false);
let updateDismissed = $state(false);
// Project management state // Project management state
let currentProjectPath = $state<string | null>(null); let currentProjectPath = $state<string | null>(null);
let currentProjectName = $state(''); let currentProjectName = $state('');
let audioFilePath = $state(''); let audioFilePath = $state('');
async function checkSidecar() {
try {
const ready = await invoke<boolean>('check_sidecar');
sidecarReady = ready;
} catch {
sidecarReady = false;
}
sidecarChecked = true;
}
async function checkSidecarUpdate() {
try {
const update = await invoke<{ current_version: string; latest_version: string } | null>('check_sidecar_update');
sidecarUpdate = update;
} catch {
// Silently ignore update check failures
}
}
function handleSidecarSetupComplete() {
sidecarReady = true;
checkSidecarUpdate();
}
function handleUpdateComplete() {
showUpdateDownload = false;
sidecarUpdate = null;
}
onMount(() => { onMount(() => {
loadSettings().then(() => { loadSettings();
// Restore devtools state from settings
if ($settings.devtools_enabled) {
invoke('toggle_devtools', { open: true });
}
});
checkSidecar().then(() => {
if (sidecarReady) {
checkSidecarUpdate();
}
});
// Global keyboard shortcuts // Global keyboard shortcuts
function handleKeyDown(e: KeyboardEvent) { function handleKeyDown(e: KeyboardEvent) {
@@ -492,31 +443,14 @@
} }
</script> </script>
{#if !appReady || !sidecarChecked} {#if !appReady}
<div class="splash-screen"> <div class="splash-screen">
<h1 class="splash-title">Voice to Notes</h1> <h1 class="splash-title">Voice to Notes</h1>
<p class="splash-subtitle">Loading...</p> <p class="splash-subtitle">Loading...</p>
<div class="splash-spinner"></div> <div class="splash-spinner"></div>
</div> </div>
{:else if sidecarChecked && !sidecarReady && !showUpdateDownload}
<SidecarSetup onComplete={handleSidecarSetupComplete} />
{:else if showUpdateDownload}
<SidecarSetup onComplete={handleUpdateComplete} />
{:else} {:else}
<div class="app-shell"> <div class="app-shell">
{#if sidecarUpdate && !updateDismissed}
<div class="update-banner">
<span class="update-text">
Sidecar update available (v{sidecarUpdate.current_version} &rarr; v{sidecarUpdate.latest_version})
</span>
<button class="update-btn" onclick={() => showUpdateDownload = true}>
Update
</button>
<button class="update-dismiss" onclick={() => updateDismissed = true} title="Dismiss">
&times;
</button>
</div>
{/if}
<div class="app-header"> <div class="app-header">
<div class="header-actions"> <div class="header-actions">
<button class="settings-btn" onclick={openProject} disabled={isTranscribing}> <button class="settings-btn" onclick={openProject} disabled={isTranscribing}>
@@ -740,45 +674,4 @@
@keyframes spin { @keyframes spin {
to { transform: rotate(360deg); } to { transform: rotate(360deg); }
} }
/* Sidecar update banner */
.update-banner {
display: flex;
align-items: center;
gap: 0.75rem;
padding: 0.5rem 1rem;
background: rgba(78, 205, 196, 0.1);
border-bottom: 1px solid rgba(78, 205, 196, 0.25);
color: #e0e0e0;
font-size: 0.85rem;
}
.update-text {
flex: 1;
color: #b0b0b0;
}
.update-btn {
background: #4ecdc4;
border: none;
color: #0a0a23;
padding: 0.3rem 0.85rem;
border-radius: 4px;
cursor: pointer;
font-size: 0.8rem;
font-weight: 600;
}
.update-btn:hover {
background: #3dbdb5;
}
.update-dismiss {
background: none;
border: none;
color: #888;
font-size: 1.1rem;
cursor: pointer;
padding: 0.1rem 0.3rem;
line-height: 1;
}
.update-dismiss:hover {
color: #e0e0e0;
}
</style> </style>