2 Commits

Author SHA1 Message Date
Claude
eb9ec687cb Use uv for Python management in CI and build script
- CI: install uv via astral-sh/setup-uv, use uv to install Python
  and run the build script (replaces setup-python which fails on
  self-hosted macOS runners)
- build_sidecar.py: auto-detects uv and uses it for venv creation
  and package installation (much faster), falls back to standard
  venv + pip when uv is not available

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-20 22:56:13 -07:00
Claude
d297540053 Skip setup-python on macOS, use system Python instead
setup-python's internal install script hardcodes /Users/runner which
fails on self-hosted runners without sudo. macOS ships with Python 3
so we use it directly and skip the action entirely.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-20 22:47:05 -07:00
40 changed files with 411 additions and 3237 deletions

View File

@@ -1,402 +0,0 @@
name: Build Sidecars
on:
push:
branches: [main]
paths: ['python/**']
workflow_dispatch:
jobs:
bump-sidecar-version:
name: Bump sidecar version and tag
if: "!contains(github.event.head_commit.message, '[skip ci]')"
runs-on: ubuntu-latest
outputs:
version: ${{ steps.bump.outputs.version }}
tag: ${{ steps.bump.outputs.tag }}
has_changes: ${{ steps.check_changes.outputs.has_changes }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 2
- name: Check for python changes
id: check_changes
run: |
# If triggered by workflow_dispatch, always build
if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
echo "has_changes=true" >> $GITHUB_OUTPUT
exit 0
fi
# Check if any python/ files changed in this commit
CHANGED=$(git diff --name-only HEAD~1 HEAD -- python/ 2>/dev/null || echo "")
if [ -n "$CHANGED" ]; then
echo "has_changes=true" >> $GITHUB_OUTPUT
echo "Python changes detected: $CHANGED"
else
echo "has_changes=false" >> $GITHUB_OUTPUT
echo "No python/ changes detected, skipping sidecar build"
fi
- name: Configure git
if: steps.check_changes.outputs.has_changes == 'true'
run: |
git config user.name "Gitea Actions"
git config user.email "actions@gitea.local"
- name: Bump sidecar patch version
if: steps.check_changes.outputs.has_changes == 'true'
id: bump
run: |
# Read current version from python/pyproject.toml
CURRENT=$(grep '^version = ' python/pyproject.toml | head -1 | sed 's/version = "\(.*\)"/\1/')
echo "Current sidecar version: ${CURRENT}"
# Increment patch number
MAJOR=$(echo "${CURRENT}" | cut -d. -f1)
MINOR=$(echo "${CURRENT}" | cut -d. -f2)
PATCH=$(echo "${CURRENT}" | cut -d. -f3)
NEW_PATCH=$((PATCH + 1))
NEW_VERSION="${MAJOR}.${MINOR}.${NEW_PATCH}"
echo "New sidecar version: ${NEW_VERSION}"
# Update python/pyproject.toml
sed -i "s/^version = \"${CURRENT}\"/version = \"${NEW_VERSION}\"/" python/pyproject.toml
echo "version=${NEW_VERSION}" >> $GITHUB_OUTPUT
echo "tag=sidecar-v${NEW_VERSION}" >> $GITHUB_OUTPUT
- name: Commit and tag
if: steps.check_changes.outputs.has_changes == 'true'
env:
BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
run: |
NEW_VERSION="${{ steps.bump.outputs.version }}"
TAG="${{ steps.bump.outputs.tag }}"
git add python/pyproject.toml
git commit -m "chore: bump sidecar version to ${NEW_VERSION} [skip ci]"
git tag "${TAG}"
# Push using token for authentication (rebase in case another workflow pushed first)
REMOTE_URL=$(git remote get-url origin | sed "s|://|://gitea-actions:${BUILD_TOKEN}@|")
git pull --rebase "${REMOTE_URL}" main || true
git push "${REMOTE_URL}" HEAD:main
git push "${REMOTE_URL}" "${TAG}"
- name: Create Gitea release
if: steps.check_changes.outputs.has_changes == 'true'
env:
BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
run: |
REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
TAG="${{ steps.bump.outputs.tag }}"
VERSION="${{ steps.bump.outputs.version }}"
RELEASE_NAME="Sidecar v${VERSION}"
curl -s -X POST \
-H "Authorization: token ${BUILD_TOKEN}" \
-H "Content-Type: application/json" \
-d "{\"tag_name\": \"${TAG}\", \"name\": \"${RELEASE_NAME}\", \"body\": \"Automated sidecar build.\", \"draft\": false, \"prerelease\": false}" \
"${REPO_API}/releases"
echo "Created release: ${RELEASE_NAME}"
build-sidecar-linux:
name: Build Sidecar (Linux)
needs: bump-sidecar-version
if: needs.bump-sidecar-version.outputs.has_changes == 'true'
runs-on: ubuntu-latest
env:
PYTHON_VERSION: "3.11"
steps:
- uses: actions/checkout@v4
with:
ref: ${{ needs.bump-sidecar-version.outputs.tag }}
- name: Install uv
run: |
if command -v uv &> /dev/null; then
echo "uv already installed: $(uv --version)"
else
curl -LsSf https://astral.sh/uv/install.sh | sh
echo "$HOME/.local/bin" >> $GITHUB_PATH
fi
- name: Install ffmpeg
run: sudo apt-get update && sudo apt-get install -y ffmpeg
- name: Set up Python
run: uv python install ${{ env.PYTHON_VERSION }}
- name: Build sidecar (CUDA)
working-directory: python
run: uv run --python ${{ env.PYTHON_VERSION }} python build_sidecar.py --with-cuda
- name: Package sidecar (CUDA)
run: |
cd python/dist/voice-to-notes-sidecar && zip -r ../../../sidecar-linux-x86_64-cuda.zip .
- name: Build sidecar (CPU)
working-directory: python
run: |
rm -rf dist/voice-to-notes-sidecar
uv run --python ${{ env.PYTHON_VERSION }} python build_sidecar.py --cpu-only
- name: Package sidecar (CPU)
run: |
cd python/dist/voice-to-notes-sidecar && zip -r ../../../sidecar-linux-x86_64-cpu.zip .
- name: Upload to sidecar release
env:
BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
run: |
sudo apt-get install -y jq
REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
TAG="${{ needs.bump-sidecar-version.outputs.tag }}"
# Find the sidecar release by tag (retry up to 30 times with 10s delay)
echo "Waiting for sidecar release ${TAG} to be available..."
for i in $(seq 1 30); do
RELEASE_JSON=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
"${REPO_API}/releases/tags/${TAG}")
RELEASE_ID=$(echo "$RELEASE_JSON" | jq -r '.id // empty')
if [ -n "${RELEASE_ID}" ] && [ "${RELEASE_ID}" != "null" ]; then
echo "Found sidecar release: ${TAG} (ID: ${RELEASE_ID})"
break
fi
echo "Attempt ${i}/30: Release not ready yet, retrying in 10s..."
sleep 10
done
if [ -z "${RELEASE_ID}" ] || [ "${RELEASE_ID}" = "null" ]; then
echo "ERROR: Failed to find sidecar release for tag ${TAG} after 30 attempts."
exit 1
fi
for file in sidecar-*.zip; do
filename=$(basename "$file")
encoded_name=$(echo "$filename" | sed 's/ /%20/g')
echo "Uploading ${filename} ($(du -h "$file" | cut -f1))..."
ASSET_ID=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
"${REPO_API}/releases/${RELEASE_ID}/assets" | jq -r ".[] | select(.name == \"${filename}\") | .id // empty")
if [ -n "${ASSET_ID}" ]; then
curl -s -X DELETE -H "Authorization: token ${BUILD_TOKEN}" \
"${REPO_API}/releases/${RELEASE_ID}/assets/${ASSET_ID}"
fi
HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X POST \
-H "Authorization: token ${BUILD_TOKEN}" \
-H "Content-Type: application/octet-stream" \
-T "$file" \
"${REPO_API}/releases/${RELEASE_ID}/assets?name=${encoded_name}")
echo "Upload response: HTTP ${HTTP_CODE}"
done
build-sidecar-windows:
name: Build Sidecar (Windows)
needs: bump-sidecar-version
if: needs.bump-sidecar-version.outputs.has_changes == 'true'
runs-on: windows-latest
env:
PYTHON_VERSION: "3.11"
steps:
- uses: actions/checkout@v4
with:
ref: ${{ needs.bump-sidecar-version.outputs.tag }}
- name: Install uv
shell: powershell
run: |
if (Get-Command uv -ErrorAction SilentlyContinue) {
Write-Host "uv already installed: $(uv --version)"
} else {
irm https://astral.sh/uv/install.ps1 | iex
echo "$env:USERPROFILE\.local\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
}
- name: Install ffmpeg
shell: powershell
run: choco install ffmpeg -y
- name: Set up Python
shell: powershell
run: uv python install ${{ env.PYTHON_VERSION }}
- name: Install 7-Zip
shell: powershell
run: |
if (-not (Get-Command 7z -ErrorAction SilentlyContinue)) {
choco install 7zip -y
}
- name: Build sidecar (CUDA)
shell: powershell
working-directory: python
run: uv run --python ${{ env.PYTHON_VERSION }} python build_sidecar.py --with-cuda
- name: Package sidecar (CUDA)
shell: powershell
run: |
7z a -tzip -mx=5 sidecar-windows-x86_64-cuda.zip .\python\dist\voice-to-notes-sidecar\*
- name: Build sidecar (CPU)
shell: powershell
working-directory: python
run: |
Remove-Item -Recurse -Force dist\voice-to-notes-sidecar
uv run --python ${{ env.PYTHON_VERSION }} python build_sidecar.py --cpu-only
- name: Package sidecar (CPU)
shell: powershell
run: |
7z a -tzip -mx=5 sidecar-windows-x86_64-cpu.zip .\python\dist\voice-to-notes-sidecar\*
- name: Upload to sidecar release
shell: powershell
env:
BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
run: |
$REPO_API = "${{ github.server_url }}/api/v1/repos/${{ github.repository }}"
$Headers = @{ "Authorization" = "token $env:BUILD_TOKEN" }
$TAG = "${{ needs.bump-sidecar-version.outputs.tag }}"
# Find the sidecar release by tag (retry up to 30 times with 10s delay)
Write-Host "Waiting for sidecar release ${TAG} to be available..."
$RELEASE_ID = $null
for ($i = 1; $i -le 30; $i++) {
try {
$release = Invoke-RestMethod -Uri "${REPO_API}/releases/tags/${TAG}" -Headers $Headers -ErrorAction Stop
$RELEASE_ID = $release.id
if ($RELEASE_ID) {
Write-Host "Found sidecar release: ${TAG} (ID: ${RELEASE_ID})"
break
}
} catch {
# Release not ready yet
}
Write-Host "Attempt ${i}/30: Release not ready yet, retrying in 10s..."
Start-Sleep -Seconds 10
}
if (-not $RELEASE_ID) {
Write-Host "ERROR: Failed to find sidecar release for tag ${TAG} after 30 attempts."
exit 1
}
Get-ChildItem -Path . -Filter "sidecar-*.zip" | ForEach-Object {
$filename = $_.Name
$encodedName = [System.Uri]::EscapeDataString($filename)
$size = [math]::Round($_.Length / 1MB, 1)
Write-Host "Uploading ${filename} (${size} MB)..."
try {
$assets = Invoke-RestMethod -Uri "${REPO_API}/releases/${RELEASE_ID}/assets" -Headers $Headers
$existing = $assets | Where-Object { $_.name -eq $filename }
if ($existing) {
Invoke-RestMethod -Uri "${REPO_API}/releases/${RELEASE_ID}/assets/$($existing.id)" -Method Delete -Headers $Headers
}
} catch {}
$uploadUrl = "${REPO_API}/releases/${RELEASE_ID}/assets?name=${encodedName}"
$result = curl.exe --fail --silent --show-error `
-X POST `
-H "Authorization: token $env:BUILD_TOKEN" `
-H "Content-Type: application/octet-stream" `
-T "$($_.FullName)" `
"$uploadUrl" 2>&1
if ($LASTEXITCODE -eq 0) {
Write-Host "Upload successful: ${filename}"
} else {
Write-Host "WARNING: Upload failed for ${filename}: ${result}"
}
}
build-sidecar-macos:
name: Build Sidecar (macOS)
needs: bump-sidecar-version
if: needs.bump-sidecar-version.outputs.has_changes == 'true'
runs-on: macos-latest
env:
PYTHON_VERSION: "3.11"
steps:
- uses: actions/checkout@v4
with:
ref: ${{ needs.bump-sidecar-version.outputs.tag }}
- name: Install uv
run: |
if command -v uv &> /dev/null; then
echo "uv already installed: $(uv --version)"
else
curl -LsSf https://astral.sh/uv/install.sh | sh
echo "$HOME/.local/bin" >> $GITHUB_PATH
fi
- name: Install ffmpeg
run: brew install ffmpeg
- name: Set up Python
run: uv python install ${{ env.PYTHON_VERSION }}
- name: Build sidecar (CPU)
working-directory: python
run: uv run --python ${{ env.PYTHON_VERSION }} python build_sidecar.py --cpu-only
- name: Package sidecar (CPU)
run: |
cd python/dist/voice-to-notes-sidecar && zip -r ../../../sidecar-macos-aarch64-cpu.zip .
- name: Upload to sidecar release
env:
BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
run: |
which jq || brew install jq
REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
TAG="${{ needs.bump-sidecar-version.outputs.tag }}"
# Find the sidecar release by tag (retry up to 30 times with 10s delay)
echo "Waiting for sidecar release ${TAG} to be available..."
for i in $(seq 1 30); do
RELEASE_JSON=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
"${REPO_API}/releases/tags/${TAG}")
RELEASE_ID=$(echo "$RELEASE_JSON" | jq -r '.id // empty')
if [ -n "${RELEASE_ID}" ] && [ "${RELEASE_ID}" != "null" ]; then
echo "Found sidecar release: ${TAG} (ID: ${RELEASE_ID})"
break
fi
echo "Attempt ${i}/30: Release not ready yet, retrying in 10s..."
sleep 10
done
if [ -z "${RELEASE_ID}" ] || [ "${RELEASE_ID}" = "null" ]; then
echo "ERROR: Failed to find sidecar release for tag ${TAG} after 30 attempts."
exit 1
fi
for file in sidecar-*.zip; do
filename=$(basename "$file")
encoded_name=$(echo "$filename" | sed 's/ /%20/g')
echo "Uploading ${filename} ($(du -h "$file" | cut -f1))..."
ASSET_ID=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
"${REPO_API}/releases/${RELEASE_ID}/assets" | jq -r ".[] | select(.name == \"${filename}\") | .id // empty")
if [ -n "${ASSET_ID}" ]; then
curl -s -X DELETE -H "Authorization: token ${BUILD_TOKEN}" \
"${REPO_API}/releases/${RELEASE_ID}/assets/${ASSET_ID}"
fi
HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X POST \
-H "Authorization: token ${BUILD_TOKEN}" \
-H "Content-Type: application/octet-stream" \
-T "$file" \
"${REPO_API}/releases/${RELEASE_ID}/assets?name=${encoded_name}")
echo "Upload response: HTTP ${HTTP_CODE}"
done

191
.gitea/workflows/build.yml Normal file
View File

@@ -0,0 +1,191 @@
name: Build & Release
on:
push:
branches: [main]
tags: ["v*"]
pull_request:
branches: [main]
env:
PYTHON_VERSION: "3.11"
NODE_VERSION: "20"
jobs:
build-sidecar:
name: Build sidecar (${{ matrix.target }})
runs-on: ${{ matrix.runner }}
strategy:
fail-fast: false
matrix:
include:
- runner: ubuntu-latest
target: x86_64-unknown-linux-gnu
platform: linux
- runner: windows-latest
target: x86_64-pc-windows-msvc
platform: windows
- runner: macos-latest
target: aarch64-apple-darwin
platform: macos
steps:
- uses: actions/checkout@v4
- name: Install uv
uses: astral-sh/setup-uv@v4
- name: Set up Python
run: uv python install ${{ env.PYTHON_VERSION }}
- name: Build sidecar
working-directory: python
run: uv run --python ${{ env.PYTHON_VERSION }} python build_sidecar.py --cpu-only
- name: Upload sidecar artifact
uses: actions/upload-artifact@v3
with:
name: sidecar-${{ matrix.target }}
path: python/dist/voice-to-notes-sidecar/
retention-days: 7
build-tauri:
name: Build app (${{ matrix.target }})
needs: build-sidecar
runs-on: ${{ matrix.runner }}
strategy:
fail-fast: false
matrix:
include:
- runner: ubuntu-latest
target: x86_64-unknown-linux-gnu
platform: linux
- runner: windows-latest
target: x86_64-pc-windows-msvc
platform: windows
- runner: macos-latest
target: aarch64-apple-darwin
platform: macos
steps:
- uses: actions/checkout@v4
- name: Set up Node.js
uses: actions/setup-node@v4
with:
node-version: ${{ env.NODE_VERSION }}
- name: Install Rust stable
uses: dtolnay/rust-toolchain@stable
- name: Install system dependencies (Linux)
if: matrix.platform == 'linux'
run: |
sudo apt-get update
sudo apt-get install -y libgtk-3-dev libwebkit2gtk-4.1-dev libappindicator3-dev librsvg2-dev patchelf
- name: Install system dependencies (macOS)
if: matrix.platform == 'macos'
run: |
brew install --quiet create-dmg || true
- name: Download sidecar artifact
uses: actions/download-artifact@v3
with:
name: sidecar-${{ matrix.target }}
path: src-tauri/binaries/
- name: Make sidecar executable (Unix)
if: matrix.platform != 'windows'
run: chmod +x src-tauri/binaries/voice-to-notes-sidecar-${{ matrix.target }}
- name: Install npm dependencies
run: npm ci
- name: Build Tauri app
run: npm run tauri build
env:
TAURI_SIGNING_PRIVATE_KEY: ${{ secrets.TAURI_SIGNING_PRIVATE_KEY }}
TAURI_CONFIG: '{"bundle":{"externalBin":["binaries/voice-to-notes-sidecar"]}}'
- name: Upload app artifacts (Linux)
if: matrix.platform == 'linux'
uses: actions/upload-artifact@v3
with:
name: app-${{ matrix.target }}
path: |
src-tauri/target/release/bundle/deb/*.deb
src-tauri/target/release/bundle/appimage/*.AppImage
retention-days: 30
- name: Upload app artifacts (Windows)
if: matrix.platform == 'windows'
uses: actions/upload-artifact@v3
with:
name: app-${{ matrix.target }}
path: |
src-tauri/target/release/bundle/msi/*.msi
src-tauri/target/release/bundle/nsis/*.exe
retention-days: 30
- name: Upload app artifacts (macOS)
if: matrix.platform == 'macos'
uses: actions/upload-artifact@v3
with:
name: app-${{ matrix.target }}
path: |
src-tauri/target/release/bundle/dmg/*.dmg
src-tauri/target/release/bundle/macos/*.app
retention-days: 30
release:
name: Create Release
needs: build-tauri
if: github.ref == 'refs/heads/main'
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Install required tools
run: |
sudo apt-get update
sudo apt-get install -y jq curl
- name: Download all app artifacts
uses: actions/download-artifact@v3
with:
path: artifacts/
- name: Generate release tag
id: tag
run: echo "tag=build-$(date +%Y%m%d-%H%M%S)" >> $GITHUB_OUTPUT
- name: Create release
env:
BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
TAG: ${{ steps.tag.outputs.tag }}
run: |
# Create the release
RELEASE_ID=$(curl -s -X POST \
-H "Authorization: token ${BUILD_TOKEN}" \
-H "Content-Type: application/json" \
-d "{\"tag_name\": \"${TAG}\", \"name\": \"Voice to Notes ${TAG}\", \"body\": \"Automated build from main branch.\", \"draft\": false, \"prerelease\": true}" \
"${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}/releases" | jq -r '.id')
echo "Release ID: ${RELEASE_ID}"
if [ "${RELEASE_ID}" = "null" ] || [ -z "${RELEASE_ID}" ]; then
echo "ERROR: Failed to create release. Check BUILD_TOKEN permissions."
exit 1
fi
# Upload all artifacts
find artifacts/ -type f \( -name "*.deb" -o -name "*.AppImage" -o -name "*.msi" -o -name "*.exe" -o -name "*.dmg" \) | while read file; do
filename=$(basename "$file")
echo "Uploading ${filename}..."
curl -s -X POST \
-H "Authorization: token ${BUILD_TOKEN}" \
-H "Content-Type: application/octet-stream" \
--data-binary "@${file}" \
"${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}/releases/${RELEASE_ID}/assets?name=${filename}"
done

View File

@@ -1,65 +0,0 @@
name: Cleanup Old Releases
on:
# Run after release and sidecar workflows complete
schedule:
- cron: '0 6 * * *' # Daily at 6am UTC
workflow_dispatch:
jobs:
cleanup:
name: Remove old releases
runs-on: ubuntu-latest
env:
KEEP_COUNT: 5
steps:
- name: Cleanup old app releases
env:
BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
run: |
REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
# Get all releases, sorted newest first (API default)
RELEASES=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
"${REPO_API}/releases?limit=50")
# Separate app releases (v*) and sidecar releases (sidecar-v*)
APP_IDS=$(echo "$RELEASES" | jq -r '[.[] | select(.tag_name | startswith("v") and (startswith("sidecar") | not)) | .id] | .[]')
SIDECAR_IDS=$(echo "$RELEASES" | jq -r '[.[] | select(.tag_name | startswith("sidecar-v")) | .id] | .[]')
# Delete app releases beyond KEEP_COUNT
COUNT=0
for ID in $APP_IDS; do
COUNT=$((COUNT + 1))
if [ $COUNT -le ${{ env.KEEP_COUNT }} ]; then
continue
fi
TAG=$(echo "$RELEASES" | jq -r ".[] | select(.id == $ID) | .tag_name")
echo "Deleting app release $ID ($TAG)..."
curl -s -o /dev/null -w "HTTP %{http_code}\n" -X DELETE \
-H "Authorization: token ${BUILD_TOKEN}" \
"${REPO_API}/releases/$ID"
# Also delete the tag
curl -s -o /dev/null -X DELETE \
-H "Authorization: token ${BUILD_TOKEN}" \
"${REPO_API}/tags/$TAG"
done
# Delete sidecar releases beyond KEEP_COUNT
COUNT=0
for ID in $SIDECAR_IDS; do
COUNT=$((COUNT + 1))
if [ $COUNT -le ${{ env.KEEP_COUNT }} ]; then
continue
fi
TAG=$(echo "$RELEASES" | jq -r ".[] | select(.id == $ID) | .tag_name")
echo "Deleting sidecar release $ID ($TAG)..."
curl -s -o /dev/null -w "HTTP %{http_code}\n" -X DELETE \
-H "Authorization: token ${BUILD_TOKEN}" \
"${REPO_API}/releases/$ID"
curl -s -o /dev/null -X DELETE \
-H "Authorization: token ${BUILD_TOKEN}" \
"${REPO_API}/tags/$TAG"
done
echo "Cleanup complete. Kept latest ${{ env.KEEP_COUNT }} of each type."

View File

@@ -1,305 +0,0 @@
name: Release
on:
push:
branches: [main]
jobs:
bump-version:
name: Bump version and tag
# Skip if this is a version-bump commit (avoid infinite loop)
if: "!contains(github.event.head_commit.message, '[skip ci]')"
runs-on: ubuntu-latest
outputs:
new_version: ${{ steps.bump.outputs.new_version }}
tag: ${{ steps.bump.outputs.tag }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Configure git
run: |
git config user.name "Gitea Actions"
git config user.email "actions@gitea.local"
- name: Bump patch version
id: bump
run: |
# Read current version from package.json
CURRENT=$(grep '"version"' package.json | head -1 | sed 's/.*"version": *"\([^"]*\)".*/\1/')
echo "Current version: ${CURRENT}"
# Increment patch number
MAJOR=$(echo "${CURRENT}" | cut -d. -f1)
MINOR=$(echo "${CURRENT}" | cut -d. -f2)
PATCH=$(echo "${CURRENT}" | cut -d. -f3)
NEW_PATCH=$((PATCH + 1))
NEW_VERSION="${MAJOR}.${MINOR}.${NEW_PATCH}"
echo "New version: ${NEW_VERSION}"
# Update package.json
sed -i "s/\"version\": \"${CURRENT}\"/\"version\": \"${NEW_VERSION}\"/" package.json
# Update src-tauri/tauri.conf.json
sed -i "s/\"version\": \"${CURRENT}\"/\"version\": \"${NEW_VERSION}\"/" src-tauri/tauri.conf.json
# Update src-tauri/Cargo.toml (match version = "x.y.z" in [package] section)
sed -i "s/^version = \"${CURRENT}\"/version = \"${NEW_VERSION}\"/" src-tauri/Cargo.toml
echo "new_version=${NEW_VERSION}" >> $GITHUB_OUTPUT
echo "tag=v${NEW_VERSION}" >> $GITHUB_OUTPUT
- name: Commit and tag
env:
BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
run: |
NEW_VERSION="${{ steps.bump.outputs.new_version }}"
git add package.json src-tauri/tauri.conf.json src-tauri/Cargo.toml
git commit -m "chore: bump version to ${NEW_VERSION} [skip ci]"
git tag "v${NEW_VERSION}"
# Push using token for authentication (rebase in case another workflow pushed first)
REMOTE_URL=$(git remote get-url origin | sed "s|://|://gitea-actions:${BUILD_TOKEN}@|")
git pull --rebase "${REMOTE_URL}" main || true
git push "${REMOTE_URL}" HEAD:main
git push "${REMOTE_URL}" "v${NEW_VERSION}"
- name: Create Gitea release
env:
BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
run: |
REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
TAG="${{ steps.bump.outputs.tag }}"
RELEASE_NAME="Voice to Notes ${TAG}"
curl -s -X POST \
-H "Authorization: token ${BUILD_TOKEN}" \
-H "Content-Type: application/json" \
-d "{\"tag_name\": \"${TAG}\", \"name\": \"${RELEASE_NAME}\", \"body\": \"Automated build.\", \"draft\": false, \"prerelease\": false}" \
"${REPO_API}/releases"
echo "Created release: ${RELEASE_NAME}"
# ── Platform builds (run after version bump) ──
build-linux:
name: Build App (Linux)
needs: bump-version
runs-on: ubuntu-latest
env:
NODE_VERSION: "20"
steps:
- uses: actions/checkout@v4
with:
ref: ${{ needs.bump-version.outputs.tag }}
- name: Set up Node.js
uses: actions/setup-node@v4
with:
node-version: ${{ env.NODE_VERSION }}
- name: Install Rust stable
run: |
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable
echo "$HOME/.cargo/bin" >> $GITHUB_PATH
- name: Install system dependencies
run: |
sudo apt-get update
sudo apt-get install -y libgtk-3-dev libwebkit2gtk-4.1-dev libappindicator3-dev librsvg2-dev patchelf xdg-utils rpm
- name: Install npm dependencies
run: npm ci
- name: Build Tauri app
run: npm run tauri build
- name: Upload to release
env:
BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
run: |
sudo apt-get install -y jq
REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
TAG="${{ needs.bump-version.outputs.tag }}"
echo "Release tag: ${TAG}"
RELEASE_ID=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
"${REPO_API}/releases/tags/${TAG}" | jq -r '.id // empty')
if [ -z "${RELEASE_ID}" ] || [ "${RELEASE_ID}" = "null" ]; then
echo "ERROR: Failed to find release for tag ${TAG}."
exit 1
fi
echo "Release ID: ${RELEASE_ID}"
find src-tauri/target/release/bundle -type f \( -name "*.deb" -o -name "*.rpm" \) | while IFS= read -r file; do
filename=$(basename "$file")
encoded_name=$(echo "$filename" | sed 's/ /%20/g')
echo "Uploading ${filename} ($(du -h "$file" | cut -f1))..."
ASSET_ID=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
"${REPO_API}/releases/${RELEASE_ID}/assets" | jq -r ".[] | select(.name == \"${filename}\") | .id // empty")
if [ -n "${ASSET_ID}" ]; then
curl -s -X DELETE -H "Authorization: token ${BUILD_TOKEN}" \
"${REPO_API}/releases/${RELEASE_ID}/assets/${ASSET_ID}"
fi
HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X POST \
-H "Authorization: token ${BUILD_TOKEN}" \
-H "Content-Type: application/octet-stream" \
-T "$file" \
"${REPO_API}/releases/${RELEASE_ID}/assets?name=${encoded_name}")
echo "Upload response: HTTP ${HTTP_CODE}"
done
build-windows:
name: Build App (Windows)
needs: bump-version
runs-on: windows-latest
env:
NODE_VERSION: "20"
steps:
- uses: actions/checkout@v4
with:
ref: ${{ needs.bump-version.outputs.tag }}
- name: Set up Node.js
uses: actions/setup-node@v4
with:
node-version: ${{ env.NODE_VERSION }}
- name: Install Rust stable
shell: powershell
run: |
if (Get-Command rustup -ErrorAction SilentlyContinue) {
rustup default stable
} else {
Invoke-WebRequest -Uri https://win.rustup.rs/x86_64 -OutFile rustup-init.exe
.\rustup-init.exe -y --default-toolchain stable
echo "$env:USERPROFILE\.cargo\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
}
- name: Install npm dependencies
shell: powershell
run: npm ci
- name: Build Tauri app
shell: powershell
run: npm run tauri build
- name: Upload to release
shell: powershell
env:
BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
run: |
$REPO_API = "${{ github.server_url }}/api/v1/repos/${{ github.repository }}"
$Headers = @{ "Authorization" = "token $env:BUILD_TOKEN" }
$TAG = "${{ needs.bump-version.outputs.tag }}"
Write-Host "Release tag: ${TAG}"
$release = Invoke-RestMethod -Uri "${REPO_API}/releases/tags/${TAG}" -Headers $Headers -ErrorAction Stop
$RELEASE_ID = $release.id
Write-Host "Release ID: ${RELEASE_ID}"
Get-ChildItem -Path src-tauri\target\release\bundle -Recurse -Include *.msi,*-setup.exe | ForEach-Object {
$filename = $_.Name
$encodedName = [System.Uri]::EscapeDataString($filename)
$size = [math]::Round($_.Length / 1MB, 1)
Write-Host "Uploading ${filename} (${size} MB)..."
try {
$assets = Invoke-RestMethod -Uri "${REPO_API}/releases/${RELEASE_ID}/assets" -Headers $Headers
$existing = $assets | Where-Object { $_.name -eq $filename }
if ($existing) {
Invoke-RestMethod -Uri "${REPO_API}/releases/${RELEASE_ID}/assets/$($existing.id)" -Method Delete -Headers $Headers
}
} catch {}
# Use curl for streaming upload (Invoke-RestMethod fails on large files)
$uploadUrl = "${REPO_API}/releases/${RELEASE_ID}/assets?name=${encodedName}"
$result = curl.exe --fail --silent --show-error `
-X POST `
-H "Authorization: token $env:BUILD_TOKEN" `
-H "Content-Type: application/octet-stream" `
-T "$($_.FullName)" `
"$uploadUrl" 2>&1
if ($LASTEXITCODE -eq 0) {
Write-Host "Upload successful: ${filename}"
} else {
Write-Host "WARNING: Upload failed for ${filename}: ${result}"
}
}
build-macos:
name: Build App (macOS)
needs: bump-version
runs-on: macos-latest
env:
NODE_VERSION: "20"
steps:
- uses: actions/checkout@v4
with:
ref: ${{ needs.bump-version.outputs.tag }}
- name: Set up Node.js
uses: actions/setup-node@v4
with:
node-version: ${{ env.NODE_VERSION }}
- name: Install Rust stable
run: |
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable
echo "$HOME/.cargo/bin" >> $GITHUB_PATH
- name: Install system dependencies
run: brew install --quiet create-dmg || true
- name: Install npm dependencies
run: npm ci
- name: Build Tauri app
run: npm run tauri build
- name: Upload to release
env:
BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
run: |
which jq || brew install jq
REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
TAG="${{ needs.bump-version.outputs.tag }}"
echo "Release tag: ${TAG}"
RELEASE_ID=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
"${REPO_API}/releases/tags/${TAG}" | jq -r '.id // empty')
if [ -z "${RELEASE_ID}" ] || [ "${RELEASE_ID}" = "null" ]; then
echo "ERROR: Failed to find release for tag ${TAG}."
exit 1
fi
echo "Release ID: ${RELEASE_ID}"
find src-tauri/target/release/bundle -type f -name "*.dmg" | while IFS= read -r file; do
filename=$(basename "$file")
encoded_name=$(echo "$filename" | sed 's/ /%20/g')
echo "Uploading ${filename} ($(du -h "$file" | cut -f1))..."
ASSET_ID=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
"${REPO_API}/releases/${RELEASE_ID}/assets" | jq -r ".[] | select(.name == \"${filename}\") | .id // empty")
if [ -n "${ASSET_ID}" ]; then
curl -s -X DELETE -H "Authorization: token ${BUILD_TOKEN}" \
"${REPO_API}/releases/${RELEASE_ID}/assets/${ASSET_ID}"
fi
HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X POST \
-H "Authorization: token ${BUILD_TOKEN}" \
-H "Content-Type: application/octet-stream" \
-T "$file" \
"${REPO_API}/releases/${RELEASE_ID}/assets?name=${encoded_name}")
echo "Upload response: HTTP ${HTTP_CODE}"
done

1
.gitignore vendored
View File

@@ -50,6 +50,5 @@ Thumbs.db
# Sidecar build artifacts
src-tauri/binaries/*
!src-tauri/binaries/.gitkeep
src-tauri/sidecar.zip
python/dist/
python/build/

View File

@@ -1,140 +0,0 @@
# Contributing to Voice to Notes
Thank you for your interest in contributing! This guide covers how to set up the project for development and submit changes.
## Development Setup
### Prerequisites
- **Node.js 20+** and npm
- **Rust** (stable toolchain)
- **Python 3.11+** with [uv](https://docs.astral.sh/uv/) (recommended) or pip
- **System libraries (Linux only):**
```bash
sudo apt install libgtk-3-dev libwebkit2gtk-4.1-dev libappindicator3-dev librsvg2-dev patchelf xdg-utils
```
### Clone and Install
```bash
git clone https://repo.anhonesthost.net/MacroPad/voice-to-notes.git
cd voice-to-notes
# Frontend
npm install
# Python sidecar
cd python && pip install -e ".[dev]" && cd ..
```
### Running in Dev Mode
```bash
npm run tauri:dev
```
This runs the Svelte dev server + Tauri with hot-reload. The Python sidecar runs from your system Python (no PyInstaller needed in dev mode).
### Building
```bash
# Build the Python sidecar (frozen binary)
cd python && python build_sidecar.py --cpu-only && cd ..
# Build the full app
npm run tauri build
```
## Project Structure
```
src/ # Svelte 5 frontend
lib/components/ # Reusable UI components
lib/stores/ # Svelte stores (app state)
routes/ # SvelteKit pages
src-tauri/ # Rust backend (Tauri v2)
src/sidecar/ # Python sidecar lifecycle (download, extract, IPC)
src/commands/ # Tauri command handlers
src/db/ # SQLite database layer
python/ # Python ML sidecar
voice_to_notes/ # Main package
services/ # Transcription, diarization, AI, export
ipc/ # JSON-line IPC protocol
hardware/ # GPU/CPU detection
.gitea/workflows/ # CI/CD pipelines
docs/ # Documentation
```
## How It Works
The app has three layers:
1. **Frontend (Svelte)** — UI, audio playback (wavesurfer.js), transcript editing (TipTap)
2. **Backend (Rust/Tauri)** — Desktop integration, file access, SQLite, sidecar process management
3. **Sidecar (Python)** — ML inference (faster-whisper, pyannote.audio), AI chat, export
Rust and Python communicate via **JSON-line IPC** over stdin/stdout pipes. Each request has an `id`, `type`, and `payload`. The Python sidecar runs as a child process managed by `SidecarManager` in Rust.
## Conventions
### Rust
- Follow standard Rust conventions
- Run `cargo fmt` and `cargo clippy` before committing
- Tauri commands go in `src-tauri/src/commands/`
### Python
- Python 3.11+, type hints everywhere
- Use `ruff` for linting: `ruff check python/`
- Tests with pytest: `cd python && pytest`
- IPC messages: JSON-line format with `id`, `type`, `payload` fields
### TypeScript / Svelte
- Svelte 5 runes (`$state`, `$derived`, `$effect`)
- Strict TypeScript
- Components in `src/lib/components/`
- State in `src/lib/stores/`
### General
- All timestamps in milliseconds (integer)
- UUIDs as primary keys in the database
- Don't bundle API keys or secrets — those are user-configured
## Submitting Changes
1. Fork the repository
2. Create a feature branch: `git checkout -b my-feature`
3. Make your changes
4. Test locally with `npm run tauri:dev`
5. Run linters: `cargo fmt && cargo clippy`, `ruff check python/`
6. Commit with a clear message describing the change
7. Open a Pull Request against `main`
## CI/CD
Pushes to `main` automatically:
- Bump the app version and create a release (`release.yml`)
- Build app installers for all platforms
Changes to `python/` also trigger sidecar builds (`build-sidecar.yml`).
## Areas for Contribution
- UI/UX improvements
- New export formats
- Additional AI provider integrations
- Performance optimizations
- Accessibility improvements
- Documentation and translations
- Bug reports and testing on different platforms
## Reporting Issues
Open an issue on the [repository](https://repo.anhonesthost.net/MacroPad/voice-to-notes/issues) with:
- Steps to reproduce
- Expected vs actual behavior
- Platform and version info
- Sidecar logs (`%LOCALAPPDATA%\com.voicetonotes.app\sidecar.log` on Windows)
## License
By contributing, you agree that your contributions will be licensed under the [MIT License](LICENSE).

119
README.md
View File

@@ -1,55 +1,32 @@
# Voice to Notes
A desktop application that transcribes audio and video recordings with speaker identification, synchronized playback, and AI-powered analysis. Export to SRT, WebVTT, ASS captions, plain text, or Markdown.
A desktop application that transcribes audio/video recordings with speaker identification, producing editable transcriptions with synchronized audio playback.
## Features
- **Speech-to-Text** — Accurate transcription via faster-whisper with word-level timestamps. Supports 99 languages.
- **Speaker Identification** — Detect and label speakers using pyannote.audio. Rename speakers for clean exports.
- **GPU Acceleration** — CUDA support for NVIDIA GPUs (Windows/Linux). Falls back to CPU automatically.
- **Synchronized Playback** — Click any word to seek. Waveform visualization via wavesurfer.js.
- **AI Chat** — Ask questions about your transcript. Works with Ollama (local), OpenAI, Anthropic, or any OpenAI-compatible API.
- **Export** — SRT, WebVTT, ASS, plain text, Markdown — all with speaker labels.
- **Cross-Platform** — Linux, Windows, macOS (Apple Silicon).
## Quick Start
1. Download the installer from [Releases](https://repo.anhonesthost.net/MacroPad/voice-to-notes/releases)
2. On first launch, choose **CPU** or **CUDA** sidecar (the AI engine downloads separately, ~500MB–2GB)
3. Import an audio/video file and click **Transcribe**
See the full [User Guide](docs/USER_GUIDE.md) for detailed setup and usage instructions.
- **Speech-to-Text Transcription** — Accurate transcription via faster-whisper (Whisper models) with word-level timestamps
- **Speaker Identification (Diarization)** — Detect and distinguish between speakers using pyannote.audio
- **Synchronized Playback** — Click any word to seek to that point in the audio (Web Audio API for instant playback)
- **AI Integration** — Ask questions about your transcript via OpenAI, Anthropic, or any OpenAI-compatible API (LiteLLM proxies, Ollama, vLLM)
- **Export Formats** — SRT, WebVTT, ASS captions, plain text, and Markdown with speaker labels
- **Cross-Platform** — Builds for Linux, Windows, and macOS (Apple Silicon)
## Platform Support
| Platform | Architecture | Installers |
|----------|-------------|------------|
| Linux | x86_64 | .deb, .rpm |
| Windows | x86_64 | .msi, .exe (NSIS) |
| macOS | ARM (Apple Silicon) | .dmg |
## Architecture
The app is split into two independently versioned components:
- **App** (v0.2.x) — Tauri desktop shell with Svelte frontend. Small installer (~50MB).
- **Sidecar** (v1.x) — Python ML engine (faster-whisper, pyannote.audio). Downloaded on first launch. CPU (~500MB) or CUDA (~2GB) variants.
This separation means app UI updates don't require re-downloading the sidecar, and sidecar updates don't require reinstalling the app.
| Platform | Architecture | Status |
|----------|-------------|--------|
| Linux | x86_64 | Supported |
| Windows | x86_64 | Supported |
| macOS | ARM (Apple Silicon) | Supported |
## Tech Stack
| Component | Technology |
|-----------|-----------|
| Desktop shell | Tauri v2 (Rust + Svelte 5 / TypeScript) |
| Transcription | faster-whisper (CTranslate2) |
| Speaker ID | pyannote.audio 3.1 |
| Audio UI | wavesurfer.js |
| Transcript editor | TipTap (ProseMirror) |
| AI (local) | Ollama (any model) |
| AI (cloud) | OpenAI, Anthropic, OpenAI-compatible |
| Caption export | pysubs2 |
| Database | SQLite (rusqlite) |
- **Desktop shell:** Tauri v2 (Rust backend + Svelte 5 / TypeScript frontend)
- **ML pipeline:** Python sidecar (faster-whisper, pyannote.audio) — frozen via PyInstaller for distribution
- **Audio playback:** wavesurfer.js with Web Audio API backend
- **AI providers:** OpenAI, Anthropic, OpenAI-compatible endpoints (local or remote)
- **Local AI:** Bundled llama-server (llama.cpp)
- **Caption export:** pysubs2
## Development
@@ -57,8 +34,8 @@ This separation means app UI updates don't require re-downloading the sidecar, a
- Node.js 20+
- Rust (stable)
- Python 3.11+ with uv or pip
- Linux: `libgtk-3-dev`, `libwebkit2gtk-4.1-dev`, `libappindicator3-dev`, `librsvg2-dev`
- Python 3.11+ with ML dependencies
- System: `libgtk-3-dev`, `libwebkit2gtk-4.1-dev` (Linux)
### Getting Started
@@ -67,63 +44,49 @@ This separation means app UI updates don't require re-downloading the sidecar, a
npm install
# Install Python sidecar dependencies
cd python && pip install -e ".[dev]" && cd ..
cd python && pip install -e . && cd ..
# Run in dev mode (uses system Python for the sidecar)
npm run tauri:dev
```
### Building
### Building for Distribution
```bash
# Build the frozen Python sidecar (CPU-only)
cd python && python build_sidecar.py --cpu-only && cd ..
# Build the frozen Python sidecar
npm run sidecar:build
# Build with CUDA support
cd python && python build_sidecar.py --with-cuda && cd ..
# Build the Tauri app
# Build the Tauri app (requires sidecar in src-tauri/binaries/)
npm run tauri build
```
### CI/CD
Two Gitea Actions workflows in `.gitea/workflows/`:
Gitea Actions workflows are in `.gitea/workflows/`. The build pipeline:
**`release.yml`** — Triggers on push to main:
1. Bumps app version (patch), creates git tag and Gitea release
2. Builds lightweight app installers for all platforms (no sidecar bundled)
**`build-sidecar.yml`** — Triggers on changes to `python/` or manual dispatch:
1. Bumps sidecar version, creates `sidecar-v*` tag and release
2. Builds CPU + CUDA variants for Linux/Windows, CPU for macOS
3. Uploads as separate release assets
1. **Build sidecar** — PyInstaller-frozen Python binary per platform (CPU-only PyTorch)
2. **Build Tauri app** — Bundles the sidecar via `externalBin`, produces .deb/.AppImage (Linux), .msi (Windows), .dmg (macOS)
#### Required Secrets
| Secret | Purpose |
|--------|---------|
| `BUILD_TOKEN` | Gitea API token for creating releases and pushing tags |
| Secret | Purpose | Required? |
|--------|---------|-----------|
| `TAURI_SIGNING_PRIVATE_KEY` | Signs Tauri update bundles | Optional (for auto-updates) |
No other secrets are needed for building. AI provider API keys and HuggingFace tokens are configured by end users in the app's Settings.
### Project Structure
```
src/ # Svelte 5 frontend
lib/components/ # UI components (waveform, transcript editor, settings, etc.)
lib/stores/ # Svelte stores (settings, transcript state)
routes/ # SvelteKit pages
src-tauri/ # Rust backend
src/sidecar/ # Sidecar process manager (download, extract, IPC)
src/commands/ # Tauri command handlers
nsis-hooks.nsh # Windows uninstall cleanup
python/ # Python sidecar
voice_to_notes/ # Python package (transcription, diarization, AI, export)
build_sidecar.py # PyInstaller build script
voice_to_notes.spec # PyInstaller spec
.gitea/workflows/ # CI/CD (release.yml, build-sidecar.yml)
docs/ # Documentation
src/ # Svelte 5 frontend
src-tauri/ # Rust backend (Tauri commands, sidecar manager, SQLite)
python/ # Python sidecar (transcription, diarization, AI)
voice_to_notes/ # Python package
build_sidecar.py # PyInstaller build script
voice_to_notes.spec # PyInstaller spec
.gitea/workflows/ # Gitea Actions CI/CD
```
## License
[MIT](LICENSE)
MIT

View File

@@ -1,240 +0,0 @@
# Voice to Notes — User Guide
## Getting Started
### Installation
Download the installer for your platform from the [Releases](https://repo.anhonesthost.net/MacroPad/voice-to-notes/releases) page:
- **Windows:** `.msi` or `-setup.exe`
- **Linux:** `.deb` or `.rpm`
- **macOS:** `.dmg`
### First-Time Setup
On first launch, Voice to Notes will prompt you to download its AI engine (the "sidecar"):
1. Choose **Standard (CPU)** (~500 MB) or **GPU Accelerated (CUDA)** (~2 GB)
- Choose CUDA if you have an NVIDIA GPU for significantly faster transcription
- CPU works on all computers
2. Click **Download & Install** and wait for the download to complete
3. The app will proceed to the main interface once the sidecar is ready
The sidecar only needs to be downloaded once. Updates are detected automatically on launch.
---
## Basic Workflow
### 1. Import Audio or Video
- Click **Import Audio** or press **Ctrl+O** (Cmd+O on Mac)
- **Audio formats:** MP3, WAV, FLAC, OGG, M4A, AAC, WMA
- **Video formats:** MP4, MKV, AVI, MOV, WebM — audio is automatically extracted
> **Note:** Video file import requires [FFmpeg](#installing-ffmpeg) to be installed on your system.
### 2. Transcribe
After importing, click **Transcribe** to start the transcription pipeline:
- **Transcription:** Converts speech to text with word-level timestamps
- **Speaker Detection:** Identifies different speakers (if configured — see [Speaker Detection](#speaker-detection))
- A progress bar shows the current stage and percentage
### 3. Review and Edit
- The **waveform** displays at the top — click anywhere to seek
- The **transcript** shows below with speaker labels and timestamps
- **Click any word** in the transcript to jump to that point in the audio
- The current word highlights during playback
- **Edit text** directly in the transcript — word timings are preserved
### 4. Export
Click **Export** and choose a format:
| Format | Extension | Best For |
|--------|-----------|----------|
| SRT | `.srt` | Video subtitles (most compatible) |
| WebVTT | `.vtt` | Web video players, HTML5 |
| ASS/SSA | `.ass` | Styled subtitles with speaker colors |
| Plain Text | `.txt` | Reading, sharing, pasting |
| Markdown | `.md` | Documentation, notes |
All formats include speaker labels when speaker detection is enabled.
### 5. Save Project
- **Ctrl+S** (Cmd+S) saves the current project as a `.vtn` file
- This preserves the full transcript, speaker assignments, and edits
- Reopen later to continue editing or re-export
---
## Playback Controls
| Action | Shortcut |
|--------|----------|
| Play / Pause | **Space** |
| Skip back 5s | **Left Arrow** |
| Skip forward 5s | **Right Arrow** |
| Seek to word | Click any word in the transcript |
| Import audio | **Ctrl+O** / **Cmd+O** |
| Open settings | **Ctrl+,** / **Cmd+,** |
---
## Speaker Detection
Speaker detection (diarization) identifies who is speaking at each point in the audio. It requires a one-time setup:
### Setup
1. Go to **Settings > Speakers**
2. Create a free account at [huggingface.co](https://huggingface.co/join)
3. Accept the license on **all three** model pages:
- [pyannote/speaker-diarization-3.1](https://huggingface.co/pyannote/speaker-diarization-3.1)
- [pyannote/segmentation-3.0](https://huggingface.co/pyannote/segmentation-3.0)
- [pyannote/speaker-diarization-community-1](https://huggingface.co/pyannote/speaker-diarization-community-1)
4. Create a token at [huggingface.co/settings/tokens](https://huggingface.co/settings/tokens) (read access is sufficient)
5. Paste the token in Settings and click **Test & Download Model**
### Speaker Options
- **Number of speakers:** Set to auto-detect or specify a fixed number for faster results
- **Skip speaker detection:** Check this to only transcribe without identifying speakers
### Managing Speakers
After transcription, speakers appear as "Speaker 1", "Speaker 2", etc. in the left sidebar. Double-click a speaker name to rename it — the new name appears throughout the transcript and in exports.
---
## AI Chat
The AI chat panel lets you ask questions about your transcript. The AI sees the full transcript with speaker labels as context.
Example prompts:
- "Summarize this conversation"
- "What were the key action items?"
- "What did Speaker 1 say about the budget?"
### Setting Up Ollama (Local AI)
[Ollama](https://ollama.com) runs AI models locally on your computer — no API keys or internet required.
1. **Install Ollama:**
- Download from [ollama.com](https://ollama.com)
- Or on Linux: `curl -fsSL https://ollama.com/install.sh | sh`
2. **Pull a model:**
```bash
ollama pull llama3.2
```
Other good options: `mistral`, `gemma2`, `phi3`
3. **Configure in Voice to Notes:**
- Go to **Settings > AI Provider**
- Select **Ollama**
- URL: `http://localhost:11434` (default, usually no change needed)
- Model: `llama3.2` (or whichever model you pulled)
4. **Use:** Open the AI chat panel (right sidebar) and start asking questions
### Cloud AI Providers
If you prefer cloud-based AI:
**OpenAI:**
- Select **OpenAI** in Settings > AI Provider
- Enter your API key from [platform.openai.com/api-keys](https://platform.openai.com/api-keys)
- Default model: `gpt-4o-mini`
**Anthropic:**
- Select **Anthropic** in Settings > AI Provider
- Enter your API key from [console.anthropic.com](https://console.anthropic.com)
- Default model: `claude-sonnet-4-6`
**OpenAI Compatible:**
- For any provider with an OpenAI-compatible API (vLLM, LiteLLM, etc.)
- Enter the API base URL, key, and model name
---
## Settings Reference
### Transcription
| Setting | Options | Default |
|---------|---------|---------|
| Whisper Model | tiny, base, small, medium, large-v3 | base |
| Device | CPU, CUDA | CPU |
| Language | Auto-detect, or specify (en, es, fr, etc.) | Auto-detect |
**Model recommendations:**
- **tiny/base:** Fast, good for clear audio with one speaker
- **small:** Best balance of speed and accuracy
- **medium:** Better accuracy, noticeably slower
- **large-v3:** Best accuracy, requires 8GB+ VRAM (GPU) or 16GB+ RAM (CPU)
### Debug
- **Enable Developer Tools:** Opens the browser inspector for debugging
---
## Installing FFmpeg
FFmpeg is required for importing video files (MP4, MKV, AVI, etc.). It's used to extract the audio track before transcription.
**Windows:**
```
winget install ffmpeg
```
Or download from [ffmpeg.org/download.html](https://ffmpeg.org/download.html) and add to your PATH.
**macOS:**
```
brew install ffmpeg
```
**Linux (Debian/Ubuntu):**
```
sudo apt install ffmpeg
```
**Linux (Fedora/RHEL):**
```
sudo dnf install ffmpeg
```
After installing, restart Voice to Notes. FFmpeg is not needed for audio-only files (MP3, WAV, FLAC, etc.).
---
## Troubleshooting
### Video import fails / "FFmpeg not found"
- Install FFmpeg using the instructions above
- Make sure `ffmpeg` is in your system PATH
- Restart Voice to Notes after installing
### Transcription is slow
- Use a smaller model (tiny or base)
- If you have an NVIDIA GPU, select CUDA in Settings > Transcription > Device
- Ensure you downloaded the CUDA sidecar during setup
### Speaker detection not working
- Verify your HuggingFace token in Settings > Speakers
- Click "Test & Download Model" to re-download
- Make sure you accepted the license on all three model pages
### Audio won't play / No waveform
- Check that the audio file still exists at its original location
- Try re-importing the file
- Supported formats: MP3, WAV, FLAC, OGG, M4A, AAC, WMA
### App shows "Setting up Voice to Notes"
- This is the first-launch sidecar download — it only happens once
- If it fails, check your internet connection and click Retry

4
package-lock.json generated
View File

@@ -1,12 +1,12 @@
{
"name": "voice-to-notes",
"version": "0.2.10",
"version": "0.1.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "voice-to-notes",
"version": "0.2.10",
"version": "0.1.0",
"license": "MIT",
"dependencies": {
"@tauri-apps/api": "^2",

View File

@@ -1,6 +1,6 @@
{
"name": "voice-to-notes",
"version": "0.2.46",
"version": "0.1.0",
"description": "Desktop app for transcribing audio/video with speaker identification",
"type": "module",
"scripts": {

View File

@@ -92,42 +92,34 @@ def create_venv_and_install(cpu_only: bool) -> Path:
# Determine python path inside venv
if sys.platform == "win32":
python = str(venv_dir / "Scripts" / "python.exe")
python = str(venv_dir / "Scripts" / "python")
else:
python = str(venv_dir / "bin" / "python")
def pip_install(*args: str) -> None:
"""Install packages. Pass package names and flags only, not 'install'."""
def pkg_install(*args: str) -> None:
if use_uv:
# Use --python with the venv directory (not the python binary) for uv
subprocess.run(
["uv", "pip", "install", "--python", str(venv_dir), *args],
check=True,
)
subprocess.run(["uv", "pip", "install", "--python", python, *args], check=True)
else:
subprocess.run([python, "-m", "pip", "install", *args], check=True)
subprocess.run([python, "-m", "pip", *args], check=True)
if not use_uv:
# Upgrade pip (uv doesn't need this)
pip_install("--upgrade", "pip", "setuptools", "wheel")
pkg_install("install", "--upgrade", "pip", "setuptools", "wheel")
# Install torch (CPU-only to avoid bundling ~2GB of CUDA libs)
if cpu_only:
print("[build] Installing PyTorch (CPU-only)")
pip_install(
"torch", "torchaudio",
pkg_install(
"install", "torch", "torchaudio",
"--index-url", "https://download.pytorch.org/whl/cpu",
)
else:
print("[build] Installing PyTorch (CUDA 12.6)")
pip_install(
"torch", "torchaudio",
"--index-url", "https://download.pytorch.org/whl/cu126",
)
print("[build] Installing PyTorch (default, may include CUDA)")
pkg_install("install", "torch", "torchaudio")
# Install project and dev deps (includes pyinstaller)
print("[build] Installing project dependencies")
pip_install("-e", f"{SCRIPT_DIR}[dev]")
pkg_install("install", "-e", f"{SCRIPT_DIR}[dev]")
return Path(python)
@@ -239,9 +231,10 @@ def main() -> None:
python = create_venv_and_install(cpu_only)
output_dir = run_pyinstaller(python)
download_ffmpeg(output_dir)
rename_binary(output_dir, target_triple)
print(f"\n[build] Done! Sidecar built at: {output_dir}")
print(f"[build] Copy directory to src-tauri/sidecar/ for Tauri resource bundling")
print(f"[build] Copy contents to src-tauri/binaries/ for Tauri bundling")
if __name__ == "__main__":

View File

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "voice-to-notes"
version = "1.0.13"
version = "0.1.0"
description = "Python sidecar for Voice to Notes — transcription, diarization, and AI services"
requires-python = ">=3.11"
license = "MIT"
@@ -15,7 +15,6 @@ dependencies = [
"pysubs2>=1.7.0",
"openai>=1.0.0",
"anthropic>=0.20.0",
"soundfile>=0.12.0",
]
[project.optional-dependencies]

View File

@@ -12,17 +12,15 @@ faster_whisper_datas, faster_whisper_binaries, faster_whisper_hiddenimports = co
"faster_whisper"
)
pyannote_datas, pyannote_binaries, pyannote_hiddenimports = collect_all("pyannote")
soundfile_datas, soundfile_binaries, soundfile_hiddenimports = collect_all("soundfile")
a = Analysis(
["voice_to_notes/main.py"],
pathex=[],
binaries=ctranslate2_binaries + faster_whisper_binaries + pyannote_binaries + soundfile_binaries,
datas=ctranslate2_datas + faster_whisper_datas + pyannote_datas + soundfile_datas,
binaries=ctranslate2_binaries + faster_whisper_binaries + pyannote_binaries,
datas=ctranslate2_datas + faster_whisper_datas + pyannote_datas,
hiddenimports=[
"torch",
"torchaudio",
"soundfile",
"huggingface_hub",
"pysubs2",
"openai",
@@ -31,22 +29,11 @@ a = Analysis(
]
+ ctranslate2_hiddenimports
+ faster_whisper_hiddenimports
+ pyannote_hiddenimports
+ soundfile_hiddenimports,
+ pyannote_hiddenimports,
hookspath=[],
hooksconfig={},
runtime_hooks=[],
excludes=[
"tkinter", "test", "pip", "setuptools",
# ctranslate2.converters imports torch at module level and causes
# circular import crashes under PyInstaller. These modules are only
# needed for model format conversion, never for inference.
"ctranslate2.converters",
# torchcodec is partially bundled by PyInstaller but non-functional
# (missing FFmpeg shared libs). Excluding it forces pyannote.audio
# to fall back to torchaudio for audio decoding.
"torchcodec",
],
excludes=["tkinter", "test", "unittest", "pip", "setuptools"],
win_no_prefer_redirects=False,
win_private_assemblies=False,
cipher=block_cipher,

View File

@@ -105,23 +105,14 @@ def detect_hardware() -> HardwareInfo:
# RAM info (cross-platform)
info.ram_mb = _detect_ram_mb()
# CUDA detection — verify runtime libraries actually work, not just torch detection
# CUDA detection
try:
import torch
if torch.cuda.is_available():
# Test that CUDA runtime libraries are actually loadable
try:
torch.zeros(1, device="cuda")
info.has_cuda = True
info.cuda_device_name = torch.cuda.get_device_name(0)
info.vram_mb = torch.cuda.get_device_properties(0).total_mem // (1024 * 1024)
except Exception as e:
print(
f"[sidecar] CUDA detected but runtime unavailable: {e}. Using CPU.",
file=sys.stderr,
flush=True,
)
info.has_cuda = True
info.cuda_device_name = torch.cuda.get_device_name(0)
info.vram_mb = torch.cuda.get_device_properties(0).total_mem // (1024 * 1024)
except ImportError:
print("[sidecar] torch not available, GPU detection skipped", file=sys.stderr, flush=True)

View File

@@ -41,15 +41,11 @@ def ping_handler(msg: IPCMessage) -> IPCMessage:
def make_transcribe_handler() -> HandlerFunc:
"""Create a transcription handler with a persistent TranscribeService."""
service = None
from voice_to_notes.services.transcribe import TranscribeService, result_to_payload
service = TranscribeService()
def handler(msg: IPCMessage) -> IPCMessage:
nonlocal service
if service is None:
from voice_to_notes.services.transcribe import TranscribeService
service = TranscribeService()
from voice_to_notes.services.transcribe import result_to_payload
payload = msg.payload
result = service.transcribe(
request_id=msg.id,
@@ -70,15 +66,11 @@ def make_transcribe_handler() -> HandlerFunc:
def make_diarize_handler() -> HandlerFunc:
"""Create a diarization handler with a persistent DiarizeService."""
service = None
from voice_to_notes.services.diarize import DiarizeService, diarization_to_payload
service = DiarizeService()
def handler(msg: IPCMessage) -> IPCMessage:
nonlocal service
if service is None:
from voice_to_notes.services.diarize import DiarizeService
service = DiarizeService()
from voice_to_notes.services.diarize import diarization_to_payload
payload = msg.payload
result = service.diarize(
request_id=msg.id,
@@ -171,15 +163,11 @@ def make_diarize_download_handler() -> HandlerFunc:
def make_pipeline_handler() -> HandlerFunc:
"""Create a full pipeline handler (transcribe + diarize + merge)."""
service = None
from voice_to_notes.services.pipeline import PipelineService, pipeline_result_to_payload
service = PipelineService()
def handler(msg: IPCMessage) -> IPCMessage:
nonlocal service
if service is None:
from voice_to_notes.services.pipeline import PipelineService
service = PipelineService()
from voice_to_notes.services.pipeline import pipeline_result_to_payload
payload = msg.payload
result = service.run(
request_id=msg.id,
@@ -205,15 +193,11 @@ def make_pipeline_handler() -> HandlerFunc:
def make_export_handler() -> HandlerFunc:
"""Create an export handler."""
service = None
from voice_to_notes.services.export import ExportService, make_export_request
service = ExportService()
def handler(msg: IPCMessage) -> IPCMessage:
nonlocal service
if service is None:
from voice_to_notes.services.export import ExportService
service = ExportService()
from voice_to_notes.services.export import make_export_request
request = make_export_request(msg.payload)
output_path = service.export(request)
return IPCMessage(
@@ -227,14 +211,11 @@ def make_export_handler() -> HandlerFunc:
def make_ai_chat_handler() -> HandlerFunc:
"""Create an AI chat handler with persistent AIProviderService."""
service = None
from voice_to_notes.services.ai_provider import create_default_service
service = create_default_service()
def handler(msg: IPCMessage) -> IPCMessage:
nonlocal service
if service is None:
from voice_to_notes.services.ai_provider import create_default_service
service = create_default_service()
payload = msg.payload
action = payload.get("action", "chat")
@@ -254,15 +235,15 @@ def make_ai_chat_handler() -> HandlerFunc:
)
if action == "configure":
# Re-create a provider with custom settings and set it active
# Re-create a provider with custom settings
provider_name = payload.get("provider", "")
config = payload.get("config", {})
if provider_name == "local":
from voice_to_notes.providers.local_provider import LocalProvider
service.register_provider("local", LocalProvider(
base_url=config.get("base_url", "http://localhost:11434/v1"),
model=config.get("model", "llama3.2"),
base_url=config.get("base_url", "http://localhost:8080"),
model=config.get("model", "local"),
))
elif provider_name == "openai":
from voice_to_notes.providers.openai_provider import OpenAIProvider
@@ -286,10 +267,6 @@ def make_ai_chat_handler() -> HandlerFunc:
api_key=config.get("api_key"),
api_base=config.get("api_base"),
))
# Set the configured provider as active
print(f"[sidecar] Configured AI provider: {provider_name} with config: {config}", file=sys.stderr, flush=True)
if provider_name in ("local", "openai", "anthropic", "litellm"):
service.set_active(provider_name)
return IPCMessage(
id=msg.id,
type="ai.configured",

View File

@@ -5,7 +5,6 @@ from __future__ import annotations
import signal
import sys
# CRITICAL: Capture real stdout for IPC *before* importing any ML libraries
# that might print to stdout and corrupt the JSON-line protocol.
from voice_to_notes.ipc.protocol import init_ipc

View File

@@ -1,4 +1,4 @@
"""Local AI provider — Ollama or any OpenAI-compatible API."""
"""Local AI provider — bundled llama-server (OpenAI-compatible API)."""
from __future__ import annotations
@@ -9,9 +9,9 @@ from voice_to_notes.providers.base import AIProvider
class LocalProvider(AIProvider):
"""Connects to Ollama or any OpenAI-compatible API server."""
"""Connects to bundled llama-server via its OpenAI-compatible API."""
def __init__(self, base_url: str = "http://localhost:11434/v1", model: str = "llama3.2") -> None:
def __init__(self, base_url: str = "http://localhost:8080", model: str = "local") -> None:
self._base_url = base_url.rstrip("/")
self._model = model
self._client: Any = None
@@ -24,8 +24,8 @@ class LocalProvider(AIProvider):
from openai import OpenAI
self._client = OpenAI(
base_url=self._base_url,
api_key="ollama", # Ollama doesn't require a real key
base_url=f"{self._base_url}/v1",
api_key="not-needed", # llama-server doesn't require an API key
)
except ImportError:
raise RuntimeError(
@@ -47,9 +47,7 @@ class LocalProvider(AIProvider):
try:
import urllib.request
# Check base URL without /v1 suffix for Ollama root endpoint
root_url = self._base_url.replace("/v1", "")
req = urllib.request.Request(root_url, method="GET")
req = urllib.request.Request(f"{self._base_url}/health", method="GET")
with urllib.request.urlopen(req, timeout=2) as resp:
return resp.status == 200
except Exception:
@@ -57,4 +55,4 @@ class LocalProvider(AIProvider):
@property
def name(self) -> str:
return "Ollama"
return "Local (llama-server)"

View File

@@ -20,81 +20,6 @@ from voice_to_notes.utils.ffmpeg import get_ffmpeg_path
from voice_to_notes.ipc.messages import progress_message
from voice_to_notes.ipc.protocol import write_message
_patched = False
def _patch_pyannote_audio() -> None:
"""Monkey-patch pyannote.audio.core.io.Audio to use torchaudio.
pyannote.audio has a bug where AudioDecoder (from torchcodec) is used
unconditionally even when torchcodec is not installed, causing NameError.
This replaces the Audio.__call__ method with a torchaudio-based version.
"""
global _patched
if _patched:
return
_patched = True
try:
import numpy as np
import soundfile as sf
import torch
from pyannote.audio.core.io import Audio
# Cache loaded audio to avoid re-reading the entire file for every crop call.
# For a 3-hour file, crop is called 1000+ times — without caching, each call
# reads ~345MB from disk.
_audio_cache: dict[str, tuple] = {}
def _sf_load(audio_path: str) -> tuple:
"""Load audio via soundfile with caching."""
key = str(audio_path)
if key in _audio_cache:
return _audio_cache[key]
data, sample_rate = sf.read(key, dtype="float32")
waveform = torch.from_numpy(np.array(data))
if waveform.ndim == 1:
waveform = waveform.unsqueeze(0)
else:
waveform = waveform.T
_audio_cache[key] = (waveform, sample_rate)
return waveform, sample_rate
def _soundfile_call(self, file: dict) -> tuple:
"""Replacement for Audio.__call__."""
return _sf_load(file["audio"])
def _soundfile_crop(self, file: dict, segment, **kwargs) -> tuple:
"""Replacement for Audio.crop — load file once (cached) then slice.
Pads short segments with zeros to match the expected duration,
which pyannote requires for batched embedding extraction.
"""
duration = kwargs.get("duration", None)
waveform, sample_rate = _sf_load(file["audio"])
# Convert segment (seconds) to sample indices
start_sample = int(segment.start * sample_rate)
end_sample = int(segment.end * sample_rate)
# Clamp to bounds
start_sample = max(0, start_sample)
end_sample = min(waveform.shape[-1], end_sample)
cropped = waveform[:, start_sample:end_sample]
# Pad to expected duration if needed (pyannote batches require uniform size)
if duration is not None:
expected_samples = int(duration * sample_rate)
else:
expected_samples = int((segment.end - segment.start) * sample_rate)
if cropped.shape[-1] < expected_samples:
pad = torch.zeros(cropped.shape[0], expected_samples - cropped.shape[-1])
cropped = torch.cat([cropped, pad], dim=-1)
return cropped, sample_rate
Audio.__call__ = _soundfile_call # type: ignore[assignment]
Audio.crop = _soundfile_crop # type: ignore[assignment]
print("[sidecar] Patched pyannote Audio to use soundfile", file=sys.stderr, flush=True)
except Exception as e:
print(f"[sidecar] Warning: Could not patch pyannote Audio: {e}", file=sys.stderr, flush=True)
def _ensure_wav(file_path: str) -> tuple[str, str | None]:
"""Convert audio to 16kHz mono WAV if needed.
@@ -188,7 +113,6 @@ class DiarizeService:
]
last_error: Exception | None = None
_patch_pyannote_audio()
for model_name in models:
try:
from pyannote.audio import Pipeline
@@ -288,20 +212,13 @@ class DiarizeService:
thread.start()
elapsed = 0.0
estimated_total = max(audio_duration_sec * 0.8, 30.0) if audio_duration_sec else 120.0
duration_str = ""
if audio_duration_sec and audio_duration_sec > 600:
mins = int(audio_duration_sec / 60)
duration_str = f" ({mins}min audio, this may take a while)"
while not done_event.wait(timeout=5.0):
elapsed += 5.0
estimated_total = max(audio_duration_sec * 0.5, 30.0) if audio_duration_sec else 120.0
while not done_event.wait(timeout=2.0):
elapsed += 2.0
pct = min(20 + int((elapsed / estimated_total) * 65), 85)
elapsed_min = int(elapsed / 60)
elapsed_sec = int(elapsed % 60)
time_str = f"{elapsed_min}m{elapsed_sec:02d}s" if elapsed_min > 0 else f"{int(elapsed)}s"
write_message(progress_message(
request_id, pct, "diarizing",
f"Analyzing speakers ({time_str} elapsed){duration_str}"))
f"Analyzing speakers ({int(elapsed)}s elapsed)..."))
thread.join()

View File

@@ -77,28 +77,11 @@ class TranscribeService:
file=sys.stderr,
flush=True,
)
try:
self._model = WhisperModel(
model_name,
device=device,
compute_type=compute_type,
)
except Exception as e:
if device != "cpu":
print(
f"[sidecar] Failed to load on {device}: {e}. Falling back to CPU.",
file=sys.stderr,
flush=True,
)
device = "cpu"
compute_type = "int8"
self._model = WhisperModel(
model_name,
device=device,
compute_type=compute_type,
)
else:
raise
self._model = WhisperModel(
model_name,
device=device,
compute_type=compute_type,
)
self._current_model_name = model_name
self._current_device = device
self._current_compute_type = compute_type
@@ -113,22 +96,17 @@ class TranscribeService:
compute_type: str = "int8",
language: str | None = None,
on_segment: Callable[[SegmentResult, int], None] | None = None,
chunk_label: str | None = None,
) -> TranscriptionResult:
"""Transcribe an audio file with word-level timestamps.
Sends progress messages via IPC during processing.
If chunk_label is set (e.g. "chunk 3/12"), messages are prefixed with it.
"""
prefix = f"{chunk_label}: " if chunk_label else ""
# Stage: loading model (skip for chunks after the first — model already loaded)
if not chunk_label:
write_message(progress_message(request_id, 0, "loading_model", f"Loading {model_name}..."))
# Stage: loading model
write_message(progress_message(request_id, 0, "loading_model", f"Loading {model_name}..."))
model = self._ensure_model(model_name, device, compute_type)
# Stage: transcribing
write_message(progress_message(request_id, 10, "transcribing", f"{prefix}Starting transcription..."))
write_message(progress_message(request_id, 10, "transcribing", "Starting transcription..."))
start_time = time.time()
segments_iter, info = model.transcribe(
@@ -181,7 +159,7 @@ class TranscribeService:
request_id,
progress_pct,
"transcribing",
f"{prefix}Transcribing segment {segment_count} ({progress_pct}% of audio)...",
f"Transcribing segment {segment_count} ({progress_pct}% of audio)...",
)
)
@@ -276,7 +254,6 @@ class TranscribeService:
chunk_result = self.transcribe(
request_id, tmp.name, model_name, device,
compute_type, language, on_segment=chunk_on_segment,
chunk_label=f"Chunk {chunk_idx + 1}/{num_chunks}",
)
# Offset timestamps and merge

52
src-tauri/Cargo.lock generated
View File

@@ -59,15 +59,6 @@ version = "1.0.102"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c"
[[package]]
name = "arbitrary"
version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1"
dependencies = [
"derive_arbitrary",
]
[[package]]
name = "async-broadcast"
version = "0.7.2"
@@ -664,17 +655,6 @@ dependencies = [
"serde_core",
]
[[package]]
name = "derive_arbitrary"
version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e567bd82dcff979e4b03460c307b3cdc9e96fde3d73bed1496d2bc75d9dd62a"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.117",
]
[[package]]
name = "derive_more"
version = "0.99.20"
@@ -4382,7 +4362,7 @@ checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
[[package]]
name = "voice-to-notes"
version = "0.2.2"
version = "0.1.0"
dependencies = [
"chrono",
"rusqlite",
@@ -4394,7 +4374,6 @@ dependencies = [
"tauri-plugin-opener",
"thiserror 1.0.69",
"uuid",
"zip",
]
[[package]]
@@ -5433,41 +5412,12 @@ dependencies = [
"syn 2.0.117",
]
[[package]]
name = "zip"
version = "2.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fabe6324e908f85a1c52063ce7aa26b68dcb7eb6dbc83a2d148403c9bc3eba50"
dependencies = [
"arbitrary",
"crc32fast",
"crossbeam-utils",
"displaydoc",
"flate2",
"indexmap 2.13.0",
"memchr",
"thiserror 2.0.18",
"zopfli",
]
[[package]]
name = "zmij"
version = "1.0.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa"
[[package]]
name = "zopfli"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f05cd8797d63865425ff89b5c4a48804f35ba0ce8d125800027ad6017d2b5249"
dependencies = [
"bumpalo",
"crc32fast",
"log",
"simd-adler32",
]
[[package]]
name = "zvariant"
version = "5.10.0"

View File

@@ -1,6 +1,6 @@
[package]
name = "voice-to-notes"
version = "0.2.46"
version = "0.1.0"
description = "Voice to Notes — desktop transcription with speaker identification"
authors = ["Voice to Notes Contributors"]
license = "MIT"
@@ -14,16 +14,12 @@ crate-type = ["staticlib", "cdylib", "rlib"]
tauri-build = { version = "2", features = [] }
[dependencies]
tauri = { version = "2", features = ["protocol-asset", "devtools"] }
tauri = { version = "2", features = ["protocol-asset"] }
tauri-plugin-opener = "2"
serde = { version = "1", features = ["derive"] }
serde_json = "1"
rusqlite = { version = "0.31", features = ["bundled"] }
uuid = { version = "1", features = ["v4", "serde"] }
zip = { version = "2", default-features = false, features = ["deflate"] }
thiserror = "1"
chrono = { version = "0.4", features = ["serde"] }
tauri-plugin-dialog = "2.6.0"
reqwest = { version = "0.12", features = ["stream", "json"] }
futures-util = "0.3"
bytes = "1"

View File

@@ -1,21 +1,3 @@
fn main() {
// Ensure sidecar.zip exists so tauri-build doesn't fail.
// CI replaces this placeholder with the real PyInstaller sidecar archive.
let zip_path = std::path::Path::new("sidecar.zip");
if !zip_path.exists() {
// Minimal valid zip (empty archive): end-of-central-directory record
let empty_zip: [u8; 22] = [
0x50, 0x4b, 0x05, 0x06, // EOCD signature
0x00, 0x00, // disk number
0x00, 0x00, // disk with central dir
0x00, 0x00, // entries on this disk
0x00, 0x00, // total entries
0x00, 0x00, 0x00, 0x00, // central dir size
0x00, 0x00, 0x00, 0x00, // central dir offset
0x00, 0x00, // comment length
];
std::fs::write(zip_path, empty_zip).expect("Failed to create placeholder sidecar.zip");
}
tauri_build::build()
}

View File

@@ -1,11 +0,0 @@
; NSIS uninstall hook for Voice to Notes
; Removes the sidecar data directory (extracted sidecar binaries + logs)
; but preserves user data in $PROFILE\.voicetonotes (database, settings, models)
!macro NSIS_HOOK_POSTUNINSTALL
; Remove the Tauri app_local_data_dir which contains:
; - Extracted sidecar directories (voice-to-notes-sidecar/)
; - sidecar.log
; Path: %LOCALAPPDATA%\com.voicetonotes.app
RMDir /r "$LOCALAPPDATA\com.voicetonotes.app"
!macroend

View File

@@ -1,152 +0,0 @@
use std::path::PathBuf;
use std::process::Command;
#[cfg(target_os = "windows")]
use std::os::windows::process::CommandExt;
/// Extract audio from a video file to a WAV file using ffmpeg.
/// Returns the path to the extracted audio file.
#[tauri::command]
pub fn extract_audio(file_path: String, output_path: Option<String>) -> Result<String, String> {
let input = PathBuf::from(&file_path);
if !input.exists() {
return Err(format!("File not found: {}", file_path));
}
// Use provided output path, or fall back to a temp WAV file
let stem = input.file_stem().unwrap_or_default().to_string_lossy();
let output = match output_path {
Some(ref p) => PathBuf::from(p),
None => std::env::temp_dir().join(format!("{stem}_audio.wav")),
};
eprintln!(
"[media] Extracting audio: {} -> {}",
input.display(),
output.display()
);
// Find ffmpeg — check sidecar extract dir first, then system PATH
let ffmpeg = find_ffmpeg().ok_or("ffmpeg not found. Install ffmpeg or ensure it's in PATH.")?;
let mut cmd = Command::new(&ffmpeg);
cmd.args([
"-y", // Overwrite output
"-i",
&file_path,
"-vn", // No video
"-acodec",
"pcm_s16le", // WAV PCM 16-bit
"-ar",
"22050", // 22kHz mono for better playback quality
"-ac",
"1", // Mono
])
.arg(output.to_str().unwrap())
.stdout(std::process::Stdio::null())
.stderr(std::process::Stdio::piped());
// Hide the console window on Windows (CREATE_NO_WINDOW = 0x08000000)
#[cfg(target_os = "windows")]
cmd.creation_flags(0x08000000);
let status = match cmd.status() {
Ok(s) => s,
Err(e) if e.raw_os_error() == Some(13) => {
// Permission denied — fix permissions and retry
eprintln!("[media] Permission denied on ffmpeg, fixing permissions and retrying...");
#[cfg(unix)]
{
use std::os::unix::fs::PermissionsExt;
if let Ok(meta) = std::fs::metadata(&ffmpeg) {
let mut perms = meta.permissions();
perms.set_mode(0o755);
let _ = std::fs::set_permissions(&ffmpeg, perms);
}
// Also fix ffprobe if it exists
let ffprobe = ffmpeg.replace("ffmpeg", "ffprobe");
if let Ok(meta) = std::fs::metadata(&ffprobe) {
let mut perms = meta.permissions();
perms.set_mode(0o755);
let _ = std::fs::set_permissions(&ffprobe, perms);
}
}
Command::new(&ffmpeg)
.args(["-y", "-i", &file_path, "-vn", "-acodec", "pcm_s16le", "-ar", "22050", "-ac", "1"])
.arg(output.to_str().unwrap())
.stdout(std::process::Stdio::null())
.stderr(std::process::Stdio::piped())
.status()
.map_err(|e| format!("Failed to run ffmpeg after chmod: {e}"))?
}
Err(e) => return Err(format!("Failed to run ffmpeg: {e}")),
};
if !status.success() {
return Err(format!("ffmpeg exited with status {status}"));
}
if !output.exists() {
return Err("ffmpeg completed but output file not found".to_string());
}
eprintln!("[media] Audio extracted successfully");
Ok(output.to_string_lossy().to_string())
}
#[tauri::command]
pub fn check_file_exists(path: String) -> bool {
std::path::Path::new(&path).exists()
}
#[tauri::command]
pub fn copy_file(src: String, dst: String) -> Result<(), String> {
std::fs::copy(&src, &dst).map_err(|e| format!("Failed to copy file: {e}"))?;
Ok(())
}
#[tauri::command]
pub fn create_dir(path: String) -> Result<(), String> {
std::fs::create_dir_all(&path).map_err(|e| format!("Failed to create directory: {e}"))?;
Ok(())
}
/// Find ffmpeg binary — check sidecar directory first, then system PATH.
fn find_ffmpeg() -> Option<String> {
// Check sidecar extract dir (ffmpeg is bundled with the sidecar)
if let Some(data_dir) = crate::sidecar::DATA_DIR.get() {
// Read sidecar version to find the right directory
let version_file = data_dir.join("sidecar-version.txt");
if let Ok(version) = std::fs::read_to_string(&version_file) {
let version = version.trim();
let sidecar_dir = data_dir.join(format!("sidecar-{version}"));
let ffmpeg_name = if cfg!(target_os = "windows") {
"ffmpeg.exe"
} else {
"ffmpeg"
};
let ffmpeg_path = sidecar_dir.join(ffmpeg_name);
if ffmpeg_path.exists() {
return Some(ffmpeg_path.to_string_lossy().to_string());
}
}
}
// Fall back to system PATH
let ffmpeg_name = if cfg!(target_os = "windows") {
"ffmpeg.exe"
} else {
"ffmpeg"
};
if Command::new(ffmpeg_name)
.arg("-version")
.stdout(std::process::Stdio::null())
.stderr(std::process::Stdio::null())
.status()
.is_ok()
{
return Some(ffmpeg_name.to_string());
}
None
}

View File

@@ -1,8 +1,6 @@
pub mod ai;
pub mod export;
pub mod media;
pub mod project;
pub mod settings;
pub mod sidecar;
pub mod system;
pub mod transcribe;

View File

@@ -12,12 +12,7 @@ use crate::state::AppState;
pub struct ProjectFile {
pub version: u32,
pub name: String,
#[serde(default)]
pub audio_file: Option<String>,
#[serde(default)]
pub source_file: Option<String>,
#[serde(default)]
pub audio_wav: Option<String>,
pub audio_file: String,
pub created_at: String,
pub segments: Vec<ProjectFileSegment>,
pub speakers: Vec<ProjectFileSpeaker>,

View File

@@ -32,16 +32,3 @@ pub fn save_settings(settings: Value) -> Result<(), String> {
fs::write(&path, json).map_err(|e| format!("Cannot write settings: {e}"))?;
Ok(())
}
/// Toggle devtools on the main window.
#[tauri::command]
pub fn toggle_devtools(app: tauri::AppHandle, open: bool) {
use tauri::Manager;
if let Some(window) = app.get_webview_window("main") {
if open {
window.open_devtools();
} else {
window.close_devtools();
}
}
}

View File

@@ -1,258 +0,0 @@
use futures_util::StreamExt;
use serde::Serialize;
use std::io::Write;
use tauri::{AppHandle, Emitter};
use crate::sidecar::{SidecarManager, DATA_DIR};
const REPO_API: &str = "https://repo.anhonesthost.net/api/v1/repos/MacroPad/voice-to-notes";
#[derive(Serialize, Clone)]
struct DownloadProgress {
downloaded: u64,
total: u64,
percent: u8,
}
#[derive(Serialize)]
pub struct UpdateInfo {
pub current_version: String,
pub latest_version: String,
}
/// Read the locally installed sidecar version from `sidecar-version.txt`.
/// Returns `None` if the file doesn't exist or can't be read.
fn read_local_sidecar_version() -> Option<String> {
let data_dir = DATA_DIR.get()?;
let version_file = data_dir.join("sidecar-version.txt");
std::fs::read_to_string(version_file)
.ok()
.map(|v| v.trim().to_string())
.filter(|v| !v.is_empty())
}
/// Write the sidecar version to `sidecar-version.txt` after a successful download.
fn write_local_sidecar_version(version: &str) -> Result<(), String> {
let data_dir = DATA_DIR.get().ok_or("App data directory not initialized")?;
let version_file = data_dir.join("sidecar-version.txt");
std::fs::write(&version_file, version)
.map_err(|e| format!("Failed to write sidecar version file: {}", e))
}
/// Fetch releases from the Gitea API and find the latest sidecar release
/// (one whose tag_name starts with "sidecar-v").
async fn fetch_latest_sidecar_release(
client: &reqwest::Client,
) -> Result<serde_json::Value, String> {
let releases_url = format!("{}/releases?limit=20", REPO_API);
let resp = client
.get(&releases_url)
.header("Accept", "application/json")
.send()
.await
.map_err(|e| format!("Failed to fetch releases: {}", e))?;
if !resp.status().is_success() {
return Err(format!("Failed to fetch releases: HTTP {}", resp.status()));
}
let releases = resp
.json::<Vec<serde_json::Value>>()
.await
.map_err(|e| format!("Failed to parse releases JSON: {}", e))?;
releases
.into_iter()
.find(|r| {
r["tag_name"]
.as_str()
.map_or(false, |t| t.starts_with("sidecar-v"))
})
.ok_or_else(|| "No sidecar release found".to_string())
}
/// Extract the version string from a sidecar tag name (e.g. "sidecar-v1.0.1" -> "1.0.1").
fn version_from_sidecar_tag(tag: &str) -> &str {
tag.strip_prefix("sidecar-v").unwrap_or(tag)
}
/// Check if the sidecar binary exists for the currently installed version.
#[tauri::command]
pub fn check_sidecar() -> bool {
let data_dir = match DATA_DIR.get() {
Some(d) => d,
None => return false,
};
let version = match read_local_sidecar_version() {
Some(v) => v,
None => return false,
};
let binary_name = if cfg!(target_os = "windows") {
"voice-to-notes-sidecar.exe"
} else {
"voice-to-notes-sidecar"
};
let extract_dir = data_dir.join(format!("sidecar-{}", version));
extract_dir.join(binary_name).exists()
}
/// Determine the current platform name for asset downloads.
fn platform_os() -> &'static str {
if cfg!(target_os = "windows") {
"windows"
} else if cfg!(target_os = "macos") {
"macos"
} else {
"linux"
}
}
/// Determine the current architecture name for asset downloads.
fn platform_arch() -> &'static str {
if cfg!(target_arch = "aarch64") {
"aarch64"
} else {
"x86_64"
}
}
/// Download the sidecar binary for the given variant (cpu or cuda).
#[tauri::command]
pub async fn download_sidecar(app: AppHandle, variant: String) -> Result<(), String> {
let data_dir = DATA_DIR.get().ok_or("App data directory not initialized")?;
let os = platform_os();
let arch = platform_arch();
let asset_name = format!("sidecar-{}-{}-{}.zip", os, arch, variant);
// Fetch the latest sidecar release from Gitea API
let client = reqwest::Client::new();
let sidecar_release = fetch_latest_sidecar_release(&client).await?;
let tag = sidecar_release["tag_name"]
.as_str()
.ok_or("No tag_name in sidecar release")?;
let sidecar_version = version_from_sidecar_tag(tag).to_string();
// Find the matching asset
let assets = sidecar_release["assets"]
.as_array()
.ok_or("No assets found in sidecar release")?;
let download_url = assets
.iter()
.find(|a| a["name"].as_str() == Some(&asset_name))
.and_then(|a| a["browser_download_url"].as_str())
.ok_or_else(|| {
format!(
"Asset '{}' not found in sidecar release {}",
asset_name, tag
)
})?
.to_string();
// Stream download with progress events
let response: reqwest::Response = client
.get(&download_url)
.send()
.await
.map_err(|e| format!("Failed to start download: {}", e))?;
if !response.status().is_success() {
return Err(format!("Download failed: HTTP {}", response.status()));
}
let total: u64 = response.content_length().unwrap_or(0);
let mut downloaded: u64 = 0;
let mut stream = response.bytes_stream();
let zip_path = data_dir.join("sidecar.zip");
let mut file = std::fs::File::create(&zip_path)
.map_err(|e| format!("Failed to create zip file: {}", e))?;
while let Some(chunk) = stream.next().await {
let chunk: bytes::Bytes = chunk.map_err(|e| format!("Download stream error: {}", e))?;
file.write_all(&chunk)
.map_err(|e| format!("Failed to write chunk: {}", e))?;
downloaded += chunk.len() as u64;
let percent = if total > 0 {
(downloaded * 100 / total) as u8
} else {
0
};
let _ = app.emit(
"sidecar-download-progress",
DownloadProgress {
downloaded,
total,
percent,
},
);
}
// Extract the downloaded zip
let extract_dir = data_dir.join(format!("sidecar-{}", sidecar_version));
SidecarManager::extract_zip(&zip_path, &extract_dir)?;
// Make all binaries executable on Unix (sidecar, ffmpeg, ffprobe, etc.)
#[cfg(unix)]
{
use std::os::unix::fs::PermissionsExt;
if let Ok(entries) = std::fs::read_dir(&extract_dir) {
for entry in entries.flatten() {
let path = entry.path();
if path.is_file() {
if let Ok(meta) = std::fs::metadata(&path) {
let mut perms = meta.permissions();
perms.set_mode(0o755);
let _ = std::fs::set_permissions(&path, perms);
}
}
}
}
}
// Write the sidecar version file
write_local_sidecar_version(&sidecar_version)?;
// Clean up the zip file and old sidecar versions
let _ = std::fs::remove_file(&zip_path);
SidecarManager::cleanup_old_sidecars(data_dir, &sidecar_version);
Ok(())
}
/// Check if a sidecar update is available.
#[tauri::command]
pub async fn check_sidecar_update() -> Result<Option<UpdateInfo>, String> {
// If sidecar doesn't exist yet, return None (first launch handled separately)
if !check_sidecar() {
return Ok(None);
}
let current_version = match read_local_sidecar_version() {
Some(v) => v,
None => return Ok(None),
};
// Fetch latest sidecar release from Gitea API
let client = reqwest::Client::new();
let sidecar_release = fetch_latest_sidecar_release(&client).await?;
let latest_tag = sidecar_release["tag_name"]
.as_str()
.ok_or("No tag_name in sidecar release")?;
let latest_version = version_from_sidecar_tag(latest_tag);
if latest_version != current_version {
Ok(Some(UpdateInfo {
current_version,
latest_version: latest_version.to_string(),
}))
} else {
Ok(None)
}
}

View File

@@ -60,18 +60,3 @@ pub fn llama_list_models() -> Value {
pub fn get_data_dir() -> String {
LlamaManager::data_dir().to_string_lossy().to_string()
}
/// Log a message from the frontend to a file for debugging.
#[tauri::command]
pub fn log_frontend(level: String, message: String) {
use std::io::Write;
let log_path = LlamaManager::data_dir().join("frontend.log");
if let Ok(mut file) = std::fs::OpenOptions::new()
.create(true)
.append(true)
.open(&log_path)
{
let timestamp = chrono::Local::now().format("%Y-%m-%d %H:%M:%S");
let _ = writeln!(file, "[{timestamp}] [{level}] {message}");
}
}

View File

@@ -9,16 +9,12 @@ use tauri::Manager;
use commands::ai::{ai_chat, ai_configure, ai_list_providers};
use commands::export::export_transcript;
use commands::media::{check_file_exists, copy_file, create_dir, extract_audio};
use commands::project::{
create_project, delete_project, get_project, list_projects, load_project_file,
load_project_transcript, save_project_file, save_project_transcript, update_segment,
};
use commands::settings::{load_settings, save_settings, toggle_devtools};
use commands::sidecar::{check_sidecar, check_sidecar_update, download_sidecar};
use commands::system::{
get_data_dir, llama_list_models, llama_start, llama_status, llama_stop, log_frontend,
};
use commands::settings::{load_settings, save_settings};
use commands::system::{get_data_dir, llama_list_models, llama_start, llama_status, llama_stop};
use commands::transcribe::{download_diarize_model, run_pipeline, transcribe_file};
use state::AppState;
@@ -31,14 +27,6 @@ pub fn run() {
.plugin(tauri_plugin_dialog::init())
.manage(app_state)
.setup(|app| {
// Tell the sidecar manager where Tauri placed bundled resources
// and where to extract the sidecar archive
if let (Ok(resource_dir), Ok(data_dir)) =
(app.path().resource_dir(), app.path().app_local_data_dir())
{
sidecar::init_dirs(resource_dir, data_dir);
}
// Set the webview background to match the app's dark theme
if let Some(window) = app.get_webview_window("main") {
let _ = window.set_background_color(Some(Color(10, 10, 35, 255)));
@@ -69,15 +57,6 @@ pub fn run() {
get_data_dir,
load_settings,
save_settings,
check_sidecar,
download_sidecar,
check_sidecar_update,
log_frontend,
toggle_devtools,
extract_audio,
check_file_exists,
copy_file,
create_dir,
])
.run(tauri::generate_context!())
.expect("error while running tauri application");

View File

@@ -2,27 +2,11 @@ pub mod ipc;
pub mod messages;
use std::io::{BufRead, BufReader, Write};
use std::path::{Path, PathBuf};
use std::process::{Child, ChildStdin, Command, Stdio};
use std::sync::{Mutex, OnceLock};
#[cfg(target_os = "windows")]
use std::os::windows::process::CommandExt;
use crate::sidecar::messages::IPCMessage;
/// Resource directory set by the Tauri app during setup.
static RESOURCE_DIR: OnceLock<PathBuf> = OnceLock::new();
/// App data directory for extracting the sidecar archive.
pub(crate) static DATA_DIR: OnceLock<PathBuf> = OnceLock::new();
/// Initialize directories for sidecar resolution.
/// Must be called from the Tauri setup before any sidecar operations.
pub fn init_dirs(resource_dir: PathBuf, data_dir: PathBuf) {
RESOURCE_DIR.set(resource_dir).ok();
DATA_DIR.set(data_dir).ok();
}
/// Get the global sidecar manager singleton.
pub fn sidecar() -> &'static SidecarManager {
static INSTANCE: OnceLock<SidecarManager> = OnceLock::new();
@@ -56,203 +40,38 @@ impl SidecarManager {
cfg!(debug_assertions) || std::env::var("VOICE_TO_NOTES_DEV").is_ok()
}
/// Read the locally installed sidecar version from `sidecar-version.txt`.
fn read_sidecar_version() -> Result<String, String> {
let data_dir = DATA_DIR.get().ok_or("App data directory not initialized")?;
let version_file = data_dir.join("sidecar-version.txt");
std::fs::read_to_string(&version_file)
.map_err(|_| {
"Sidecar not installed: sidecar-version.txt not found. Please download the sidecar."
.to_string()
})
.map(|v| v.trim().to_string())
.and_then(|v| {
if v.is_empty() {
Err(
"Sidecar version file is empty. Please re-download the sidecar."
.to_string(),
)
} else {
Ok(v)
}
})
}
/// Resolve the frozen sidecar binary path (production mode).
///
/// Reads the installed sidecar version from `sidecar-version.txt` and
/// looks for the binary in the corresponding `sidecar-{version}` directory.
/// If the version file doesn't exist, the sidecar hasn't been downloaded yet.
fn resolve_sidecar_path() -> Result<PathBuf, String> {
fn resolve_sidecar_path() -> Result<std::path::PathBuf, String> {
let exe = std::env::current_exe().map_err(|e| format!("Cannot get current exe: {e}"))?;
let exe_dir = exe
.parent()
.ok_or_else(|| "Cannot get exe parent directory".to_string())?;
let binary_name = if cfg!(target_os = "windows") {
"voice-to-notes-sidecar.exe"
} else {
"voice-to-notes-sidecar"
};
let data_dir = DATA_DIR.get().ok_or("App data directory not initialized")?;
let current_version = Self::read_sidecar_version()?;
let extract_dir = data_dir.join(format!("sidecar-{}", current_version));
let binary_path = extract_dir.join(binary_name);
// Already extracted — use it directly
if binary_path.exists() {
Self::cleanup_old_sidecars(data_dir, &current_version);
return Ok(binary_path);
// Tauri places externalBin next to the app binary
let path = exe_dir.join(binary_name);
if path.exists() {
return Ok(path);
}
// Find sidecar.zip in resource dir or next to exe
let zip_path = Self::find_sidecar_zip()?;
Self::extract_zip(&zip_path, &extract_dir)?;
if !binary_path.exists() {
return Err(format!(
"Sidecar binary not found after extraction at {}",
binary_path.display()
));
}
#[cfg(unix)]
Self::set_executable_permissions(&extract_dir);
Self::cleanup_old_sidecars(data_dir, &current_version);
Ok(binary_path)
}
/// Locate the bundled sidecar.zip archive.
fn find_sidecar_zip() -> Result<PathBuf, String> {
let mut candidates: Vec<PathBuf> = Vec::new();
if let Some(resource_dir) = RESOURCE_DIR.get() {
candidates.push(resource_dir.join("sidecar.zip"));
}
if let Ok(exe) = std::env::current_exe() {
if let Some(exe_dir) = exe.parent() {
candidates.push(exe_dir.join("sidecar.zip"));
}
}
for path in &candidates {
if path.exists() {
return Ok(path.clone());
}
// Also check inside a subdirectory (onedir PyInstaller output)
let subdir_path = exe_dir.join("voice-to-notes-sidecar").join(binary_name);
if subdir_path.exists() {
return Ok(subdir_path);
}
Err(format!(
"Sidecar archive not found. Checked:\n{}",
candidates
.iter()
.map(|p| format!(" {}", p.display()))
.collect::<Vec<_>>()
.join("\n"),
"Sidecar binary not found. Looked for:\n {}\n {}",
path.display(),
subdir_path.display(),
))
}
/// Extract a zip archive to the given directory.
pub(crate) fn extract_zip(zip_path: &Path, dest: &Path) -> Result<(), String> {
eprintln!(
"[sidecar-rs] Extracting sidecar from {} to {}",
zip_path.display(),
dest.display()
);
// Clean destination so we don't mix old and new files
if dest.exists() {
std::fs::remove_dir_all(dest)
.map_err(|e| format!("Failed to clean extraction dir: {e}"))?;
}
std::fs::create_dir_all(dest)
.map_err(|e| format!("Failed to create extraction dir: {e}"))?;
let file =
std::fs::File::open(zip_path).map_err(|e| format!("Cannot open sidecar zip: {e}"))?;
let mut archive =
zip::ZipArchive::new(file).map_err(|e| format!("Invalid sidecar zip: {e}"))?;
for i in 0..archive.len() {
let mut entry = archive
.by_index(i)
.map_err(|e| format!("Zip entry error: {e}"))?;
let name = entry.name().to_string();
let outpath = dest.join(&name);
if entry.is_dir() {
std::fs::create_dir_all(&outpath)
.map_err(|e| format!("Cannot create dir {}: {e}", outpath.display()))?;
} else {
if let Some(parent) = outpath.parent() {
std::fs::create_dir_all(parent)
.map_err(|e| format!("Cannot create dir {}: {e}", parent.display()))?;
}
let mut outfile = std::fs::File::create(&outpath)
.map_err(|e| format!("Cannot create {}: {e}", outpath.display()))?;
std::io::copy(&mut entry, &mut outfile)
.map_err(|e| format!("Write error for {}: {e}", name))?;
}
}
eprintln!("[sidecar-rs] Sidecar extracted successfully");
Ok(())
}
/// Remove old sidecar-* directories that don't match the current version.
/// Called after the current version's sidecar is confirmed ready.
/// Set execute permissions on all files in a directory (Unix only).
#[cfg(unix)]
fn set_executable_permissions(dir: &Path) {
use std::os::unix::fs::PermissionsExt;
if let Ok(entries) = std::fs::read_dir(dir) {
for entry in entries.flatten() {
let path = entry.path();
if path.is_file() {
if let Ok(meta) = std::fs::metadata(&path) {
let mut perms = meta.permissions();
perms.set_mode(0o755);
let _ = std::fs::set_permissions(&path, perms);
}
}
}
}
}
pub(crate) fn cleanup_old_sidecars(data_dir: &Path, current_version: &str) {
let current_dir_name = format!("sidecar-{}", current_version);
let entries = match std::fs::read_dir(data_dir) {
Ok(entries) => entries,
Err(e) => {
eprintln!("[sidecar-rs] Cannot read data dir for cleanup: {e}");
return;
}
};
for entry in entries.flatten() {
let name = entry.file_name();
let name_str = name.to_string_lossy();
if !name_str.starts_with("sidecar-") {
continue;
}
if *name_str == current_dir_name {
continue;
}
if entry.path().is_dir() {
eprintln!(
"[sidecar-rs] Removing old sidecar: {}",
entry.path().display()
);
if let Err(e) = std::fs::remove_dir_all(entry.path()) {
eprintln!(
"[sidecar-rs] Failed to remove {}: {e}",
entry.path().display()
);
}
}
}
}
/// Find a working Python command for the current platform.
fn find_python_command() -> &'static str {
if cfg!(target_os = "windows") {
@@ -295,8 +114,15 @@ impl SidecarManager {
if Self::is_dev_mode() {
self.start_python_dev()
} else {
let path = Self::resolve_sidecar_path()?;
self.start_binary(&path)
match Self::resolve_sidecar_path() {
Ok(path) => self.start_binary(&path),
Err(e) => {
eprintln!(
"[sidecar-rs] Frozen binary not found ({e}), falling back to dev mode"
);
self.start_python_dev()
}
}
}
}
@@ -305,66 +131,15 @@ impl SidecarManager {
self.stop().ok();
eprintln!("[sidecar-rs] Starting frozen sidecar: {}", path.display());
// Log sidecar stderr to a file for diagnostics
let stderr_cfg = if let Some(data_dir) = DATA_DIR.get() {
let _ = std::fs::create_dir_all(data_dir);
let log_path = data_dir.join("sidecar.log");
eprintln!("[sidecar-rs] Sidecar stderr → {}", log_path.display());
match std::fs::File::create(&log_path) {
Ok(f) => Stdio::from(f),
Err(e) => {
eprintln!("[sidecar-rs] Failed to create sidecar.log: {e}");
Stdio::inherit()
}
}
} else {
eprintln!("[sidecar-rs] DATA_DIR not set, sidecar stderr will not be logged");
Stdio::inherit()
};
let mut cmd = Command::new(path);
cmd.stdin(Stdio::piped())
let child = Command::new(path)
.stdin(Stdio::piped())
.stdout(Stdio::piped())
.stderr(stderr_cfg);
.stderr(Stdio::inherit())
.spawn()
.map_err(|e| format!("Failed to start sidecar binary: {e}"))?;
// Hide the console window on Windows (CREATE_NO_WINDOW = 0x08000000)
#[cfg(target_os = "windows")]
cmd.creation_flags(0x08000000);
match cmd.spawn() {
Ok(child) => {
self.attach(child)?;
self.wait_for_ready()
}
Err(e) if e.raw_os_error() == Some(13) => {
// Permission denied — fix permissions and retry once
eprintln!("[sidecar-rs] Permission denied, fixing permissions and retrying...");
#[cfg(unix)]
if let Some(dir) = path.parent() {
Self::set_executable_permissions(dir);
}
let mut retry_cmd = Command::new(path);
retry_cmd
.stdin(Stdio::piped())
.stdout(Stdio::piped())
.stderr(if let Some(data_dir) = DATA_DIR.get() {
let log_path = data_dir.join("sidecar.log");
std::fs::File::create(&log_path)
.map(Stdio::from)
.unwrap_or_else(|_| Stdio::inherit())
} else {
Stdio::inherit()
});
#[cfg(target_os = "windows")]
retry_cmd.creation_flags(0x08000000);
let child = retry_cmd
.spawn()
.map_err(|e| format!("Failed to start sidecar binary after chmod: {e}"))?;
self.attach(child)?;
self.wait_for_ready()
}
Err(e) => Err(format!("Failed to start sidecar binary: {e}")),
}
self.attach(child)?;
self.wait_for_ready()
}
/// Spawn the Python sidecar in dev mode (system Python).
@@ -425,22 +200,7 @@ impl SidecarManager {
.read_line(&mut line)
.map_err(|e| format!("Read error: {e}"))?;
if bytes == 0 {
// Try to get the exit code for diagnostics
let exit_info = {
let mut proc = self.process.lock().map_err(|e| e.to_string())?;
if let Some(ref mut child) = *proc {
match child.try_wait() {
Ok(Some(status)) => format!(" (exit status: {status})"),
_ => String::new(),
}
} else {
String::new()
}
};
return Err(format!(
"Sidecar closed stdout before sending ready{exit_info}. \
The Python sidecar may have crashed on startup — check app logs for details."
));
return Err("Sidecar closed stdout before sending ready".to_string());
}
let trimmed = line.trim();
if trimmed.is_empty() {
@@ -470,46 +230,11 @@ impl SidecarManager {
/// Send a message and receive the response, calling a callback for intermediate messages.
/// Intermediate messages include progress, pipeline.segment, and pipeline.speaker_update.
///
/// If the sidecar has crashed (broken pipe), automatically restarts it and retries once.
pub fn send_and_receive_with_progress<F>(
&self,
msg: &IPCMessage,
on_intermediate: F,
) -> Result<IPCMessage, String>
where
F: Fn(&IPCMessage),
{
match self.send_and_receive_inner(msg, &on_intermediate) {
Ok(response) => Ok(response),
Err(e)
if e.contains("Write error")
|| e.contains("closed stdout")
|| e.contains("not available") =>
{
eprintln!("[sidecar-rs] Sidecar communication failed ({e}), restarting...");
self.cleanup_handles();
// Stop any zombie process
{
let mut proc = self.process.lock().map_err(|e| e.to_string())?;
if let Some(ref mut child) = proc.take() {
let _ = child.kill();
let _ = child.wait();
}
}
self.ensure_running()?;
self.send_and_receive_inner(msg, &on_intermediate)
}
Err(e) => Err(e),
}
}
/// Inner implementation of send_and_receive.
fn send_and_receive_inner<F>(
&self,
msg: &IPCMessage,
on_intermediate: &F,
) -> Result<IPCMessage, String>
where
F: Fn(&IPCMessage),
{
@@ -595,39 +320,8 @@ impl SidecarManager {
}
pub fn is_running(&self) -> bool {
let mut proc = match self.process.lock() {
Ok(p) => p,
Err(_) => return false,
};
if let Some(ref mut child) = *proc {
// Check if the process has exited
match child.try_wait() {
Ok(Some(_status)) => {
// Process has exited — clean up handles
eprintln!("[sidecar-rs] Sidecar process has exited");
drop(proc);
let _ = self.cleanup_handles();
false
}
Ok(None) => true, // Still running
Err(_) => false,
}
} else {
false
}
}
/// Clean up stdin/stdout/process handles after the sidecar has exited.
fn cleanup_handles(&self) {
if let Ok(mut s) = self.stdin.lock() {
*s = None;
}
if let Ok(mut r) = self.reader.lock() {
*r = None;
}
if let Ok(mut p) = self.process.lock() {
*p = None;
}
let proc = self.process.lock().ok();
proc.map_or(false, |p| p.is_some())
}
}

View File

@@ -1,7 +1,7 @@
{
"$schema": "https://schema.tauri.app/config/2",
"productName": "Voice to Notes",
"version": "0.2.46",
"version": "0.1.0",
"identifier": "com.voicetonotes.app",
"build": {
"beforeDevCommand": "npm run dev",
@@ -22,7 +22,7 @@
}
],
"security": {
"csp": "default-src 'self' http://tauri.localhost; connect-src ipc: http://ipc.localhost http://asset.localhost https://asset.localhost blob:; img-src 'self' asset: http://asset.localhost https://asset.localhost blob:; media-src 'self' asset: http://asset.localhost https://asset.localhost blob:; style-src 'self' 'unsafe-inline'",
"csp": "default-src 'self'; img-src 'self' asset: https://asset.localhost; media-src 'self' asset: https://asset.localhost; style-src 'self' 'unsafe-inline'",
"assetProtocol": {
"enable": true,
"scope": ["**"]
@@ -31,7 +31,7 @@
},
"bundle": {
"active": true,
"targets": ["deb", "rpm", "nsis", "msi", "dmg"],
"targets": "all",
"icon": [
"icons/32x32.png",
"icons/128x128.png",
@@ -42,18 +42,17 @@
"category": "Utility",
"shortDescription": "Transcribe audio/video with speaker identification",
"longDescription": "Voice to Notes is a desktop application that transcribes audio and video recordings with speaker identification, synchronized playback, and AI-powered analysis. Export to SRT, WebVTT, ASS captions, or plain text.",
"resources": [],
"copyright": "Voice to Notes Contributors",
"license": "MIT",
"linux": {
"deb": {
"depends": []
},
"appimage": {
"bundleMediaFramework": true
}
},
"windows": {
"nsis": {
"installerHooks": "nsis-hooks.nsh"
},
"wix": {
"language": "en-US"
}

View File

@@ -1,7 +1,7 @@
<script lang="ts">
import { invoke } from '@tauri-apps/api/core';
import { segments, speakers } from '$lib/stores/transcript';
import { settings, configureAIProvider } from '$lib/stores/settings';
import { settings } from '$lib/stores/settings';
interface ChatMessage {
role: 'user' | 'assistant';
@@ -45,12 +45,22 @@
}));
// Ensure the provider is configured with current credentials before chatting
await configureAIProvider($settings);
const s = $settings;
const configMap: Record<string, Record<string, string>> = {
openai: { api_key: s.openai_api_key, model: s.openai_model },
anthropic: { api_key: s.anthropic_api_key, model: s.anthropic_model },
litellm: { api_key: s.litellm_api_key, api_base: s.litellm_api_base, model: s.litellm_model },
local: { model: s.local_model_path, base_url: 'http://localhost:8080' },
};
const config = configMap[s.ai_provider];
if (config) {
await invoke('ai_configure', { provider: s.ai_provider, config });
}
const result = await invoke<{ response: string }>('ai_chat', {
messages: chatMessages,
transcriptContext: getTranscriptContext(),
provider: $settings.ai_provider,
provider: s.ai_provider,
});
messages = [...messages, { role: 'assistant', content: result.response }];

View File

@@ -4,25 +4,9 @@
percent?: number;
stage?: string;
message?: string;
onCancel?: () => void;
}
let { visible = false, percent = 0, stage = '', message = '', onCancel }: Props = $props();
let showConfirm = $state(false);
function handleCancelClick() {
showConfirm = true;
}
function confirmCancel() {
showConfirm = false;
onCancel?.();
}
function dismissCancel() {
showConfirm = false;
}
let { visible = false, percent = 0, stage = '', message = '' }: Props = $props();
// Pipeline steps in order
const pipelineSteps = [
@@ -105,20 +89,6 @@
<p class="status-text">{message || 'Please wait...'}</p>
<p class="hint-text">This may take several minutes for large files</p>
{#if onCancel && !showConfirm}
<button class="cancel-btn" onclick={handleCancelClick}>Cancel</button>
{/if}
{#if showConfirm}
<div class="confirm-box">
<p class="confirm-text">Processing is incomplete. If you cancel now, the transcription will need to be started over.</p>
<div class="confirm-actions">
<button class="confirm-keep" onclick={dismissCancel}>Continue Processing</button>
<button class="confirm-cancel" onclick={confirmCancel}>Cancel Processing</button>
</div>
</div>
{/if}
</div>
</div>
{/if}
@@ -204,62 +174,4 @@
font-size: 0.75rem;
color: #555;
}
.cancel-btn {
margin-top: 1.25rem;
width: 100%;
padding: 0.5rem;
background: none;
border: 1px solid #4a5568;
color: #999;
border-radius: 6px;
cursor: pointer;
font-size: 0.85rem;
}
.cancel-btn:hover {
color: #e0e0e0;
border-color: #e94560;
}
.confirm-box {
margin-top: 1.25rem;
padding: 0.75rem;
background: rgba(233, 69, 96, 0.08);
border: 1px solid #e94560;
border-radius: 6px;
}
.confirm-text {
margin: 0 0 0.75rem;
font-size: 0.8rem;
color: #e0e0e0;
line-height: 1.4;
}
.confirm-actions {
display: flex;
gap: 0.5rem;
}
.confirm-keep {
flex: 1;
padding: 0.4rem;
background: #0f3460;
border: 1px solid #4a5568;
color: #e0e0e0;
border-radius: 4px;
cursor: pointer;
font-size: 0.8rem;
}
.confirm-keep:hover {
background: #1a4a7a;
}
.confirm-cancel {
flex: 1;
padding: 0.4rem;
background: #e94560;
border: none;
color: white;
border-radius: 4px;
cursor: pointer;
font-size: 0.8rem;
}
.confirm-cancel:hover {
background: #d63851;
}
</style>

View File

@@ -11,7 +11,7 @@
let { visible, onClose }: Props = $props();
let localSettings = $state<AppSettings>({ ...$settings });
let activeTab = $state<'transcription' | 'speakers' | 'ai' | 'debug'>('transcription');
let activeTab = $state<'transcription' | 'speakers' | 'ai' | 'local'>('transcription');
let modelStatus = $state<'idle' | 'downloading' | 'success' | 'error'>('idle');
let modelError = $state('');
let revealedFields = $state<Set<string>>(new Set());
@@ -81,8 +81,8 @@
<button class="tab" class:active={activeTab === 'ai'} onclick={() => activeTab = 'ai'}>
AI Provider
</button>
<button class="tab" class:active={activeTab === 'debug'} onclick={() => activeTab = 'debug'}>
Debug
<button class="tab" class:active={activeTab === 'local'} onclick={() => activeTab = 'local'}>
Local AI
</button>
</div>
@@ -181,27 +181,14 @@
<div class="field">
<label for="ai-provider">AI Provider</label>
<select id="ai-provider" bind:value={localSettings.ai_provider}>
<option value="local">Ollama</option>
<option value="local">Local (llama-server)</option>
<option value="openai">OpenAI</option>
<option value="anthropic">Anthropic</option>
<option value="litellm">OpenAI Compatible</option>
</select>
</div>
{#if localSettings.ai_provider === 'local'}
<div class="field">
<label for="ollama-url">Ollama URL</label>
<input id="ollama-url" type="text" bind:value={localSettings.ollama_url} placeholder="http://localhost:11434" />
</div>
<div class="field">
<label for="ollama-model">Model</label>
<input id="ollama-model" type="text" bind:value={localSettings.ollama_model} placeholder="llama3.2" />
</div>
<p class="hint">
Install Ollama from ollama.com, then pull a model with <code>ollama pull llama3.2</code>.
The app connects via Ollama's OpenAI-compatible API.
</p>
{:else if localSettings.ai_provider === 'openai'}
{#if localSettings.ai_provider === 'openai'}
<div class="field">
<label for="openai-key">OpenAI API Key</label>
<div class="input-reveal">
@@ -242,21 +229,19 @@
<input id="litellm-model" type="text" bind:value={localSettings.litellm_model} placeholder="provider/model-name" />
</div>
{/if}
{:else if activeTab === 'debug'}
<div class="field checkbox">
<label>
<input
type="checkbox"
checked={localSettings.devtools_enabled}
onchange={async (e) => {
localSettings.devtools_enabled = (e.target as HTMLInputElement).checked;
await invoke('toggle_devtools', { open: localSettings.devtools_enabled });
}}
/>
Enable Developer Tools
</label>
<p class="hint">Opens the browser inspector for debugging. Changes take effect immediately.</p>
{:else}
<div class="field">
<label for="llama-binary">llama-server Binary Path</label>
<input id="llama-binary" type="text" bind:value={localSettings.local_binary_path} placeholder="llama-server" />
</div>
<div class="field">
<label for="llama-model">GGUF Model Path</label>
<input id="llama-model" type="text" bind:value={localSettings.local_model_path} placeholder="~/.voicetonotes/models/model.gguf" />
</div>
<p class="hint">
Place GGUF model files in ~/.voicetonotes/models/ for auto-detection.
The local AI server uses the OpenAI-compatible API from llama.cpp.
</p>
{/if}
</div>

View File

@@ -1,320 +0,0 @@
<script lang="ts">
import { invoke } from '@tauri-apps/api/core';
import { listen } from '@tauri-apps/api/event';
import type { UnlistenFn } from '@tauri-apps/api/event';
import { onMount } from 'svelte';
interface Props {
onComplete: () => void;
}
let { onComplete }: Props = $props();
let variant = $state<'cpu' | 'cuda'>('cpu');
let downloading = $state(false);
let downloadProgress = $state({ downloaded: 0, total: 0, percent: 0 });
let error = $state('');
let success = $state(false);
let unlisten: UnlistenFn | null = null;
onMount(() => {
return () => {
unlisten?.();
};
});
function formatBytes(bytes: number): string {
if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(0)} KB`;
if (bytes < 1024 * 1024 * 1024) return `${(bytes / (1024 * 1024)).toFixed(0)} MB`;
return `${(bytes / (1024 * 1024 * 1024)).toFixed(1)} GB`;
}
async function startDownload() {
downloading = true;
error = '';
success = false;
unlisten = await listen<{ downloaded: number; total: number; percent: number }>(
'sidecar-download-progress',
(event) => {
downloadProgress = event.payload;
}
);
try {
await invoke('download_sidecar', { variant });
success = true;
// Brief pause so the user sees "Complete" before the screen goes away
setTimeout(() => {
onComplete();
}, 800);
} catch (err) {
error = String(err);
} finally {
downloading = false;
unlisten?.();
unlisten = null;
}
}
</script>
<div class="setup-overlay">
<div class="setup-card">
<h1 class="app-title">Voice to Notes</h1>
<h2 class="setup-heading">First-Time Setup</h2>
<p class="setup-description">
Voice to Notes needs to download its AI engine to transcribe audio.
</p>
{#if !downloading && !success}
<div class="variant-options">
<label class="variant-option" class:selected={variant === 'cpu'}>
<input type="radio" name="variant" value="cpu" bind:group={variant} />
<div class="variant-info">
<span class="variant-label">Standard (CPU)</span>
<span class="variant-desc">Works on all computers (~500 MB download)</span>
</div>
</label>
<label class="variant-option" class:selected={variant === 'cuda'}>
<input type="radio" name="variant" value="cuda" bind:group={variant} />
<div class="variant-info">
<span class="variant-label">GPU Accelerated (CUDA)</span>
<span class="variant-desc">Faster transcription with NVIDIA GPU (~2 GB download)</span>
</div>
</label>
</div>
{#if error}
<div class="error-box">
<p class="error-text">{error}</p>
<button class="btn-retry" onclick={startDownload}>Retry</button>
</div>
{:else}
<button class="btn-download" onclick={startDownload}>
Download &amp; Install
</button>
{/if}
{:else if downloading}
<div class="progress-section">
<div class="progress-bar-track">
<div class="progress-bar-fill" style="width: {downloadProgress.percent}%"></div>
</div>
<p class="progress-text">
{downloadProgress.percent}% — {formatBytes(downloadProgress.downloaded)} / {formatBytes(downloadProgress.total)}
</p>
<p class="progress-hint">Downloading {variant === 'cuda' ? 'GPU' : 'CPU'} engine...</p>
</div>
{:else if success}
<div class="success-section">
<div class="success-icon">&#10003;</div>
<p class="success-text">Setup complete!</p>
</div>
{/if}
</div>
</div>
<style>
.setup-overlay {
position: fixed;
inset: 0;
background: #0a0a23;
display: flex;
align-items: center;
justify-content: center;
z-index: 10000;
}
.setup-card {
background: #16213e;
border: 1px solid #2a3a5e;
border-radius: 12px;
padding: 2.5rem 3rem;
max-width: 480px;
width: 90vw;
color: #e0e0e0;
box-shadow: 0 8px 32px rgba(0, 0, 0, 0.5);
text-align: center;
}
.app-title {
font-size: 1.8rem;
margin: 0 0 0.25rem;
color: #e94560;
font-weight: 700;
}
.setup-heading {
font-size: 1.1rem;
margin: 0 0 0.75rem;
color: #e0e0e0;
font-weight: 500;
}
.setup-description {
font-size: 0.9rem;
color: #b0b0b0;
margin: 0 0 1.5rem;
line-height: 1.5;
}
.variant-options {
display: flex;
flex-direction: column;
gap: 0.75rem;
margin-bottom: 1.5rem;
text-align: left;
}
.variant-option {
display: flex;
align-items: flex-start;
gap: 0.75rem;
padding: 0.85rem 1rem;
border: 1px solid #2a3a5e;
border-radius: 8px;
cursor: pointer;
transition: border-color 0.15s, background 0.15s;
}
.variant-option:hover {
border-color: #4a5568;
background: rgba(255, 255, 255, 0.02);
}
.variant-option.selected {
border-color: #e94560;
background: rgba(233, 69, 96, 0.08);
}
.variant-option input[type='radio'] {
margin-top: 0.2rem;
accent-color: #e94560;
flex-shrink: 0;
}
.variant-info {
display: flex;
flex-direction: column;
gap: 0.2rem;
}
.variant-label {
font-size: 0.9rem;
font-weight: 500;
color: #e0e0e0;
}
.variant-desc {
font-size: 0.78rem;
color: #888;
}
.btn-download {
background: #e94560;
border: none;
color: white;
padding: 0.7rem 1.5rem;
border-radius: 6px;
cursor: pointer;
font-size: 0.9rem;
font-weight: 500;
width: 100%;
transition: background 0.15s;
}
.btn-download:hover {
background: #d63851;
}
.progress-section {
margin-top: 0.5rem;
}
.progress-bar-track {
width: 100%;
height: 8px;
background: #1a1a2e;
border-radius: 4px;
overflow: hidden;
border: 1px solid #2a3a5e;
}
.progress-bar-fill {
height: 100%;
background: #e94560;
border-radius: 4px;
transition: width 0.3s ease;
}
.progress-text {
margin: 0.75rem 0 0;
font-size: 0.85rem;
color: #e0e0e0;
font-variant-numeric: tabular-nums;
}
.progress-hint {
margin: 0.35rem 0 0;
font-size: 0.78rem;
color: #888;
}
.error-box {
background: rgba(233, 69, 96, 0.1);
border: 1px solid rgba(233, 69, 96, 0.3);
border-radius: 8px;
padding: 1rem;
}
.error-text {
color: #e94560;
font-size: 0.85rem;
margin: 0 0 0.75rem;
word-break: break-word;
line-height: 1.4;
}
.btn-retry {
background: #e94560;
border: none;
color: white;
padding: 0.5rem 1.25rem;
border-radius: 6px;
cursor: pointer;
font-size: 0.85rem;
font-weight: 500;
}
.btn-retry:hover {
background: #d63851;
}
.success-section {
display: flex;
flex-direction: column;
align-items: center;
gap: 0.5rem;
padding: 1rem 0;
}
.success-icon {
width: 48px;
height: 48px;
border-radius: 50%;
background: rgba(78, 205, 196, 0.15);
color: #4ecdc4;
display: flex;
align-items: center;
justify-content: center;
font-size: 1.5rem;
font-weight: 700;
}
.success-text {
color: #4ecdc4;
font-size: 1rem;
margin: 0;
font-weight: 500;
}
</style>

View File

@@ -272,9 +272,7 @@
<style>
.transcript-editor {
flex: 1;
min-width: 0;
overflow-y: auto;
overflow-x: hidden;
padding: 1rem;
background: #16213e;
border-radius: 8px;
@@ -321,7 +319,6 @@
.segment-text {
line-height: 1.6;
padding-left: 0.75rem;
white-space: pre-wrap;
word-wrap: break-word;
overflow-wrap: break-word;
}

View File

@@ -57,12 +57,6 @@
isReady = false;
});
wavesurfer.on('error', (err: Error) => {
console.error('[voice-to-notes] WaveSurfer error:', err);
isLoading = false;
loadError = 'Failed to load audio';
});
if (audioUrl) {
loadAudio(audioUrl);
}

View File

@@ -10,15 +10,14 @@ export interface AppSettings {
litellm_model: string;
litellm_api_key: string;
litellm_api_base: string;
ollama_url: string;
ollama_model: string;
local_model_path: string;
local_binary_path: string;
transcription_model: string;
transcription_device: string;
transcription_language: string;
skip_diarization: boolean;
hf_token: string;
num_speakers: number | null;
devtools_enabled: boolean;
}
const defaults: AppSettings = {
@@ -30,15 +29,14 @@ const defaults: AppSettings = {
litellm_model: 'gpt-4o-mini',
litellm_api_key: '',
litellm_api_base: '',
ollama_url: 'http://localhost:11434',
ollama_model: 'llama3.2',
local_model_path: '',
local_binary_path: 'llama-server',
transcription_model: 'base',
transcription_device: 'cpu',
transcription_language: '',
skip_diarization: false,
hf_token: '',
num_speakers: null,
devtools_enabled: false,
};
export const settings = writable<AppSettings>({ ...defaults });
@@ -52,27 +50,23 @@ export async function loadSettings(): Promise<void> {
}
}
export async function configureAIProvider(s: AppSettings): Promise<void> {
export async function saveSettings(s: AppSettings): Promise<void> {
settings.set(s);
await invoke('save_settings', { settings: s });
// Configure the AI provider in the Python sidecar
const configMap: Record<string, Record<string, string>> = {
openai: { api_key: s.openai_api_key, model: s.openai_model },
anthropic: { api_key: s.anthropic_api_key, model: s.anthropic_model },
litellm: { api_key: s.litellm_api_key, api_base: s.litellm_api_base, model: s.litellm_model },
local: { model: s.ollama_model, base_url: s.ollama_url.replace(/\/+$/, '') + '/v1' },
local: { model: s.local_model_path, base_url: 'http://localhost:8080' },
};
const config = configMap[s.ai_provider];
if (config) {
try {
await invoke('ai_configure', { provider: s.ai_provider, config });
} catch {
// Sidecar may not be running yet
// Sidecar may not be running yet — provider will be configured on first use
}
}
}
export async function saveSettings(s: AppSettings): Promise<void> {
settings.set(s);
await invoke('save_settings', { settings: s });
// Configure the AI provider in the Python sidecar
await configureAIProvider(s);
}

View File

@@ -8,9 +8,8 @@
import AIChatPanel from '$lib/components/AIChatPanel.svelte';
import ProgressOverlay from '$lib/components/ProgressOverlay.svelte';
import SettingsModal from '$lib/components/SettingsModal.svelte';
import SidecarSetup from '$lib/components/SidecarSetup.svelte';
import { segments, speakers } from '$lib/stores/transcript';
import { settings, loadSettings, configureAIProvider } from '$lib/stores/settings';
import { settings, loadSettings } from '$lib/stores/settings';
import type { Segment, Speaker } from '$lib/types/transcript';
import { onMount, tick } from 'svelte';
@@ -19,65 +18,13 @@
let audioUrl = $state('');
let showSettings = $state(false);
// Sidecar state
let sidecarReady = $state(false);
let sidecarChecked = $state(false);
// Sidecar update state
let sidecarUpdate = $state<{ current_version: string; latest_version: string } | null>(null);
let showUpdateDownload = $state(false);
let updateDismissed = $state(false);
// Project management state
let currentProjectPath = $state<string | null>(null);
let currentProjectName = $state('');
let projectIsV2 = $state(false);
let audioFilePath = $state('');
let audioWavPath = $state('');
async function checkSidecar() {
try {
const ready = await invoke<boolean>('check_sidecar');
sidecarReady = ready;
} catch {
sidecarReady = false;
}
sidecarChecked = true;
}
async function checkSidecarUpdate() {
try {
const update = await invoke<{ current_version: string; latest_version: string } | null>('check_sidecar_update');
sidecarUpdate = update;
} catch {
// Silently ignore update check failures
}
}
function handleSidecarSetupComplete() {
sidecarReady = true;
configureAIProvider($settings);
checkSidecarUpdate();
}
function handleUpdateComplete() {
showUpdateDownload = false;
sidecarUpdate = null;
}
onMount(() => {
loadSettings().then(() => {
// Restore devtools state from settings
if ($settings.devtools_enabled) {
invoke('toggle_devtools', { open: true });
}
});
checkSidecar().then(() => {
if (sidecarReady) {
configureAIProvider($settings);
checkSidecarUpdate();
}
});
loadSettings();
// Global keyboard shortcuts
function handleKeyDown(e: KeyboardEvent) {
@@ -121,32 +68,25 @@
};
});
let isTranscribing = $state(false);
let transcriptionCancelled = $state(false);
let transcriptionProgress = $state(0);
let transcriptionStage = $state('');
let transcriptionMessage = $state('');
let extractingAudio = $state(false);
function handleCancelProcessing() {
transcriptionCancelled = true;
isTranscribing = false;
transcriptionProgress = 0;
transcriptionStage = '';
transcriptionMessage = '';
// Clear any partial results
segments.set([]);
speakers.set([]);
}
// Speaker color palette for auto-assignment
const speakerColors = ['#e94560', '#4ecdc4', '#ffe66d', '#a8e6cf', '#ff8b94', '#c7ceea', '#ffd93d', '#6bcb77'];
function buildProjectData(projectName: string) {
return {
version: 2,
name: projectName,
source_file: audioFilePath,
audio_wav: 'audio.wav',
async function saveProject() {
const defaultName = currentProjectName || 'Untitled';
const outputPath = await save({
defaultPath: `${defaultName}.vtn`,
filters: [{ name: 'Voice to Notes Project', extensions: ['vtn'] }],
});
if (!outputPath) return;
const projectData = {
version: 1,
name: outputPath.split(/[\\/]/).pop()?.replace('.vtn', '') || defaultName,
audio_file: audioFilePath,
created_at: new Date().toISOString(),
segments: $segments.map(seg => {
const speaker = $speakers.find(s => s.id === seg.speaker_id);
@@ -170,75 +110,17 @@
color: s.color || '#e94560',
})),
};
}
/** Save to a specific folder — creates .vtn + audio.wav inside it. */
async function saveToFolder(folderPath: string): Promise<boolean> {
const projectName = folderPath.split(/[\\/]/).pop() || currentProjectName || 'Untitled';
const vtnPath = `${folderPath}/${projectName}.vtn`;
const wavPath = `${folderPath}/audio.wav`;
const projectData = buildProjectData(projectName);
try {
await invoke('create_dir', { path: folderPath });
if (audioWavPath && audioWavPath !== wavPath) {
await invoke('copy_file', { src: audioWavPath, dst: wavPath });
audioWavPath = wavPath;
}
await invoke('save_project_file', { path: vtnPath, project: projectData });
currentProjectPath = vtnPath;
currentProjectName = projectName;
projectIsV2 = true;
return true;
await invoke('save_project_file', { path: outputPath, project: projectData });
currentProjectPath = outputPath;
currentProjectName = projectData.name;
} catch (err) {
console.error('Failed to save project:', err);
alert(`Failed to save: ${err}`);
return false;
}
}
async function saveProject() {
// Already saved as v2 folder — save in place
if (currentProjectPath && projectIsV2) {
const folderPath = currentProjectPath.replace(/[\\/][^\\/]+$/, '');
await saveToFolder(folderPath);
return;
}
// V1 project opened — migrate to folder structure
if (currentProjectPath && !projectIsV2) {
const oldVtnDir = currentProjectPath.replace(/[\\/][^\\/]+$/, '');
const projectName = currentProjectPath.split(/[\\/]/).pop()?.replace(/\.vtn$/i, '') || 'Untitled';
const folderPath = `${oldVtnDir}/${projectName}`;
const success = await saveToFolder(folderPath);
if (success) {
// Optionally remove the old .vtn file
try {
// Leave old file — user can delete manually
} catch {}
}
return;
}
// Never saved — pick a folder
await saveProjectAs();
}
async function saveProjectAs() {
// Use save dialog so the user can type a new project name.
// The chosen path is treated as the project folder (created if needed).
const defaultName = currentProjectName || 'Untitled';
const chosenPath = await save({
defaultPath: defaultName,
title: 'Save Project — enter a project name',
});
if (!chosenPath) return;
// Strip any file extension the user may have typed (e.g. ".vtn")
const folderPath = chosenPath.replace(/\.[^.\\/]+$/, '');
await saveToFolder(folderPath);
}
async function openProject() {
const filePath = await open({
filters: [{ name: 'Voice to Notes Project', extensions: ['vtn'] }],
@@ -248,11 +130,9 @@
try {
const project = await invoke<{
version?: number;
version: number;
name: string;
audio_file?: string;
source_file?: string;
audio_wav?: string;
audio_file: string;
segments: Array<{
text: string;
start_ms: number;
@@ -302,135 +182,10 @@
}));
segments.set(newSegments);
// Determine the directory the .vtn file is in
const vtnDir = (filePath as string).replace(/[\\/][^\\/]+$/, '');
const version = project.version ?? 1;
projectIsV2 = version >= 2;
// Resolve audio for wavesurfer playback
if (version >= 2) {
// Version 2: audio_wav is relative to the .vtn directory, source_file is the original import path
audioFilePath = project.source_file || '';
const wavRelative = project.audio_wav || 'audio.wav';
const resolvedWav = `${vtnDir}/${wavRelative}`;
const wavExists = await invoke<boolean>('check_file_exists', { path: resolvedWav });
if (wavExists) {
audioWavPath = resolvedWav;
audioUrl = convertFileSrc(resolvedWav);
waveformPlayer?.loadAudio(audioUrl);
} else {
// WAV missing — try re-extracting from the original source file
const sourceExists = audioFilePath ? await invoke<boolean>('check_file_exists', { path: audioFilePath }) : false;
if (sourceExists) {
extractingAudio = true;
await tick();
try {
const outputPath = `${vtnDir}/${wavRelative}`;
const wavPath = await invoke<string>('extract_audio', { filePath: audioFilePath, outputPath });
audioWavPath = wavPath;
audioUrl = convertFileSrc(wavPath);
waveformPlayer?.loadAudio(audioUrl);
} catch (err) {
console.error('Failed to re-extract audio:', err);
alert(`Failed to re-extract audio: ${err}`);
} finally {
extractingAudio = false;
}
} else {
// Both missing — ask user to locate the file
const shouldRelink = confirm(
'The audio file for this project could not be found.\n\n' +
`Original source: ${audioFilePath || '(unknown)'}\n\n` +
'Would you like to locate the file?'
);
if (shouldRelink) {
const newPath = await open({
multiple: false,
filters: [{
name: 'Audio/Video',
extensions: ['mp3', 'wav', 'flac', 'ogg', 'm4a', 'aac', 'wma',
'mp4', 'mkv', 'avi', 'mov', 'webm'],
}],
});
if (newPath) {
audioFilePath = newPath;
extractingAudio = true;
await tick();
try {
const outputPath = `${vtnDir}/${wavRelative}`;
const wavPath = await invoke<string>('extract_audio', { filePath: newPath, outputPath });
audioWavPath = wavPath;
audioUrl = convertFileSrc(wavPath);
waveformPlayer?.loadAudio(audioUrl);
} catch (err) {
console.error('Failed to extract audio from re-linked file:', err);
alert(`Failed to extract audio: ${err}`);
} finally {
extractingAudio = false;
}
}
}
}
}
} else {
// Version 1 (legacy): audio_file is the source path
const sourceFile = project.audio_file || '';
audioFilePath = sourceFile;
const sourceExists = sourceFile ? await invoke<boolean>('check_file_exists', { path: sourceFile }) : false;
if (sourceExists) {
// Extract WAV next to the .vtn file for playback
extractingAudio = true;
await tick();
try {
const outputPath = `${vtnDir}/audio.wav`;
const wavPath = await invoke<string>('extract_audio', { filePath: sourceFile, outputPath });
audioWavPath = wavPath;
audioUrl = convertFileSrc(wavPath);
waveformPlayer?.loadAudio(audioUrl);
} catch (err) {
console.error('Failed to extract audio:', err);
alert(`Failed to extract audio: ${err}`);
} finally {
extractingAudio = false;
}
} else {
// Source missing — ask user to locate the file
const shouldRelink = confirm(
'The audio file for this project could not be found.\n\n' +
`Original path: ${sourceFile || '(unknown)'}\n\n` +
'Would you like to locate the file?'
);
if (shouldRelink) {
const newPath = await open({
multiple: false,
filters: [{
name: 'Audio/Video',
extensions: ['mp3', 'wav', 'flac', 'ogg', 'm4a', 'aac', 'wma',
'mp4', 'mkv', 'avi', 'mov', 'webm'],
}],
});
if (newPath) {
audioFilePath = newPath;
extractingAudio = true;
await tick();
try {
const outputPath = `${vtnDir}/audio.wav`;
const wavPath = await invoke<string>('extract_audio', { filePath: newPath, outputPath });
audioWavPath = wavPath;
audioUrl = convertFileSrc(wavPath);
waveformPlayer?.loadAudio(audioUrl);
} catch (err) {
console.error('Failed to extract audio from re-linked file:', err);
alert(`Failed to extract audio: ${err}`);
} finally {
extractingAudio = false;
}
}
}
}
}
// Load audio
audioFilePath = project.audio_file;
audioUrl = convertFileSrc(project.audio_file);
waveformPlayer?.loadAudio(audioUrl);
currentProjectPath = filePath as string;
currentProjectName = project.name;
@@ -461,35 +216,9 @@
});
if (!filePath) return;
// Always extract audio to WAV for wavesurfer playback
extractingAudio = true;
await tick();
try {
const wavPath = await invoke<string>('extract_audio', { filePath });
audioWavPath = wavPath;
} catch (err) {
console.error('[voice-to-notes] Failed to extract audio:', err);
const msg = String(err);
if (msg.includes('ffmpeg not found')) {
alert(
'FFmpeg is required to extract audio.\n\n' +
'Install FFmpeg:\n' +
' Windows: winget install ffmpeg\n' +
' macOS: brew install ffmpeg\n' +
' Linux: sudo apt install ffmpeg\n\n' +
'Then restart Voice to Notes and try again.'
);
} else {
alert(`Failed to extract audio: ${msg}`);
}
return;
} finally {
extractingAudio = false;
}
// Track the original file path for the sidecar (it does its own conversion)
// Track the original file path and convert to asset URL for wavesurfer
audioFilePath = filePath;
audioUrl = convertFileSrc(audioWavPath);
audioUrl = convertFileSrc(filePath);
waveformPlayer?.loadAudio(audioUrl);
// Clear previous results
@@ -498,7 +227,6 @@
// Start pipeline (transcription + diarization)
isTranscribing = true;
transcriptionCancelled = false;
transcriptionProgress = 0;
transcriptionStage = 'Starting...';
transcriptionMessage = 'Initializing pipeline...';
@@ -609,9 +337,6 @@
numSpeakers: $settings.num_speakers && $settings.num_speakers > 0 ? $settings.num_speakers : undefined,
});
// If cancelled while processing, discard results
if (transcriptionCancelled) return;
// Create speaker entries from pipeline result
const newSpeakers: Speaker[] = (result.speakers || []).map((label, idx) => ({
id: `speaker-${idx}`,
@@ -718,31 +443,14 @@
}
</script>
{#if !appReady || !sidecarChecked}
{#if !appReady}
<div class="splash-screen">
<h1 class="splash-title">Voice to Notes</h1>
<p class="splash-subtitle">Loading...</p>
<div class="splash-spinner"></div>
</div>
{:else if sidecarChecked && !sidecarReady && !showUpdateDownload}
<SidecarSetup onComplete={handleSidecarSetupComplete} />
{:else if showUpdateDownload}
<SidecarSetup onComplete={handleUpdateComplete} />
{:else}
<div class="app-shell">
{#if sidecarUpdate && !updateDismissed}
<div class="update-banner">
<span class="update-text">
Sidecar update available (v{sidecarUpdate.current_version} &rarr; v{sidecarUpdate.latest_version})
</span>
<button class="update-btn" onclick={() => showUpdateDownload = true}>
Update
</button>
<button class="update-dismiss" onclick={() => updateDismissed = true} title="Dismiss">
&times;
</button>
</div>
{/if}
<div class="app-header">
<div class="header-actions">
<button class="settings-btn" onclick={openProject} disabled={isTranscribing}>
@@ -750,10 +458,7 @@
</button>
{#if $segments.length > 0}
<button class="settings-btn" onclick={saveProject}>
Save
</button>
<button class="settings-btn" onclick={saveProjectAs}>
Save As
Save Project
</button>
{/if}
<button class="import-btn" onclick={handleFileImport} disabled={isTranscribing}>
@@ -802,18 +507,8 @@
percent={transcriptionProgress}
stage={transcriptionStage}
message={transcriptionMessage}
onCancel={handleCancelProcessing}
/>
{#if extractingAudio}
<div class="extraction-overlay">
<div class="extraction-card">
<div class="extraction-spinner"></div>
<p>Extracting audio...</p>
</div>
</div>
{/if}
<SettingsModal
visible={showSettings}
onClose={() => showSettings = false}
@@ -979,80 +674,4 @@
@keyframes spin {
to { transform: rotate(360deg); }
}
/* Sidecar update banner */
.update-banner {
display: flex;
align-items: center;
gap: 0.75rem;
padding: 0.5rem 1rem;
background: rgba(78, 205, 196, 0.1);
border-bottom: 1px solid rgba(78, 205, 196, 0.25);
color: #e0e0e0;
font-size: 0.85rem;
}
.update-text {
flex: 1;
color: #b0b0b0;
}
.update-btn {
background: #4ecdc4;
border: none;
color: #0a0a23;
padding: 0.3rem 0.85rem;
border-radius: 4px;
cursor: pointer;
font-size: 0.8rem;
font-weight: 600;
}
.update-btn:hover {
background: #3dbdb5;
}
.update-dismiss {
background: none;
border: none;
color: #888;
font-size: 1.1rem;
cursor: pointer;
padding: 0.1rem 0.3rem;
line-height: 1;
}
.update-dismiss:hover {
color: #e0e0e0;
}
/* Audio extraction overlay */
.extraction-overlay {
position: fixed;
inset: 0;
background: rgba(0, 0, 0, 0.8);
display: flex;
align-items: center;
justify-content: center;
z-index: 9999;
}
.extraction-card {
background: #16213e;
padding: 2rem 2.5rem;
border-radius: 12px;
color: #e0e0e0;
border: 1px solid #2a3a5e;
box-shadow: 0 8px 32px rgba(0, 0, 0, 0.5);
display: flex;
flex-direction: column;
align-items: center;
gap: 1rem;
}
.extraction-card p {
margin: 0;
font-size: 1rem;
}
.extraction-spinner {
width: 32px;
height: 32px;
border: 3px solid #2a3a5e;
border-top-color: #e94560;
border-radius: 50%;
animation: spin 0.8s linear infinite;
}
</style>