Compare commits
66 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
68ae48a771 | ||
|
|
4fed9bccb8 | ||
|
|
ef1481b359 | ||
|
|
ba3d5c3997 | ||
|
|
9a6ea84637 | ||
|
|
011ff4e178 | ||
|
|
d2d111a5c7 | ||
|
|
9250ff25c3 | ||
|
|
9033881274 | ||
|
|
1ed34e0bbb | ||
|
|
b7a00af2e0 | ||
|
|
2d0d4cfc50 | ||
|
|
b99613f452 | ||
|
|
ec7e364165 | ||
|
|
a0cb034ab5 | ||
|
|
52d7d06d84 | ||
|
|
462a4b80f6 | ||
|
|
12869e3757 | ||
|
|
bf6fb471d9 | ||
|
|
50baa7284e | ||
|
|
a3c39a2069 | ||
|
|
f023bf02a9 | ||
|
|
caf854ccbb | ||
|
|
b0d566f2d6 | ||
|
|
a65ac439dd | ||
|
|
b8d70539ec | ||
|
|
760b5dc90e | ||
|
|
9cec3c3858 | ||
|
|
4f19ae5287 | ||
|
|
4701b578fc | ||
| 5b7f30c4b2 | |||
| cc24511b87 | |||
|
|
23c013bbac | ||
| 34797b295d | |||
|
|
666e6c5b25 | ||
|
|
c8754076f4 | ||
| f674b63ca9 | |||
| 66a9033a64 | |||
|
|
3a97aa2831 | ||
|
|
882aa147c7 | ||
|
|
67fc23e8aa | ||
|
|
727107323c | ||
|
|
27b705b5b6 | ||
|
|
8e7d21d22b | ||
|
|
61caa07e4c | ||
|
|
331003d1c9 | ||
|
|
4a4402f71a | ||
|
|
3e7671453a | ||
| d8ed77b786 | |||
|
|
09d7c2064f | ||
|
|
58faa83cb3 | ||
|
|
42ccd3e21d | ||
|
|
0771508203 | ||
|
|
c23b9a90dd | ||
|
|
35af6e9e0c | ||
|
|
c3b6ad38fd | ||
|
|
03af5a189c | ||
|
|
16f4b57771 | ||
|
|
6eb13bce63 | ||
| 67ed69df00 | |||
| 585411f402 | |||
| a3612c986d | |||
| baf820286f | |||
| ed626b8ba0 | |||
| 4d7b9d524f | |||
| 87b3ad94f9 |
21
.claude/settings.local.json
Normal file
21
.claude/settings.local.json
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
{
|
||||||
|
"permissions": {
|
||||||
|
"allow": [
|
||||||
|
"Bash(git init:*)",
|
||||||
|
"Bash(git:*)",
|
||||||
|
"WebSearch",
|
||||||
|
"Bash(npm create:*)",
|
||||||
|
"Bash(cp:*)",
|
||||||
|
"Bash(npm install:*)",
|
||||||
|
"Bash(/home/jknapp/.cargo/bin/cargo test:*)",
|
||||||
|
"Bash(ruff:*)",
|
||||||
|
"Bash(npm run:*)",
|
||||||
|
"Bash(npx svelte-check:*)",
|
||||||
|
"Bash(pip install:*)",
|
||||||
|
"Bash(python3:*)",
|
||||||
|
"Bash(/home/jknapp/.cargo/bin/cargo check:*)",
|
||||||
|
"Bash(cargo check:*)",
|
||||||
|
"Bash(npm ls:*)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
1
.claude/worktrees/agent-a0bd87d1
Submodule
1
.claude/worktrees/agent-a0bd87d1
Submodule
Submodule .claude/worktrees/agent-a0bd87d1 added at 67ed69df00
1
.claude/worktrees/agent-a198b5f8
Submodule
1
.claude/worktrees/agent-a198b5f8
Submodule
Submodule .claude/worktrees/agent-a198b5f8 added at 6eb13bce63
1
.claude/worktrees/agent-ad3d6fca
Submodule
1
.claude/worktrees/agent-ad3d6fca
Submodule
Submodule .claude/worktrees/agent-ad3d6fca added at 03af5a189c
1
.claude/worktrees/agent-aefe2597
Submodule
1
.claude/worktrees/agent-aefe2597
Submodule
Submodule .claude/worktrees/agent-aefe2597 added at 16f4b57771
395
.gitea/workflows/release.yml
Normal file
395
.gitea/workflows/release.yml
Normal file
@@ -0,0 +1,395 @@
|
|||||||
|
name: Release
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches: [main]
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
bump-version:
  name: Bump version and tag
  # Skip runs triggered by this job's own version-bump commit; without this
  # guard every bump push would start another bump (infinite loop).
  if: "!contains(github.event.head_commit.message, '[skip ci]')"
  runs-on: ubuntu-latest
  # Exposed to the platform build jobs, which check out and upload to this tag.
  outputs:
    new_version: ${{ steps.bump.outputs.new_version }}
    tag: ${{ steps.bump.outputs.tag }}
  steps:
    - uses: actions/checkout@v4
      with:
        fetch-depth: 0

    - name: Configure git
      run: |
        git config user.name "Gitea Actions"
        git config user.email "actions@gitea.local"

    - name: Bump patch version
      id: bump
      run: |
        # Read current version from package.json
        CURRENT=$(grep '"version"' package.json | head -1 | sed 's/.*"version": *"\([^"]*\)".*/\1/')
        echo "Current version: ${CURRENT}"

        # Stop on anything that is not MAJOR.MINOR.PATCH. An empty or
        # malformed value would otherwise yield a bogus version such as "..1".
        if ! printf '%s\n' "${CURRENT}" | grep -Eq '^[0-9]+\.[0-9]+\.[0-9]+$'; then
          echo "ERROR: package.json version '${CURRENT}' is not MAJOR.MINOR.PATCH."
          exit 1
        fi

        # Increment patch number
        MAJOR=$(echo "${CURRENT}" | cut -d. -f1)
        MINOR=$(echo "${CURRENT}" | cut -d. -f2)
        PATCH=$(echo "${CURRENT}" | cut -d. -f3)
        NEW_PATCH=$((PATCH + 1))
        NEW_VERSION="${MAJOR}.${MINOR}.${NEW_PATCH}"
        echo "New version: ${NEW_VERSION}"

        # Escape the dots so sed matches the version literally instead of
        # treating "." as "any character".
        CURRENT_RE=$(printf '%s' "${CURRENT}" | sed 's/\./\\./g')

        # Update package.json
        sed -i "s/\"version\": \"${CURRENT_RE}\"/\"version\": \"${NEW_VERSION}\"/" package.json

        # Update src-tauri/tauri.conf.json
        sed -i "s/\"version\": \"${CURRENT_RE}\"/\"version\": \"${NEW_VERSION}\"/" src-tauri/tauri.conf.json

        # Update src-tauri/Cargo.toml (match version = "x.y.z" in [package] section)
        sed -i "s/^version = \"${CURRENT_RE}\"/version = \"${NEW_VERSION}\"/" src-tauri/Cargo.toml

        # Update python/pyproject.toml (whatever version it currently holds)
        sed -i "s/^version = \".*\"/version = \"${NEW_VERSION}\"/" python/pyproject.toml

        echo "new_version=${NEW_VERSION}" >> "$GITHUB_OUTPUT"
        echo "tag=v${NEW_VERSION}" >> "$GITHUB_OUTPUT"

    - name: Commit and tag
      env:
        BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
      run: |
        NEW_VERSION="${{ steps.bump.outputs.new_version }}"
        git add package.json src-tauri/tauri.conf.json src-tauri/Cargo.toml python/pyproject.toml
        # "[skip ci]" is what the job-level `if:` looks for to break the loop.
        git commit -m "chore: bump version to ${NEW_VERSION} [skip ci]"
        git tag "v${NEW_VERSION}"

        # Push using token for authentication
        REMOTE_URL=$(git remote get-url origin | sed "s|://|://gitea-actions:${BUILD_TOKEN}@|")
        git push "${REMOTE_URL}" HEAD:main
        git push "${REMOTE_URL}" "v${NEW_VERSION}"

    - name: Create Gitea release
      env:
        BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
      run: |
        REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
        TAG="${{ steps.bump.outputs.tag }}"
        RELEASE_NAME="Voice to Notes ${TAG}"

        # --fail turns an HTTP error status into a non-zero exit, so a rejected
        # request stops the job instead of being reported as a created release.
        if ! curl -sS --fail -X POST \
          -H "Authorization: token ${BUILD_TOKEN}" \
          -H "Content-Type: application/json" \
          -d "{\"tag_name\": \"${TAG}\", \"name\": \"${RELEASE_NAME}\", \"body\": \"Automated build.\", \"draft\": false, \"prerelease\": false}" \
          "${REPO_API}/releases"; then
          echo "ERROR: Failed to create release for tag ${TAG}."
          exit 1
        fi
        echo "Created release: ${RELEASE_NAME}"
|
||||||
|
|
||||||
|
# ── Platform builds (run after version bump) ──
|
||||||
|
|
||||||
|
build-linux:
  name: Build (Linux)
  needs: bump-version
  runs-on: ubuntu-latest
  env:
    PYTHON_VERSION: "3.11"
    NODE_VERSION: "20"
  steps:
    # Build from the tag created by bump-version so the bundle carries the
    # bumped version number.
    - uses: actions/checkout@v4
      with:
        ref: ${{ needs.bump-version.outputs.tag }}

    # ── Python sidecar ──
    - name: Install uv
      run: |
        if command -v uv &> /dev/null; then
          echo "uv already installed: $(uv --version)"
        else
          curl -LsSf https://astral.sh/uv/install.sh | sh
          echo "$HOME/.local/bin" >> "$GITHUB_PATH"
        fi

    - name: Install ffmpeg
      run: sudo apt-get update && sudo apt-get install -y ffmpeg

    - name: Set up Python
      run: uv python install ${{ env.PYTHON_VERSION }}

    - name: Build sidecar
      working-directory: python
      run: uv run --python ${{ env.PYTHON_VERSION }} python build_sidecar.py --cpu-only

    - name: Package sidecar for Tauri
      run: |
        cd python/dist/voice-to-notes-sidecar && zip -r ../../../src-tauri/sidecar.zip .

    # ── Tauri app ──
    - name: Set up Node.js
      uses: actions/setup-node@v4
      with:
        node-version: ${{ env.NODE_VERSION }}

    - name: Install Rust stable
      run: |
        curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable
        echo "$HOME/.cargo/bin" >> "$GITHUB_PATH"

    # The package index was already refreshed by the "Install ffmpeg" step.
    - name: Install system dependencies
      run: |
        sudo apt-get install -y libgtk-3-dev libwebkit2gtk-4.1-dev libappindicator3-dev librsvg2-dev patchelf xdg-utils

    - name: Install npm dependencies
      run: npm ci

    - name: Build Tauri app
      run: npm run tauri build

    # ── Release ──
    - name: Upload to release
      env:
        BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
      run: |
        sudo apt-get install -y jq
        REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"

        TAG="${{ needs.bump-version.outputs.tag }}"
        echo "Release tag: ${TAG}"

        RELEASE_ID=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
          "${REPO_API}/releases/tags/${TAG}" | jq -r '.id // empty')

        if [ -z "${RELEASE_ID}" ] || [ "${RELEASE_ID}" = "null" ]; then
          echo "ERROR: Failed to find release for tag ${TAG}."
          exit 1
        fi

        echo "Release ID: ${RELEASE_ID}"

        # The loop reads from process substitution rather than a pipe so that
        # the counter and `exit` act on this shell, not a pipeline subshell.
        uploaded=0
        while IFS= read -r file; do
          filename=$(basename "$file")
          # Percent-encode every reserved character, not only spaces.
          encoded_name=$(jq -rn --arg name "$filename" '$name | @uri')
          echo "Uploading ${filename} ($(du -h "$file" | cut -f1))..."

          # Replace an asset of the same name left over from a previous run.
          # The name is passed with --arg so it is compared as data rather
          # than spliced into the jq program.
          ASSET_ID=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
            "${REPO_API}/releases/${RELEASE_ID}/assets" \
            | jq -r --arg name "$filename" '.[] | select(.name == $name) | .id // empty')
          if [ -n "${ASSET_ID}" ]; then
            curl -s -X DELETE -H "Authorization: token ${BUILD_TOKEN}" \
              "${REPO_API}/releases/${RELEASE_ID}/assets/${ASSET_ID}"
          fi

          HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X POST \
            -H "Authorization: token ${BUILD_TOKEN}" \
            -H "Content-Type: application/octet-stream" \
            -T "$file" \
            "${REPO_API}/releases/${RELEASE_ID}/assets?name=${encoded_name}")
          echo "Upload response: HTTP ${HTTP_CODE}"

          # A non-2xx status means the asset is missing from the release.
          case "${HTTP_CODE}" in
            2??) uploaded=$((uploaded + 1)) ;;
            *)
              echo "ERROR: Upload of ${filename} failed (HTTP ${HTTP_CODE})."
              exit 1
              ;;
          esac
        done < <(find src-tauri/target/release/bundle -type f -name "*.deb")

        # A build that produced no bundle must not be reported as a success.
        if [ "${uploaded}" -eq 0 ]; then
          echo "ERROR: No .deb bundle found under src-tauri/target/release/bundle."
          exit 1
        fi
|
||||||
|
|
||||||
|
build-windows:
  name: Build (Windows)
  needs: bump-version
  runs-on: windows-latest
  env:
    PYTHON_VERSION: "3.11"
    NODE_VERSION: "20"
  steps:
    # Build from the tag created by bump-version so the bundle carries the
    # bumped version number.
    - uses: actions/checkout@v4
      with:
        ref: ${{ needs.bump-version.outputs.tag }}

    # ── Python sidecar ──
    - name: Install uv
      shell: powershell
      run: |
        if (Get-Command uv -ErrorAction SilentlyContinue) {
          Write-Host "uv already installed: $(uv --version)"
        } else {
          irm https://astral.sh/uv/install.ps1 | iex
          echo "$env:USERPROFILE\.local\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
        }

    - name: Install ffmpeg
      shell: powershell
      run: choco install ffmpeg -y

    - name: Set up Python
      shell: powershell
      run: uv python install ${{ env.PYTHON_VERSION }}

    - name: Build sidecar
      shell: powershell
      working-directory: python
      run: uv run --python ${{ env.PYTHON_VERSION }} python build_sidecar.py --cpu-only

    - name: Package sidecar for Tauri
      shell: powershell
      run: |
        Compress-Archive -Path python\dist\voice-to-notes-sidecar\* -DestinationPath src-tauri\sidecar.zip

    # ── Tauri app ──
    - name: Set up Node.js
      uses: actions/setup-node@v4
      with:
        node-version: ${{ env.NODE_VERSION }}

    - name: Install Rust stable
      shell: powershell
      run: |
        if (Get-Command rustup -ErrorAction SilentlyContinue) {
          rustup default stable
        } else {
          Invoke-WebRequest -Uri https://win.rustup.rs/x86_64 -OutFile rustup-init.exe
          .\rustup-init.exe -y --default-toolchain stable
          echo "$env:USERPROFILE\.cargo\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
        }

    - name: Install npm dependencies
      shell: powershell
      run: npm ci

    - name: Build Tauri app
      shell: powershell
      run: npm run tauri build

    # ── Release ──
    - name: Upload to release
      shell: powershell
      env:
        BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
      run: |
        $REPO_API = "${{ github.server_url }}/api/v1/repos/${{ github.repository }}"
        $Headers = @{ "Authorization" = "token $env:BUILD_TOKEN" }

        $TAG = "${{ needs.bump-version.outputs.tag }}"
        Write-Host "Release tag: ${TAG}"

        $release = Invoke-RestMethod -Uri "${REPO_API}/releases/tags/${TAG}" -Headers $Headers -ErrorAction Stop
        $RELEASE_ID = $release.id
        Write-Host "Release ID: ${RELEASE_ID}"

        $bundles = @(Get-ChildItem -Path src-tauri\target\release\bundle -Recurse -Include *.msi,*-setup.exe)
        # A build that produced no installer must not be reported as a success.
        if ($bundles.Count -eq 0) {
          Write-Host "ERROR: No .msi or -setup.exe bundle found under src-tauri\target\release\bundle."
          exit 1
        }

        $failed = @()
        foreach ($bundle in $bundles) {
          $filename = $bundle.Name
          $encodedName = [System.Uri]::EscapeDataString($filename)
          $size = [math]::Round($bundle.Length / 1MB, 1)
          Write-Host "Uploading ${filename} (${size} MB)..."

          # Replacing an asset left over from a previous run is best-effort,
          # but the reason it failed is logged instead of being discarded.
          try {
            $assets = Invoke-RestMethod -Uri "${REPO_API}/releases/${RELEASE_ID}/assets" -Headers $Headers
            $existing = @($assets | Where-Object { $_.name -eq $filename })
            foreach ($asset in $existing) {
              Invoke-RestMethod -Uri "${REPO_API}/releases/${RELEASE_ID}/assets/$($asset.id)" -Method Delete -Headers $Headers
            }
          } catch {
            Write-Host "WARNING: Could not remove existing asset ${filename}: $($_.Exception.Message)"
          }

          # Use curl for streaming upload (Invoke-RestMethod fails on large files)
          $uploadUrl = "${REPO_API}/releases/${RELEASE_ID}/assets?name=${encodedName}"
          $result = curl.exe --fail --silent --show-error `
            -X POST `
            -H "Authorization: token $env:BUILD_TOKEN" `
            -H "Content-Type: application/octet-stream" `
            --data-binary "@$($bundle.FullName)" `
            "$uploadUrl" 2>&1
          if ($LASTEXITCODE -eq 0) {
            Write-Host "Upload successful: ${filename}"
          } else {
            Write-Host "WARNING: Upload failed for ${filename}: ${result}"
            $failed += $filename
          }
        }

        # Every bundle is attempted first; the step then fails if any upload
        # did not succeed, so a release with missing installers is not green.
        if ($failed.Count -gt 0) {
          Write-Host "ERROR: Upload failed for: $($failed -join ', ')"
          exit 1
        }
|
||||||
|
|
||||||
|
build-macos:
  name: Build (macOS)
  needs: bump-version
  runs-on: macos-latest
  env:
    PYTHON_VERSION: "3.11"
    NODE_VERSION: "20"
  steps:
    # Build from the tag created by bump-version so the bundle carries the
    # bumped version number.
    - uses: actions/checkout@v4
      with:
        ref: ${{ needs.bump-version.outputs.tag }}

    # ── Python sidecar ──
    - name: Install uv
      run: |
        if command -v uv &> /dev/null; then
          echo "uv already installed: $(uv --version)"
        else
          curl -LsSf https://astral.sh/uv/install.sh | sh
          echo "$HOME/.local/bin" >> "$GITHUB_PATH"
        fi

    - name: Install ffmpeg
      run: brew install ffmpeg

    - name: Set up Python
      run: uv python install ${{ env.PYTHON_VERSION }}

    - name: Build sidecar
      working-directory: python
      run: uv run --python ${{ env.PYTHON_VERSION }} python build_sidecar.py --cpu-only

    - name: Package sidecar for Tauri
      run: |
        cd python/dist/voice-to-notes-sidecar && zip -r ../../../src-tauri/sidecar.zip .

    # ── Tauri app ──
    - name: Set up Node.js
      uses: actions/setup-node@v4
      with:
        node-version: ${{ env.NODE_VERSION }}

    - name: Install Rust stable
      run: |
        curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable
        echo "$HOME/.cargo/bin" >> "$GITHUB_PATH"

    # Best-effort: "|| true" keeps the job going if create-dmg cannot be installed.
    - name: Install system dependencies
      run: brew install --quiet create-dmg || true

    - name: Install npm dependencies
      run: npm ci

    - name: Build Tauri app
      run: npm run tauri build

    # ── Release ──
    - name: Upload to release
      env:
        BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
      run: |
        which jq || brew install jq

        REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"

        TAG="${{ needs.bump-version.outputs.tag }}"
        echo "Release tag: ${TAG}"

        RELEASE_ID=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
          "${REPO_API}/releases/tags/${TAG}" | jq -r '.id // empty')

        if [ -z "${RELEASE_ID}" ] || [ "${RELEASE_ID}" = "null" ]; then
          echo "ERROR: Failed to find release for tag ${TAG}."
          exit 1
        fi

        echo "Release ID: ${RELEASE_ID}"

        # The loop reads from process substitution rather than a pipe so that
        # the counter and `exit` act on this shell, not a pipeline subshell.
        uploaded=0
        while IFS= read -r file; do
          filename=$(basename "$file")
          # Percent-encode every reserved character, not only spaces.
          encoded_name=$(jq -rn --arg name "$filename" '$name | @uri')
          echo "Uploading ${filename} ($(du -h "$file" | cut -f1))..."

          # Replace an asset of the same name left over from a previous run.
          # The name is passed with --arg so it is compared as data rather
          # than spliced into the jq program.
          ASSET_ID=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
            "${REPO_API}/releases/${RELEASE_ID}/assets" \
            | jq -r --arg name "$filename" '.[] | select(.name == $name) | .id // empty')
          if [ -n "${ASSET_ID}" ]; then
            curl -s -X DELETE -H "Authorization: token ${BUILD_TOKEN}" \
              "${REPO_API}/releases/${RELEASE_ID}/assets/${ASSET_ID}"
          fi

          HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X POST \
            -H "Authorization: token ${BUILD_TOKEN}" \
            -H "Content-Type: application/octet-stream" \
            -T "$file" \
            "${REPO_API}/releases/${RELEASE_ID}/assets?name=${encoded_name}")
          echo "Upload response: HTTP ${HTTP_CODE}"

          # A non-2xx status means the asset is missing from the release.
          case "${HTTP_CODE}" in
            2??) uploaded=$((uploaded + 1)) ;;
            *)
              echo "ERROR: Upload of ${filename} failed (HTTP ${HTTP_CODE})."
              exit 1
              ;;
          esac
        done < <(find src-tauri/target/release/bundle -type f -name "*.dmg")

        # A build that produced no bundle must not be reported as a success.
        if [ "${uploaded}" -eq 0 ]; then
          echo "ERROR: No .dmg bundle found under src-tauri/target/release/bundle."
          exit 1
        fi
|
||||||
7
.gitignore
vendored
7
.gitignore
vendored
@@ -46,3 +46,10 @@ Thumbs.db
|
|||||||
*.ogg
|
*.ogg
|
||||||
*.flac
|
*.flac
|
||||||
!test/fixtures/*
|
!test/fixtures/*
|
||||||
|
|
||||||
|
# Sidecar build artifacts
|
||||||
|
src-tauri/binaries/*
|
||||||
|
!src-tauri/binaries/.gitkeep
|
||||||
|
src-tauri/sidecar.zip
|
||||||
|
python/dist/
|
||||||
|
python/build/
|
||||||
|
|||||||
14
CLAUDE.md
14
CLAUDE.md
@@ -8,7 +8,7 @@ Desktop app for transcribing audio/video with speaker identification. Runs local
|
|||||||
- **ML pipeline:** Python sidecar process (faster-whisper, pyannote.audio, wav2vec2)
|
- **ML pipeline:** Python sidecar process (faster-whisper, pyannote.audio, wav2vec2)
|
||||||
- **Database:** SQLite (via rusqlite in Rust)
|
- **Database:** SQLite (via rusqlite in Rust)
|
||||||
- **Local AI:** Bundled llama-server (llama.cpp) — default, no install needed
|
- **Local AI:** Bundled llama-server (llama.cpp) — default, no install needed
|
||||||
- **Cloud AI providers:** LiteLLM, OpenAI, Anthropic (optional, user-configured)
|
- **Cloud AI providers:** OpenAI, Anthropic, OpenAI-compatible endpoints (optional, user-configured)
|
||||||
- **Caption export:** pysubs2 (Python)
|
- **Caption export:** pysubs2 (Python)
|
||||||
- **Audio UI:** wavesurfer.js
|
- **Audio UI:** wavesurfer.js
|
||||||
- **Transcript editor:** TipTap (ProseMirror)
|
- **Transcript editor:** TipTap (ProseMirror)
|
||||||
@@ -40,7 +40,13 @@ docs/ # Architecture and design documents
|
|||||||
- Database: UUIDs as primary keys (TEXT type in SQLite)
|
- Database: UUIDs as primary keys (TEXT type in SQLite)
|
||||||
- All timestamps in milliseconds (integer) relative to media file start
|
- All timestamps in milliseconds (integer) relative to media file start
|
||||||
|
|
||||||
|
## Distribution
|
||||||
|
- Python sidecar is frozen via PyInstaller into a standalone binary for distribution
|
||||||
|
- Tauri bundles the sidecar via `externalBin` — no Python required for end users
|
||||||
|
- CI/CD builds on Gitea Actions (Linux, Windows, macOS ARM)
|
||||||
|
- Dev mode uses system Python (`VOICE_TO_NOTES_DEV=1` or debug builds)
|
||||||
|
|
||||||
## Platform Targets
|
## Platform Targets
|
||||||
- Linux (primary development target)
|
- Linux x86_64 (primary development target)
|
||||||
- Windows (must work, tested before release)
|
- Windows x86_64
|
||||||
- macOS (future, not yet targeted)
|
- macOS aarch64 (Apple Silicon)
|
||||||
|
|||||||
94
README.md
94
README.md
@@ -2,28 +2,90 @@
|
|||||||
|
|
||||||
A desktop application that transcribes audio/video recordings with speaker identification, producing editable transcriptions with synchronized audio playback.
|
A desktop application that transcribes audio/video recordings with speaker identification, producing editable transcriptions with synchronized audio playback.
|
||||||
|
|
||||||
## Goals
|
## Features
|
||||||
|
|
||||||
- **Speech-to-Text Transcription** — Accurately convert spoken audio from recordings into text
|
- **Speech-to-Text Transcription** — Accurate transcription via faster-whisper (Whisper models) with word-level timestamps
|
||||||
- **Speaker Identification (Diarization)** — Detect and distinguish between different speakers in a conversation
|
- **Speaker Identification (Diarization)** — Detect and distinguish between speakers using pyannote.audio
|
||||||
- **Speaker Naming** — Assign and persist speaker names/IDs across the transcription
|
- **Synchronized Playback** — Click any word to seek to that point in the audio (Web Audio API for instant playback)
|
||||||
- **Synchronized Playback** — Click any transcribed text segment to play back the corresponding audio for review and correction
|
- **AI Integration** — Ask questions about your transcript via OpenAI, Anthropic, or any OpenAI-compatible API (LiteLLM proxies, Ollama, vLLM)
|
||||||
- **Export Formats**
|
- **Export Formats** — SRT, WebVTT, ASS captions, plain text, and Markdown with speaker labels
|
||||||
- Closed captioning files (SRT, VTT) for video
|
- **Cross-Platform** — Builds for Linux, Windows, and macOS (Apple Silicon)
|
||||||
- Plain text documents with speaker labels
|
|
||||||
- **AI Integration** — Connect to AI providers to ask questions about the conversation and generate condensed notes/summaries
|
|
||||||
|
|
||||||
## Platform Support
|
## Platform Support
|
||||||
|
|
||||||
| Platform | Status |
|
| Platform | Architecture | Status |
|
||||||
|----------|--------|
|
|----------|-------------|--------|
|
||||||
| Linux | Planned (initial target) |
|
| Linux | x86_64 | Supported |
|
||||||
| Windows | Planned (initial target) |
|
| Windows | x86_64 | Supported |
|
||||||
| macOS | Future (pending hardware) |
|
| macOS | ARM (Apple Silicon) | Supported |
|
||||||
|
|
||||||
## Project Status
|
## Tech Stack
|
||||||
|
|
||||||
**Early planning phase** — Architecture and technology decisions in progress.
|
- **Desktop shell:** Tauri v2 (Rust backend + Svelte 5 / TypeScript frontend)
|
||||||
|
- **ML pipeline:** Python sidecar (faster-whisper, pyannote.audio) — frozen via PyInstaller for distribution
|
||||||
|
- **Audio playback:** wavesurfer.js with Web Audio API backend
|
||||||
|
- **AI providers:** OpenAI, Anthropic, OpenAI-compatible endpoints (local or remote)
|
||||||
|
- **Local AI:** Bundled llama-server (llama.cpp)
|
||||||
|
- **Caption export:** pysubs2
|
||||||
|
|
||||||
|
## Development
|
||||||
|
|
||||||
|
### Prerequisites
|
||||||
|
|
||||||
|
- Node.js 20+
|
||||||
|
- Rust (stable)
|
||||||
|
- Python 3.11+ with ML dependencies
|
||||||
|
- System: `libgtk-3-dev`, `libwebkit2gtk-4.1-dev` (Linux)
|
||||||
|
|
||||||
|
### Getting Started
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Install frontend dependencies
|
||||||
|
npm install
|
||||||
|
|
||||||
|
# Install Python sidecar dependencies
|
||||||
|
cd python && pip install -e . && cd ..
|
||||||
|
|
||||||
|
# Run in dev mode (uses system Python for the sidecar)
|
||||||
|
npm run tauri:dev
|
||||||
|
```
|
||||||
|
|
||||||
|
### Building for Distribution
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Build the frozen Python sidecar
|
||||||
|
npm run sidecar:build
|
||||||
|
|
||||||
|
# Build the Tauri app (requires sidecar in src-tauri/binaries/)
|
||||||
|
npm run tauri build
|
||||||
|
```
|
||||||
|
|
||||||
|
### CI/CD
|
||||||
|
|
||||||
|
Gitea Actions workflows are in `.gitea/workflows/`. The build pipeline:
|
||||||
|
|
||||||
|
1. **Build sidecar** — PyInstaller-frozen Python binary per platform (CPU-only PyTorch)
|
||||||
|
2. **Build Tauri app** — Bundles the sidecar via `externalBin`, produces .deb/.AppImage (Linux), .msi (Windows), .dmg (macOS)
|
||||||
|
|
||||||
|
#### Required Secrets
|
||||||
|
|
||||||
|
| Secret | Purpose | Required? |
|
||||||
|
|--------|---------|-----------|
|
||||||
|
| `TAURI_SIGNING_PRIVATE_KEY` | Signs Tauri update bundles | Optional (for auto-updates) |
|
||||||
|
|
||||||
|
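The release workflow (`.gitea/workflows/release.yml`) additionally requires a `BUILD_TOKEN` secret: a Gitea API token it uses to push the version-bump commit and tag, create the release, and upload build artifacts. No other secrets are needed for building. AI provider API keys and HuggingFace tokens are configured by end users in the app's Settings.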
|
||||||
|
|
||||||
|
### Project Structure
|
||||||
|
|
||||||
|
```
|
||||||
|
src/ # Svelte 5 frontend
|
||||||
|
src-tauri/ # Rust backend (Tauri commands, sidecar manager, SQLite)
|
||||||
|
python/ # Python sidecar (transcription, diarization, AI)
|
||||||
|
voice_to_notes/ # Python package
|
||||||
|
build_sidecar.py # PyInstaller build script
|
||||||
|
voice_to_notes.spec # PyInstaller spec
|
||||||
|
.gitea/workflows/ # Gitea Actions CI/CD
|
||||||
|
```
|
||||||
|
|
||||||
## License
|
## License
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "voice-to-notes",
|
"name": "voice-to-notes",
|
||||||
"version": "0.1.0",
|
"version": "0.2.5",
|
||||||
"description": "Desktop app for transcribing audio/video with speaker identification",
|
"description": "Desktop app for transcribing audio/video with speaker identification",
|
||||||
"type": "module",
|
"type": "module",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
@@ -11,7 +11,9 @@
|
|||||||
"check:watch": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json --watch",
|
"check:watch": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json --watch",
|
||||||
"lint": "eslint .",
|
"lint": "eslint .",
|
||||||
"test": "vitest",
|
"test": "vitest",
|
||||||
"tauri": "tauri"
|
"tauri": "tauri",
|
||||||
|
"tauri:dev": "VOICE_TO_NOTES_DEV=1 tauri dev",
|
||||||
|
"sidecar:build": "cd python && python3 build_sidecar.py"
|
||||||
},
|
},
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
|
|||||||
245
python/build_sidecar.py
Normal file
245
python/build_sidecar.py
Normal file
@@ -0,0 +1,245 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Build the Voice to Notes sidecar as a standalone binary using PyInstaller.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python build_sidecar.py [--cpu-only]
|
||||||
|
|
||||||
|
Produces a directory `dist/voice-to-notes-sidecar/` containing the frozen
|
||||||
|
sidecar binary and all dependencies. The main binary is renamed to include
|
||||||
|
the Tauri target triple for externalBin resolution.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import os
|
||||||
|
import platform
|
||||||
|
import shutil
|
||||||
|
import stat
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
import urllib.request
|
||||||
|
import zipfile
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
SCRIPT_DIR = Path(__file__).resolve().parent
|
||||||
|
DIST_DIR = SCRIPT_DIR / "dist"
|
||||||
|
BUILD_DIR = SCRIPT_DIR / "build"
|
||||||
|
SPEC_FILE = SCRIPT_DIR / "voice_to_notes.spec"
|
||||||
|
|
||||||
|
# Static ffmpeg download URLs (GPL-licensed builds)
|
||||||
|
FFMPEG_URLS: dict[str, str] = {
|
||||||
|
"linux-x86_64": "https://johnvansickle.com/ffmpeg/releases/ffmpeg-release-amd64-static.tar.xz",
|
||||||
|
"darwin-x86_64": "https://evermeet.cx/ffmpeg/getrelease/zip",
|
||||||
|
"darwin-arm64": "https://evermeet.cx/ffmpeg/getrelease/zip",
|
||||||
|
"win32-x86_64": "https://www.gyan.dev/ffmpeg/builds/ffmpeg-release-essentials.zip",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def get_target_triple() -> str:
    """Return the Tauri-compatible target triple for the host platform.

    The architecture comes from ``platform.machine()`` and the operating
    system from ``platform.system()``, both lower-cased. Known architecture
    spellings are normalised (``amd64`` -> ``x86_64``, ``arm64`` ->
    ``aarch64``); any other value is passed through unchanged.

    Returns:
        ``<arch>-unknown-linux-gnu`` on Linux, ``<arch>-apple-darwin`` on
        macOS, ``<arch>-pc-windows-msvc`` on Windows, and
        ``<arch>-unknown-<system>`` for any other system name.
    """
    raw_arch = platform.machine().lower()
    os_name = platform.system().lower()

    # Collapse the per-OS spellings of the two known architectures.
    if raw_arch in ("x86_64", "amd64"):
        arch = "x86_64"
    elif raw_arch in ("aarch64", "arm64"):
        arch = "aarch64"
    else:
        arch = raw_arch

    # Vendor/ABI suffix for each recognised operating system.
    suffix_by_os = {
        "linux": "unknown-linux-gnu",
        "darwin": "apple-darwin",
        "windows": "pc-windows-msvc",
    }
    suffix = suffix_by_os.get(os_name, f"unknown-{os_name}")
    return f"{arch}-{suffix}"
|
||||||
|
|
||||||
|
|
||||||
|
def _has_uv() -> bool:
    """Report whether a working ``uv`` executable can be run.

    Runs ``uv --version`` with its output captured.

    Returns:
        ``True`` when the command exits successfully; ``False`` when the
        executable cannot be found or exits with a non-zero status.
    """
    probe = ["uv", "--version"]
    try:
        subprocess.run(probe, capture_output=True, check=True)
    except FileNotFoundError:
        # No "uv" executable could be launched.
        return False
    except subprocess.CalledProcessError:
        # "uv" started but exited with a non-zero status.
        return False
    else:
        return True
|
||||||
|
|
||||||
|
|
||||||
|
def create_venv_and_install(cpu_only: bool) -> Path:
    """Build a fresh virtual environment and install the build dependencies.

    Any existing ``BUILD_DIR / "sidecar-venv"`` is deleted first. ``uv`` is
    used for both venv creation and package installation when ``_has_uv()``
    reports it is available; otherwise the standard ``venv`` module and
    ``pip`` are used.

    Args:
        cpu_only: When true, install ``torch`` and ``torchaudio`` from the
            PyTorch CPU wheel index instead of the default index.

    Returns:
        Path to the Python interpreter inside the new virtual environment.

    Raises:
        subprocess.CalledProcessError: If any venv or install command fails.
    """
    venv_dir = BUILD_DIR / "sidecar-venv"
    if venv_dir.exists():
        shutil.rmtree(venv_dir)

    use_uv = _has_uv()

    if use_uv:
        print(f"[build] Creating venv with uv at {venv_dir}")
        # Ask uv for the same major.minor as the interpreter running this script.
        py_version = f"{sys.version_info.major}.{sys.version_info.minor}"
        create_cmd = ["uv", "venv", "--python", py_version, str(venv_dir)]
    else:
        print(f"[build] Creating venv at {venv_dir}")
        create_cmd = [sys.executable, "-m", "venv", str(venv_dir)]
    subprocess.run(create_cmd, check=True)

    # Location of the interpreter inside the venv differs on Windows.
    if sys.platform == "win32":
        python = str(venv_dir / "Scripts" / "python.exe")
    else:
        python = str(venv_dir / "bin" / "python")

    # Command prefix shared by every install; uv is pointed at the venv
    # directory (not the python binary).
    if use_uv:
        install_prefix = ["uv", "pip", "install", "--python", str(venv_dir)]
    else:
        install_prefix = [python, "-m", "pip", "install"]

    def pip_install(*args: str) -> None:
        """Install packages. Pass package names and flags only, not 'install'."""
        subprocess.run([*install_prefix, *args], check=True)

    if not use_uv:
        # Upgrade pip (uv doesn't need this)
        pip_install("--upgrade", "pip", "setuptools", "wheel")

    # Install torch (CPU-only to avoid bundling ~2GB of CUDA libs)
    torch_args = ["torch", "torchaudio"]
    if cpu_only:
        print("[build] Installing PyTorch (CPU-only)")
        torch_args += ["--index-url", "https://download.pytorch.org/whl/cpu"]
    else:
        print("[build] Installing PyTorch (default, may include CUDA)")
    pip_install(*torch_args)

    # Install project and dev deps (includes pyinstaller)
    print("[build] Installing project dependencies")
    pip_install("-e", f"{SCRIPT_DIR}[dev]")

    return Path(python)
|
||||||
|
|
||||||
|
|
||||||
|
def run_pyinstaller(python: Path) -> Path:
    """Build the sidecar by running PyInstaller against the project spec file.

    Args:
        python: Interpreter used to launch ``python -m PyInstaller``.

    Returns:
        The ``voice-to-notes-sidecar`` directory under ``DIST_DIR``.

    Raises:
        subprocess.CalledProcessError: PyInstaller exited with a non-zero status.
        RuntimeError: The expected output directory is missing after the build.
    """
    print("[build] Running PyInstaller")
    command = [str(python), "-m", "PyInstaller", "--clean", "--noconfirm", str(SPEC_FILE)]
    # Run from the script directory (cwd) so the spec is evaluated from there.
    subprocess.run(command, cwd=str(SCRIPT_DIR), check=True)

    bundle_dir = DIST_DIR / "voice-to-notes-sidecar"
    if bundle_dir.exists():
        return bundle_dir
    raise RuntimeError(f"PyInstaller output not found at {bundle_dir}")
|
||||||
|
|
||||||
|
|
||||||
|
def download_ffmpeg(output_dir: Path) -> None:
    """Download static ffmpeg/ffprobe binaries for the current platform.

    The archive URL is looked up in ``FFMPEG_URLS`` under the key
    ``"<sys.platform>-<machine>"``, where the machine name is normalised to
    ``x86_64`` or ``arm64``. ``.tar.xz`` and ``.zip`` archives are supported;
    only the ffmpeg/ffprobe executables are copied, flattened into
    ``output_dir`` and marked executable.

    This step is best effort: a missing URL, a failed download, an unsupported
    archive type or an archive without the expected binaries is reported as a
    ``[build] Warning`` and never raised to the caller.

    Args:
        output_dir: Directory that receives the binaries. The archive itself is
            stored there temporarily as ``ffmpeg_download`` and always removed.
    """
    machine = platform.machine().lower()
    if machine in ("amd64", "x86_64"):
        machine = "x86_64"
    elif machine in ("aarch64", "arm64"):
        machine = "arm64"

    key = f"{sys.platform}-{machine}"
    url = FFMPEG_URLS.get(key)
    if not url:
        print(f"[build] Warning: No ffmpeg download URL for platform {key}, skipping")
        return

    print(f"[build] Downloading ffmpeg for {key}")
    tmp_path = output_dir / "ffmpeg_download"
    extracted = []  # names of the binaries actually written to output_dir
    try:
        urllib.request.urlretrieve(url, str(tmp_path))

        if url.endswith(".tar.xz"):
            # Linux static build
            import tarfile

            with tarfile.open(str(tmp_path), "r:xz") as tar:
                for member in tar.getmembers():
                    basename = os.path.basename(member.name)
                    # Regular files only: a directory entry that happens to be
                    # called "ffmpeg" must not be extracted in place of the binary.
                    if member.isfile() and basename in ("ffmpeg", "ffprobe"):
                        # Flatten the path so the file lands directly in output_dir.
                        member.name = basename
                        tar.extract(member, path=str(output_dir))
                        dest = output_dir / basename
                        dest.chmod(dest.stat().st_mode | stat.S_IEXEC)
                        extracted.append(basename)
        elif url.endswith(".zip"):
            with zipfile.ZipFile(str(tmp_path), "r") as zf:
                for name in zf.namelist():
                    basename = os.path.basename(name)
                    if basename in ("ffmpeg", "ffprobe", "ffmpeg.exe", "ffprobe.exe"):
                        dest = output_dir / basename
                        dest.write_bytes(zf.read(name))
                        if sys.platform != "win32":
                            dest.chmod(dest.stat().st_mode | stat.S_IEXEC)
                        extracted.append(basename)
        else:
            # Routed through the handler below so it is reported as a warning.
            raise ValueError(f"unsupported archive type: {url}")

        if not extracted:
            # Do not claim success when nothing was actually installed.
            raise ValueError("archive did not contain ffmpeg/ffprobe binaries")
        print("[build] ffmpeg downloaded successfully")
    except Exception as e:
        # Deliberately non-fatal: the build continues without bundled ffmpeg.
        print(f"[build] Warning: Failed to download ffmpeg: {e}")
    finally:
        if tmp_path.exists():
            tmp_path.unlink()
|
||||||
|
|
||||||
|
|
||||||
|
def rename_binary(output_dir: Path, target_triple: str) -> None:
    """Append the target triple to the sidecar executable's name for Tauri.

    ``voice-to-notes-sidecar[.exe]`` inside ``output_dir`` becomes
    ``voice-to-notes-sidecar-<target_triple>[.exe]``; the ``.exe`` suffix is
    used only when running on Windows. If the source binary is absent a
    warning is printed and nothing is renamed.

    Args:
        output_dir: Directory containing the built sidecar executable.
        target_triple: Triple to embed in the file name.
    """
    stem = "voice-to-notes-sidecar"
    extension = ".exe" if sys.platform == "win32" else ""
    original = output_dir / f"{stem}{extension}"
    renamed = output_dir / f"{stem}-{target_triple}{extension}"

    if not original.exists():
        print(f"[build] Warning: Expected binary not found at {original}")
        return

    print(f"[build] Renaming {original.name} -> {renamed.name}")
    original.rename(renamed)
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Command-line entry point: build the sidecar bundle end to end.

    Steps: parse the PyTorch flavour flags, create the build venv and install
    dependencies, run PyInstaller, then download ffmpeg into the output
    directory.

    Flags:
        --cpu-only: Install CPU-only PyTorch. This is the default, so the flag
            exists only to make the choice explicit.
        --with-cuda: Install the default PyTorch build instead.

    The two flags are mutually exclusive; passing both is rejected by argparse
    rather than silently resolving to CUDA.
    """
    parser = argparse.ArgumentParser(description="Build the Voice to Notes sidecar binary")
    torch_flavour = parser.add_mutually_exclusive_group()
    torch_flavour.add_argument(
        "--cpu-only",
        action="store_true",
        default=True,
        help="Install CPU-only PyTorch (default: True, avoids bundling CUDA)",
    )
    torch_flavour.add_argument(
        "--with-cuda",
        action="store_true",
        help="Install PyTorch with CUDA support",
    )
    args = parser.parse_args()
    # --cpu-only defaults to True, so --with-cuda is the only real switch.
    cpu_only = not args.with_cuda

    target_triple = get_target_triple()
    print(f"[build] Target triple: {target_triple}")
    print(f"[build] CPU-only: {cpu_only}")

    python = create_venv_and_install(cpu_only)
    output_dir = run_pyinstaller(python)
    download_ffmpeg(output_dir)

    print(f"\n[build] Done! Sidecar built at: {output_dir}")
    print("[build] Copy directory to src-tauri/sidecar/ for Tauri resource bundling")


if __name__ == "__main__":
    main()
|
||||||
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "voice-to-notes"
|
name = "voice-to-notes"
|
||||||
version = "0.1.0"
|
version = "0.2.5"
|
||||||
description = "Python sidecar for Voice to Notes — transcription, diarization, and AI services"
|
description = "Python sidecar for Voice to Notes — transcription, diarization, and AI services"
|
||||||
requires-python = ">=3.11"
|
requires-python = ">=3.11"
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
@@ -13,6 +13,8 @@ dependencies = [
|
|||||||
"faster-whisper>=1.1.0",
|
"faster-whisper>=1.1.0",
|
||||||
"pyannote.audio>=3.1.0",
|
"pyannote.audio>=3.1.0",
|
||||||
"pysubs2>=1.7.0",
|
"pysubs2>=1.7.0",
|
||||||
|
"openai>=1.0.0",
|
||||||
|
"anthropic>=0.20.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
[project.optional-dependencies]
|
[project.optional-dependencies]
|
||||||
@@ -20,6 +22,7 @@ dev = [
|
|||||||
"ruff>=0.8.0",
|
"ruff>=0.8.0",
|
||||||
"pytest>=8.0.0",
|
"pytest>=8.0.0",
|
||||||
"pytest-asyncio>=0.24.0",
|
"pytest-asyncio>=0.24.0",
|
||||||
|
"pyinstaller>=6.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
[tool.ruff]
|
[tool.ruff]
|
||||||
|
|||||||
@@ -1,7 +1,13 @@
|
|||||||
"""Tests for diarization service data structures and payload conversion."""
|
"""Tests for diarization service data structures and payload conversion."""
|
||||||
|
|
||||||
|
import time
|
||||||
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
from voice_to_notes.services.diarize import (
|
from voice_to_notes.services.diarize import (
|
||||||
DiarizationResult,
|
DiarizationResult,
|
||||||
|
DiarizeService,
|
||||||
SpeakerSegment,
|
SpeakerSegment,
|
||||||
diarization_to_payload,
|
diarization_to_payload,
|
||||||
)
|
)
|
||||||
@@ -31,3 +37,74 @@ def test_diarization_to_payload_empty():
|
|||||||
assert payload["num_speakers"] == 0
|
assert payload["num_speakers"] == 0
|
||||||
assert payload["speaker_segments"] == []
|
assert payload["speaker_segments"] == []
|
||||||
assert payload["speakers"] == []
|
assert payload["speakers"] == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_diarize_threading_progress(monkeypatch):
    """Progress messages are emitted while a slow (mocked) pipeline is running."""
    captured = []

    def fake_pipeline(file_path, **kwargs):
        # Stand-in for a pipeline call that takes about five seconds.
        time.sleep(5)
        # spec=[] gives the mock no attributes of its own, so
        # hasattr(result, "speaker_diarization") is False.
        annotation = MagicMock(spec=[])
        turn = MagicMock()
        turn.start = 0.0
        turn.end = 5.0
        annotation.itertracks = MagicMock(return_value=[(turn, None, "SPEAKER_00")])
        return annotation

    service = DiarizeService()
    service._pipeline = MagicMock(side_effect=fake_pipeline)

    # Collect every message the service writes instead of sending it over IPC.
    with patch("voice_to_notes.services.diarize.write_message", captured.append):
        result = service.diarize(
            request_id="req-1",
            file_path="/fake/audio.wav",
            audio_duration_sec=60.0,
        )

    def is_elapsed_update(message):
        # Keep only in-flight "diarizing" updates (not loading/done messages).
        return (
            message.type == "progress"
            and message.payload.get("stage") == "diarizing"
            and "elapsed" in message.payload.get("message", "")
        )

    diarizing_msgs = list(filter(is_elapsed_update, captured))

    # Original author's estimate: 5s sleep / 2s interval = ~2 messages.
    assert len(diarizing_msgs) >= 1, (
        f"Expected at least 1 diarizing progress message, got {len(diarizing_msgs)}"
    )

    # Every in-flight update must report a percentage within 20..85.
    for msg in diarizing_msgs:
        pct = msg.payload["percent"]
        assert 20 <= pct <= 85, f"Progress {pct} out of expected range 20-85"

    # The mocked single-speaker result must come back intact.
    assert result.num_speakers == 1
    assert result.speakers == ["SPEAKER_00"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_diarize_threading_error_propagation(monkeypatch):
    """An exception raised by the pipeline surfaces from ``diarize()``."""
    service = DiarizeService()
    service._pipeline = MagicMock(side_effect=RuntimeError("Pipeline crashed"))

    # Discard IPC output; only the raised exception matters here.
    silenced = patch("voice_to_notes.services.diarize.write_message", lambda m: None)
    with silenced, pytest.raises(RuntimeError, match="Pipeline crashed"):
        service.diarize(
            request_id="req-1",
            file_path="/fake/audio.wav",
            audio_duration_sec=30.0,
        )
|
||||||
|
|||||||
@@ -3,8 +3,10 @@
|
|||||||
from voice_to_notes.ipc.messages import (
|
from voice_to_notes.ipc.messages import (
|
||||||
IPCMessage,
|
IPCMessage,
|
||||||
error_message,
|
error_message,
|
||||||
|
partial_segment_message,
|
||||||
progress_message,
|
progress_message,
|
||||||
ready_message,
|
ready_message,
|
||||||
|
speaker_update_message,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -48,3 +50,16 @@ def test_ready_message():
|
|||||||
assert msg.type == "ready"
|
assert msg.type == "ready"
|
||||||
assert msg.id == "system"
|
assert msg.id == "system"
|
||||||
assert "version" in msg.payload
|
assert "version" in msg.payload
|
||||||
|
|
||||||
|
|
||||||
|
def test_partial_segment_message():
    """``partial_segment_message`` yields a ``pipeline.segment`` message carrying the segment fields."""
    segment = {"index": 0, "text": "hello"}
    msg = partial_segment_message("req-1", segment)

    assert msg.type == "pipeline.segment"
    assert (msg.payload["index"], msg.payload["text"]) == (0, "hello")
|
||||||
|
|
||||||
|
|
||||||
|
def test_speaker_update_message():
    """``speaker_update_message`` puts the update list under ``payload["updates"]``."""
    updates = [{"index": 0, "speaker": "SPEAKER_00"}]
    msg = speaker_update_message("req-1", updates)

    assert msg.type == "pipeline.speaker_update"
    first_update = msg.payload["updates"][0]
    assert first_update["speaker"] == "SPEAKER_00"
|
||||||
|
|||||||
@@ -88,3 +88,18 @@ def test_merge_results_no_speaker_segments():
|
|||||||
|
|
||||||
result = service._merge_results(transcription, [])
|
result = service._merge_results(transcription, [])
|
||||||
assert result.segments[0].speaker is None
|
assert result.segments[0].speaker is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_speaker_update_generation():
    """Only segments that carry a speaker label produce an update entry.

    NOTE(review): the update list is built inline here rather than by calling
    the pipeline service, so this pins the expected shape of the updates, not
    the service's own implementation.
    """
    labelled = [("Hello", "SPEAKER_00"), ("World", "SPEAKER_01"), ("Foo", None)]
    result = PipelineResult(
        segments=[
            PipelineSegment(
                text=text,
                start_ms=position * 1000,
                end_ms=(position + 1) * 1000,
                speaker=speaker,
            )
            for position, (text, speaker) in enumerate(labelled)
        ],
    )

    updates = []
    for index, segment in enumerate(result.segments):
        if segment.speaker:
            updates.append({"index": index, "speaker": segment.speaker})

    # The unlabelled third segment is skipped; indices refer to the full list.
    assert len(updates) == 2
    assert updates == [
        {"index": 0, "speaker": "SPEAKER_00"},
        {"index": 1, "speaker": "SPEAKER_01"},
    ]
|
||||||
|
|||||||
@@ -5,16 +5,23 @@ import json
|
|||||||
|
|
||||||
from voice_to_notes.ipc.messages import IPCMessage
|
from voice_to_notes.ipc.messages import IPCMessage
|
||||||
from voice_to_notes.ipc.protocol import read_message, write_message
|
from voice_to_notes.ipc.protocol import read_message, write_message
|
||||||
|
import voice_to_notes.ipc.protocol as protocol
|
||||||
|
|
||||||
|
|
||||||
def test_write_message(capsys):
|
def test_write_message():
    """``write_message`` emits the message as JSON on the IPC output stream."""
    sink = io.StringIO()
    saved_stream = protocol._ipc_out
    # Redirect IPC output into an in-memory buffer for the duration of the call.
    protocol._ipc_out = sink
    try:
        write_message(IPCMessage(id="req-1", type="pong", payload={"ok": True}))
        decoded = json.loads(sink.getvalue().strip())
    finally:
        # Always restore the real stream so other tests are unaffected.
        protocol._ipc_out = saved_stream

    assert decoded["id"] == "req-1"
    assert decoded["type"] == "pong"
    assert decoded["payload"]["ok"] is True
|
||||||
|
|
||||||
|
|
||||||
def test_read_message(monkeypatch):
|
def test_read_message(monkeypatch):
|
||||||
|
|||||||
@@ -1,7 +1,10 @@
|
|||||||
"""Tests for transcription service."""
|
"""Tests for transcription service."""
|
||||||
|
|
||||||
|
import inspect
|
||||||
|
|
||||||
from voice_to_notes.services.transcribe import (
|
from voice_to_notes.services.transcribe import (
|
||||||
SegmentResult,
|
SegmentResult,
|
||||||
|
TranscribeService,
|
||||||
TranscriptionResult,
|
TranscriptionResult,
|
||||||
WordResult,
|
WordResult,
|
||||||
result_to_payload,
|
result_to_payload,
|
||||||
@@ -49,3 +52,149 @@ def test_result_to_payload_empty():
|
|||||||
assert payload["segments"] == []
|
assert payload["segments"] == []
|
||||||
assert payload["language"] == ""
|
assert payload["language"] == ""
|
||||||
assert payload["duration_ms"] == 0
|
assert payload["duration_ms"] == 0
|
||||||
|
|
||||||
|
|
||||||
|
def test_on_segment_callback():
    """``TranscribeService.transcribe`` accepts an ``on_segment`` parameter.

    This is a signature-level check only. It does not run a transcription, so
    it cannot confirm that the callback is actually invoked; that would need a
    mocked WhisperModel.
    """
    service = TranscribeService()
    assert "on_segment" in inspect.signature(service.transcribe).parameters
|
||||||
|
|
||||||
|
|
||||||
|
def test_progress_every_segment(monkeypatch):
    """Each transcribed segment yields its own "transcribing" progress message."""
    from unittest.mock import MagicMock, patch
    from voice_to_notes.services.transcribe import TranscribeService

    def fake_segment(position):
        # One-second segment with no word-level data.
        segment = MagicMock()
        segment.start = position * 1.0
        segment.end = (position + 1) * 1.0
        segment.text = f"Segment {position}"
        segment.words = []
        return segment

    # Eight segments: more than five, so per-5th-segment reporting would fail.
    segments = [fake_segment(position) for position in range(8)]

    info = MagicMock()
    info.language = "en"
    info.language_probability = 0.99
    info.duration = 8.0

    # Stand-in for WhisperModel: transcribe() returns (segment iterator, info).
    model = MagicMock()
    model.transcribe.return_value = (iter(segments), info)

    captured = []

    service = TranscribeService()
    service._model = model
    service._current_model_name = "base"
    service._current_device = "cpu"
    service._current_compute_type = "int8"

    with patch("voice_to_notes.services.transcribe.write_message", captured.append):
        service.transcribe("req-1", "/fake/audio.wav")

    transcribing_msgs = []
    for message in captured:
        if message.type == "progress" and message.payload.get("stage") == "transcribing":
            transcribing_msgs.append(message)

    # Per the original author's note, the initial "Starting transcription..."
    # message shares this stage, hence a lower bound rather than an exact count.
    assert len(transcribing_msgs) >= 8, (
        f"Expected at least 8 transcribing progress messages (one per segment), got {len(transcribing_msgs)}"
    )
|
||||||
|
|
||||||
|
|
||||||
|
def test_chunk_report_size_progress():
    """Pin the ``CHUNK_REPORT_SIZE`` constant of the transcribe module at 10."""
    import voice_to_notes.services.transcribe as transcribe_module

    assert transcribe_module.CHUNK_REPORT_SIZE == 10
|
||||||
|
|
||||||
|
|
||||||
|
def test_transcribe_chunked_with_mocked_ffmpeg(monkeypatch):
    """Run ``transcribe_chunked`` with ffprobe/ffmpeg and the Whisper model mocked."""
    import pathlib
    import subprocess
    from unittest.mock import MagicMock, patch
    from voice_to_notes.services.transcribe import TranscribeService, SegmentResult, WordResult

    real_run = subprocess.run

    def fake_run(cmd, **kwargs):
        if "ffprobe" in cmd:
            # Report a 700 second input file.
            probe = MagicMock()
            probe.stdout = "700.0\n"
            probe.returncode = 0
            return probe
        if "ffmpeg" in cmd:
            # Simulate chunk extraction by creating an empty output file;
            # the output path is the last argument.
            pathlib.Path(cmd[-1]).touch()
            extraction = MagicMock()
            extraction.returncode = 0
            return extraction
        return real_run(cmd, **kwargs)

    def fake_transcribe(file_path, **kwargs):
        # Every call yields three one-second segments plus an info object.
        segments = []
        for position in range(3):
            segment = MagicMock()
            segment.start = position * 1.0
            segment.end = (position + 1) * 1.0
            segment.text = f"Segment {position}"
            segment.words = []
            segments.append(segment)
        info = MagicMock()
        info.language = "en"
        info.language_probability = 0.99
        info.duration = 300.0
        return iter(segments), info

    model = MagicMock()
    model.transcribe = fake_transcribe

    service = TranscribeService()
    service._model = model
    service._current_model_name = "base"
    service._current_device = "cpu"
    service._current_compute_type = "int8"

    captured = []

    with patch("subprocess.run", fake_run):
        with patch("voice_to_notes.services.transcribe.write_message", captured.append):
            result = service.transcribe_chunked("req-1", "/fake/long_audio.wav")

    # Segments from the chunks must have been collected.
    assert len(result.segments) > 0

    # Timestamps after the first chunk are expected to be shifted by the chunk
    # offset (300000 ms per the original author's note; chunk length is not
    # visible here). Only checked when more than one chunk's worth came back.
    if len(result.segments) > 3:
        assert result.segments[3].start_ms >= 300000

    assert result.duration_ms == 700000
    assert result.language == "en"
|
||||||
|
|||||||
73
python/voice_to_notes.spec
Normal file
73
python/voice_to_notes.spec
Normal file
@@ -0,0 +1,73 @@
|
|||||||
|
# -*- mode: python ; coding: utf-8 -*-
"""PyInstaller spec that builds the Voice to Notes sidecar as a one-folder bundle."""

from PyInstaller.utils.hooks import collect_all

block_cipher = None

# PyInstaller often misses shared libraries / data files of ML packages, so
# collect everything for these and pass it to Analysis explicitly.
collected_datas = []
collected_binaries = []
collected_hiddenimports = []
for package_name in ("ctranslate2", "faster_whisper", "pyannote"):
    pkg_datas, pkg_binaries, pkg_hiddenimports = collect_all(package_name)
    collected_datas += pkg_datas
    collected_binaries += pkg_binaries
    collected_hiddenimports += pkg_hiddenimports

# Modules listed by hand in addition to what collect_all() reports.
explicit_hiddenimports = [
    "torch",
    "torchaudio",
    "huggingface_hub",
    "pysubs2",
    "openai",
    "anthropic",
    "litellm",
]

excluded_modules = [
    "tkinter",
    "test",
    "unittest",
    "pip",
    "setuptools",
    # ctranslate2.converters imports torch at module level and causes
    # circular import crashes under PyInstaller. These modules are only
    # needed for model format conversion, never for inference.
    "ctranslate2.converters",
]

a = Analysis(
    ["voice_to_notes/main.py"],
    pathex=[],
    binaries=collected_binaries,
    datas=collected_datas,
    hiddenimports=explicit_hiddenimports + collected_hiddenimports,
    hookspath=[],
    hooksconfig={},
    runtime_hooks=[],
    excludes=excluded_modules,
    win_no_prefer_redirects=False,
    win_private_assemblies=False,
    cipher=block_cipher,
    noarchive=False,
)

pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)

# exclude_binaries=True keeps binaries out of the executable; COLLECT below
# gathers them into the output folder instead.
exe = EXE(
    pyz,
    a.scripts,
    [],
    exclude_binaries=True,
    name="voice-to-notes-sidecar",
    debug=False,
    bootloader_ignore_signals=False,
    strip=False,
    upx=True,
    console=True,
)

coll = COLLECT(
    exe,
    a.binaries,
    a.zipfiles,
    a.datas,
    strip=False,
    upx=True,
    upx_exclude=[],
    name="voice-to-notes-sidecar",
)
|
||||||
@@ -2,7 +2,10 @@
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import ctypes
|
||||||
import os
|
import os
|
||||||
|
import platform
|
||||||
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
|
||||||
@@ -21,6 +24,77 @@ class HardwareInfo:
|
|||||||
recommended_compute_type: str = "int8"
|
recommended_compute_type: str = "int8"
|
||||||
|
|
||||||
|
|
||||||
|
def _detect_ram_mb() -> int:
|
||||||
|
"""Detect total system RAM in MB (cross-platform).
|
||||||
|
|
||||||
|
Tries platform-specific methods in order:
|
||||||
|
1. Linux: read /proc/meminfo
|
||||||
|
2. macOS: sysctl hw.memsize
|
||||||
|
3. Windows: GlobalMemoryStatusEx via ctypes
|
||||||
|
4. Fallback: os.sysconf (most Unix systems)
|
||||||
|
|
||||||
|
Returns 0 if all methods fail.
|
||||||
|
"""
|
||||||
|
# Linux: read /proc/meminfo
|
||||||
|
if sys.platform == "linux":
|
||||||
|
try:
|
||||||
|
with open("/proc/meminfo") as f:
|
||||||
|
for line in f:
|
||||||
|
if line.startswith("MemTotal:"):
|
||||||
|
# Value is in kB
|
||||||
|
return int(line.split()[1]) // 1024
|
||||||
|
except (FileNotFoundError, ValueError, OSError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
# macOS: sysctl hw.memsize (returns bytes)
|
||||||
|
if sys.platform == "darwin":
|
||||||
|
try:
|
||||||
|
result = subprocess.run(
|
||||||
|
["sysctl", "-n", "hw.memsize"],
|
||||||
|
capture_output=True,
|
||||||
|
text=True,
|
||||||
|
check=True,
|
||||||
|
)
|
||||||
|
return int(result.stdout.strip()) // (1024 * 1024)
|
||||||
|
except (subprocess.SubprocessError, ValueError, OSError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Windows: GlobalMemoryStatusEx via ctypes
|
||||||
|
if sys.platform == "win32":
|
||||||
|
try:
|
||||||
|
|
||||||
|
class MEMORYSTATUSEX(ctypes.Structure):
|
||||||
|
_fields_ = [
|
||||||
|
("dwLength", ctypes.c_ulong),
|
||||||
|
("dwMemoryLoad", ctypes.c_ulong),
|
||||||
|
("ullTotalPhys", ctypes.c_ulonglong),
|
||||||
|
("ullAvailPhys", ctypes.c_ulonglong),
|
||||||
|
("ullTotalPageFile", ctypes.c_ulonglong),
|
||||||
|
("ullAvailPageFile", ctypes.c_ulonglong),
|
||||||
|
("ullTotalVirtual", ctypes.c_ulonglong),
|
||||||
|
("ullAvailVirtual", ctypes.c_ulonglong),
|
||||||
|
("ullAvailExtendedVirtual", ctypes.c_ulonglong),
|
||||||
|
]
|
||||||
|
|
||||||
|
mem_status = MEMORYSTATUSEX()
|
||||||
|
mem_status.dwLength = ctypes.sizeof(MEMORYSTATUSEX)
|
||||||
|
if ctypes.windll.kernel32.GlobalMemoryStatusEx(ctypes.byref(mem_status)):
|
||||||
|
return int(mem_status.ullTotalPhys) // (1024 * 1024)
|
||||||
|
except (AttributeError, OSError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Fallback: os.sysconf (works on most Unix systems)
|
||||||
|
try:
|
||||||
|
page_size = os.sysconf("SC_PAGE_SIZE")
|
||||||
|
phys_pages = os.sysconf("SC_PHYS_PAGES")
|
||||||
|
if page_size > 0 and phys_pages > 0:
|
||||||
|
return (page_size * phys_pages) // (1024 * 1024)
|
||||||
|
except (ValueError, OSError, AttributeError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
return 0
|
||||||
|
|
||||||
|
|
||||||
def detect_hardware() -> HardwareInfo:
|
def detect_hardware() -> HardwareInfo:
|
||||||
"""Detect available hardware and recommend model configuration."""
|
"""Detect available hardware and recommend model configuration."""
|
||||||
info = HardwareInfo()
|
info = HardwareInfo()
|
||||||
@@ -28,16 +102,8 @@ def detect_hardware() -> HardwareInfo:
|
|||||||
# CPU info
|
# CPU info
|
||||||
info.cpu_cores = os.cpu_count() or 1
|
info.cpu_cores = os.cpu_count() or 1
|
||||||
|
|
||||||
# RAM info
|
# RAM info (cross-platform)
|
||||||
try:
|
info.ram_mb = _detect_ram_mb()
|
||||||
with open("/proc/meminfo") as f:
|
|
||||||
for line in f:
|
|
||||||
if line.startswith("MemTotal:"):
|
|
||||||
# Value is in kB
|
|
||||||
info.ram_mb = int(line.split()[1]) // 1024
|
|
||||||
break
|
|
||||||
except (FileNotFoundError, ValueError):
|
|
||||||
pass
|
|
||||||
|
|
||||||
# CUDA detection
|
# CUDA detection
|
||||||
try:
|
try:
|
||||||
|
|||||||
@@ -41,11 +41,15 @@ def ping_handler(msg: IPCMessage) -> IPCMessage:
|
|||||||
|
|
||||||
def make_transcribe_handler() -> HandlerFunc:
|
def make_transcribe_handler() -> HandlerFunc:
|
||||||
"""Create a transcription handler with a persistent TranscribeService."""
|
"""Create a transcription handler with a persistent TranscribeService."""
|
||||||
from voice_to_notes.services.transcribe import TranscribeService, result_to_payload
|
service = None
|
||||||
|
|
||||||
service = TranscribeService()
|
|
||||||
|
|
||||||
def handler(msg: IPCMessage) -> IPCMessage:
|
def handler(msg: IPCMessage) -> IPCMessage:
|
||||||
|
nonlocal service
|
||||||
|
if service is None:
|
||||||
|
from voice_to_notes.services.transcribe import TranscribeService
|
||||||
|
service = TranscribeService()
|
||||||
|
from voice_to_notes.services.transcribe import result_to_payload
|
||||||
|
|
||||||
payload = msg.payload
|
payload = msg.payload
|
||||||
result = service.transcribe(
|
result = service.transcribe(
|
||||||
request_id=msg.id,
|
request_id=msg.id,
|
||||||
@@ -66,11 +70,15 @@ def make_transcribe_handler() -> HandlerFunc:
|
|||||||
|
|
||||||
def make_diarize_handler() -> HandlerFunc:
|
def make_diarize_handler() -> HandlerFunc:
|
||||||
"""Create a diarization handler with a persistent DiarizeService."""
|
"""Create a diarization handler with a persistent DiarizeService."""
|
||||||
from voice_to_notes.services.diarize import DiarizeService, diarization_to_payload
|
service = None
|
||||||
|
|
||||||
service = DiarizeService()
|
|
||||||
|
|
||||||
def handler(msg: IPCMessage) -> IPCMessage:
|
def handler(msg: IPCMessage) -> IPCMessage:
|
||||||
|
nonlocal service
|
||||||
|
if service is None:
|
||||||
|
from voice_to_notes.services.diarize import DiarizeService
|
||||||
|
service = DiarizeService()
|
||||||
|
from voice_to_notes.services.diarize import diarization_to_payload
|
||||||
|
|
||||||
payload = msg.payload
|
payload = msg.payload
|
||||||
result = service.diarize(
|
result = service.diarize(
|
||||||
request_id=msg.id,
|
request_id=msg.id,
|
||||||
@@ -88,13 +96,90 @@ def make_diarize_handler() -> HandlerFunc:
|
|||||||
return handler
|
return handler
|
||||||
|
|
||||||
|
|
||||||
def make_pipeline_handler() -> HandlerFunc:
|
def _friendly_download_error(error_msg: str) -> str:
    """Turn common HuggingFace download failures into actionable messages.

    Args:
        error_msg: The raw exception text.

    Returns:
        A license-acceptance hint for 403/gated errors (naming the model when
        one can be found in the text), an invalid-token hint for 401 errors,
        otherwise ``error_msg`` unchanged.
    """
    import re

    if "403" in error_msg or "gated" in error_msg.lower():
        # Model ids contain dots (e.g. "pyannote/segmentation-3.0"), so "." has
        # to be part of the match; otherwise the name, and the URL shown to the
        # user, is truncated at the version number.
        model_match = re.search(r"pyannote/[\w.-]+", error_msg)
        if model_match:
            # Drop a sentence-ending period that may directly follow the id.
            model_name = model_match.group(0).rstrip(".")
            return (
                f"Access denied for {model_name}. "
                f"Please visit huggingface.co/{model_name} "
                f"and accept the license agreement, then try again."
            )
        return (
            "Access denied. Please accept the license agreements for all "
            "required pyannote models on HuggingFace."
        )
    if "401" in error_msg:
        return "Invalid token. Please check your HuggingFace token."
    return error_msg


def make_diarize_download_handler() -> HandlerFunc:
    """Create a handler that downloads/validates the diarization model.

    The returned handler reads ``hf_token`` from the message payload,
    pre-downloads the listed pyannote repositories, loads the
    ``pyannote/speaker-diarization-3.1`` pipeline, and replies with a
    ``diarize.download.result`` message (``{"ok": True}``). Any exception is
    logged to stderr and converted into a ``download_error`` error message.
    """
    import os

    def handler(msg: IPCMessage) -> IPCMessage:
        payload = msg.payload
        hf_token = payload.get("hf_token")

        try:
            import huggingface_hub

            # Disable pyannote telemetry (has a bug in v4.0.4). This is set
            # before pyannote is imported.
            os.environ.setdefault("PYANNOTE_METRICS_ENABLED", "false")
            from pyannote.audio import Pipeline

            # Persist token globally so ALL huggingface_hub downloads use auth.
            # Setting env var alone isn't enough — pyannote's internal sub-downloads
            # (e.g. PLDA.from_pretrained) don't forward the token= parameter.
            # login() writes the token to ~/.cache/huggingface/token which
            # huggingface_hub reads automatically for all downloads.
            if hf_token:
                os.environ["HF_TOKEN"] = hf_token
                huggingface_hub.login(token=hf_token, add_to_git_credential=False)

            # Pre-download sub-models that pyannote loads internally.
            # This ensures they're cached before Pipeline.from_pretrained
            # tries to load them (where token forwarding can fail).
            sub_models = [
                "pyannote/segmentation-3.0",
                "pyannote/speaker-diarization-community-1",
            ]
            for model_id in sub_models:
                print(f"[sidecar] Pre-downloading {model_id}...", file=sys.stderr, flush=True)
                huggingface_hub.snapshot_download(model_id, token=hf_token)

            print("[sidecar] Downloading diarization pipeline...", file=sys.stderr, flush=True)
            # The loaded pipeline is not kept; this call only has to succeed.
            Pipeline.from_pretrained(
                "pyannote/speaker-diarization-3.1",
                token=hf_token,
            )
            print("[sidecar] Diarization model downloaded successfully", file=sys.stderr, flush=True)
            return IPCMessage(
                id=msg.id,
                type="diarize.download.result",
                payload={"ok": True},
            )
        except Exception as e:
            error_msg = str(e)
            print(f"[sidecar] Model download error: {error_msg}", file=sys.stderr, flush=True)
            # Make common errors more user-friendly
            return error_message(msg.id, "download_error", _friendly_download_error(error_msg))

    return handler
|
||||||
|
|
||||||
|
|
||||||
|
def make_pipeline_handler() -> HandlerFunc:
|
||||||
|
"""Create a full pipeline handler (transcribe + diarize + merge)."""
|
||||||
|
service = None
|
||||||
|
|
||||||
|
def handler(msg: IPCMessage) -> IPCMessage:
|
||||||
|
nonlocal service
|
||||||
|
if service is None:
|
||||||
|
from voice_to_notes.services.pipeline import PipelineService
|
||||||
|
service = PipelineService()
|
||||||
|
from voice_to_notes.services.pipeline import pipeline_result_to_payload
|
||||||
|
|
||||||
payload = msg.payload
|
payload = msg.payload
|
||||||
result = service.run(
|
result = service.run(
|
||||||
request_id=msg.id,
|
request_id=msg.id,
|
||||||
@@ -107,6 +192,7 @@ def make_pipeline_handler() -> HandlerFunc:
|
|||||||
min_speakers=payload.get("min_speakers"),
|
min_speakers=payload.get("min_speakers"),
|
||||||
max_speakers=payload.get("max_speakers"),
|
max_speakers=payload.get("max_speakers"),
|
||||||
skip_diarization=payload.get("skip_diarization", False),
|
skip_diarization=payload.get("skip_diarization", False),
|
||||||
|
hf_token=payload.get("hf_token"),
|
||||||
)
|
)
|
||||||
return IPCMessage(
|
return IPCMessage(
|
||||||
id=msg.id,
|
id=msg.id,
|
||||||
@@ -119,11 +205,15 @@ def make_pipeline_handler() -> HandlerFunc:
|
|||||||
|
|
||||||
def make_export_handler() -> HandlerFunc:
|
def make_export_handler() -> HandlerFunc:
|
||||||
"""Create an export handler."""
|
"""Create an export handler."""
|
||||||
from voice_to_notes.services.export import ExportService, make_export_request
|
service = None
|
||||||
|
|
||||||
service = ExportService()
|
|
||||||
|
|
||||||
def handler(msg: IPCMessage) -> IPCMessage:
|
def handler(msg: IPCMessage) -> IPCMessage:
|
||||||
|
nonlocal service
|
||||||
|
if service is None:
|
||||||
|
from voice_to_notes.services.export import ExportService
|
||||||
|
service = ExportService()
|
||||||
|
from voice_to_notes.services.export import make_export_request
|
||||||
|
|
||||||
request = make_export_request(msg.payload)
|
request = make_export_request(msg.payload)
|
||||||
output_path = service.export(request)
|
output_path = service.export(request)
|
||||||
return IPCMessage(
|
return IPCMessage(
|
||||||
@@ -137,11 +227,14 @@ def make_export_handler() -> HandlerFunc:
|
|||||||
|
|
||||||
def make_ai_chat_handler() -> HandlerFunc:
|
def make_ai_chat_handler() -> HandlerFunc:
|
||||||
"""Create an AI chat handler with persistent AIProviderService."""
|
"""Create an AI chat handler with persistent AIProviderService."""
|
||||||
from voice_to_notes.services.ai_provider import create_default_service
|
service = None
|
||||||
|
|
||||||
service = create_default_service()
|
|
||||||
|
|
||||||
def handler(msg: IPCMessage) -> IPCMessage:
|
def handler(msg: IPCMessage) -> IPCMessage:
|
||||||
|
nonlocal service
|
||||||
|
if service is None:
|
||||||
|
from voice_to_notes.services.ai_provider import create_default_service
|
||||||
|
service = create_default_service()
|
||||||
|
|
||||||
payload = msg.payload
|
payload = msg.payload
|
||||||
action = payload.get("action", "chat")
|
action = payload.get("action", "chat")
|
||||||
|
|
||||||
@@ -186,10 +279,12 @@ def make_ai_chat_handler() -> HandlerFunc:
|
|||||||
model=config.get("model", "claude-sonnet-4-6"),
|
model=config.get("model", "claude-sonnet-4-6"),
|
||||||
))
|
))
|
||||||
elif provider_name == "litellm":
|
elif provider_name == "litellm":
|
||||||
from voice_to_notes.providers.litellm_provider import LiteLLMProvider
|
from voice_to_notes.providers.litellm_provider import OpenAICompatibleProvider
|
||||||
|
|
||||||
service.register_provider("litellm", LiteLLMProvider(
|
service.register_provider("litellm", OpenAICompatibleProvider(
|
||||||
model=config.get("model", "gpt-4o-mini"),
|
model=config.get("model", "gpt-4o-mini"),
|
||||||
|
api_key=config.get("api_key"),
|
||||||
|
api_base=config.get("api_base"),
|
||||||
))
|
))
|
||||||
return IPCMessage(
|
return IPCMessage(
|
||||||
id=msg.id,
|
id=msg.id,
|
||||||
|
|||||||
@@ -34,6 +34,14 @@ def progress_message(request_id: str, percent: int, stage: str, message: str) ->
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def partial_segment_message(request_id: str, segment_data: dict) -> IPCMessage:
    """Build a ``pipeline.segment`` message for one segment.

    Args:
        request_id: Used as the message ``id``.
        segment_data: Passed through unchanged as the message payload.

    Returns:
        The constructed ``IPCMessage``.
    """
    return IPCMessage(
        id=request_id,
        type="pipeline.segment",
        payload=segment_data,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def speaker_update_message(request_id: str, updates: list[dict]) -> IPCMessage:
    """Build a ``pipeline.speaker_update`` message.

    Args:
        request_id: Used as the message ``id``.
        updates: List of update dicts; wrapped under the ``"updates"`` key
            of the payload.

    Returns:
        The constructed ``IPCMessage``.
    """
    body = {"updates": updates}
    return IPCMessage(
        id=request_id,
        type="pipeline.speaker_update",
        payload=body,
    )
|
||||||
|
|
||||||
|
|
||||||
def error_message(request_id: str, code: str, message: str) -> IPCMessage:
|
def error_message(request_id: str, code: str, message: str) -> IPCMessage:
|
||||||
return IPCMessage(
|
return IPCMessage(
|
||||||
id=request_id,
|
id=request_id,
|
||||||
|
|||||||
@@ -1,13 +1,53 @@
|
|||||||
"""JSON-line protocol reader/writer over stdin/stdout."""
|
"""JSON-line protocol reader/writer over stdin/stdout.
|
||||||
|
|
||||||
|
IMPORTANT: stdout is reserved exclusively for IPC messages.
|
||||||
|
At init time we save the real stdout, then redirect sys.stdout → stderr
|
||||||
|
so that any rogue print() calls from libraries don't corrupt the IPC stream.
|
||||||
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import io
|
||||||
import json
|
import json
|
||||||
|
import os
|
||||||
import sys
|
import sys
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from voice_to_notes.ipc.messages import IPCMessage
|
from voice_to_notes.ipc.messages import IPCMessage
|
||||||
|
|
||||||
|
# Save the real stdout fd for IPC before any library can pollute it.
|
||||||
|
# Then redirect sys.stdout to stderr so library prints go to stderr.
|
||||||
|
_ipc_out: io.TextIOWrapper | None = None
|
||||||
|
|
||||||
|
|
||||||
|
def init_ipc() -> None:
    """Reserve the real stdout for IPC and rebind ``sys.stdout`` to stderr.

    Must be called once at sidecar startup, before importing any ML
    libraries. Calling it again after it has run is a no-op.
    """
    global _ipc_out
    if _ipc_out is None:
        # Work on a duplicate descriptor so the IPC stream keeps pointing at
        # the original stdout after sys.stdout is rebound below.
        ipc_fd = os.dup(sys.stdout.fileno())
        raw_stream = io.FileIO(ipc_fd, "w")
        buffered_stream = io.BufferedWriter(raw_stream)
        _ipc_out = io.TextIOWrapper(
            buffered_stream,
            encoding="utf-8",
            line_buffering=True,
        )

        # From here on, print() calls without an explicit file= go to stderr.
        sys.stdout = sys.stderr
|
||||||
|
|
||||||
|
|
||||||
|
def _get_ipc_out() -> io.TextIOWrapper:
    """Select the stream that IPC messages are written to.

    Returns:
        The stream captured by ``init_ipc()`` when there is one; otherwise
        (``init_ipc()`` was never called, e.g. in tests) ``sys.__stdout__``.
    """
    return sys.__stdout__ if _ipc_out is None else _ipc_out
|
||||||
|
|
||||||
|
|
||||||
def read_message() -> IPCMessage | None:
|
def read_message() -> IPCMessage | None:
|
||||||
"""Read a single JSON-line message from stdin. Returns None on EOF."""
|
"""Read a single JSON-line message from stdin. Returns None on EOF."""
|
||||||
@@ -29,17 +69,19 @@ def read_message() -> IPCMessage | None:
|
|||||||
|
|
||||||
|
|
||||||
def write_message(msg: IPCMessage) -> None:
|
def write_message(msg: IPCMessage) -> None:
|
||||||
"""Write a JSON-line message to stdout."""
|
"""Write a JSON-line message to the IPC channel (real stdout)."""
|
||||||
|
out = _get_ipc_out()
|
||||||
line = json.dumps(msg.to_dict(), separators=(",", ":"))
|
line = json.dumps(msg.to_dict(), separators=(",", ":"))
|
||||||
sys.stdout.write(line + "\n")
|
out.write(line + "\n")
|
||||||
sys.stdout.flush()
|
out.flush()
|
||||||
|
|
||||||
|
|
||||||
def write_dict(data: dict[str, Any]) -> None:
|
def write_dict(data: dict[str, Any]) -> None:
|
||||||
"""Write a raw dict as a JSON-line message to stdout."""
|
"""Write a raw dict as a JSON-line message to the IPC channel."""
|
||||||
|
out = _get_ipc_out()
|
||||||
line = json.dumps(data, separators=(",", ":"))
|
line = json.dumps(data, separators=(",", ":"))
|
||||||
sys.stdout.write(line + "\n")
|
out.write(line + "\n")
|
||||||
sys.stdout.flush()
|
out.flush()
|
||||||
|
|
||||||
|
|
||||||
def _log(message: str) -> None:
|
def _log(message: str) -> None:
|
||||||
|
|||||||
@@ -5,18 +5,25 @@ from __future__ import annotations
|
|||||||
import signal
|
import signal
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
from voice_to_notes.ipc.handlers import (
|
# CRITICAL: Capture real stdout for IPC *before* importing any ML libraries
|
||||||
|
# that might print to stdout and corrupt the JSON-line protocol.
|
||||||
|
from voice_to_notes.ipc.protocol import init_ipc
|
||||||
|
|
||||||
|
init_ipc()
|
||||||
|
|
||||||
|
from voice_to_notes.ipc.handlers import ( # noqa: E402
|
||||||
HandlerRegistry,
|
HandlerRegistry,
|
||||||
hardware_detect_handler,
|
hardware_detect_handler,
|
||||||
make_ai_chat_handler,
|
make_ai_chat_handler,
|
||||||
|
make_diarize_download_handler,
|
||||||
make_diarize_handler,
|
make_diarize_handler,
|
||||||
make_export_handler,
|
make_export_handler,
|
||||||
make_pipeline_handler,
|
make_pipeline_handler,
|
||||||
make_transcribe_handler,
|
make_transcribe_handler,
|
||||||
ping_handler,
|
ping_handler,
|
||||||
)
|
)
|
||||||
from voice_to_notes.ipc.messages import ready_message
|
from voice_to_notes.ipc.messages import ready_message # noqa: E402
|
||||||
from voice_to_notes.ipc.protocol import read_message, write_message
|
from voice_to_notes.ipc.protocol import read_message, write_message # noqa: E402
|
||||||
|
|
||||||
|
|
||||||
def create_registry() -> HandlerRegistry:
|
def create_registry() -> HandlerRegistry:
|
||||||
@@ -26,6 +33,7 @@ def create_registry() -> HandlerRegistry:
|
|||||||
registry.register("transcribe.start", make_transcribe_handler())
|
registry.register("transcribe.start", make_transcribe_handler())
|
||||||
registry.register("hardware.detect", hardware_detect_handler)
|
registry.register("hardware.detect", hardware_detect_handler)
|
||||||
registry.register("diarize.start", make_diarize_handler())
|
registry.register("diarize.start", make_diarize_handler())
|
||||||
|
registry.register("diarize.download", make_diarize_download_handler())
|
||||||
registry.register("pipeline.start", make_pipeline_handler())
|
registry.register("pipeline.start", make_pipeline_handler())
|
||||||
registry.register("export.start", make_export_handler())
|
registry.register("export.start", make_export_handler())
|
||||||
registry.register("ai.chat", make_ai_chat_handler())
|
registry.register("ai.chat", make_ai_chat_handler())
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
"""LiteLLM provider — multi-provider gateway."""
|
"""OpenAI-compatible provider — works with any OpenAI-compatible API endpoint."""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
@@ -7,36 +7,44 @@ from typing import Any
|
|||||||
from voice_to_notes.providers.base import AIProvider
|
from voice_to_notes.providers.base import AIProvider
|
||||||
|
|
||||||
|
|
||||||
class LiteLLMProvider(AIProvider):
|
class OpenAICompatibleProvider(AIProvider):
|
||||||
"""Routes through LiteLLM for access to 100+ LLM providers."""
|
"""Connects to any OpenAI-compatible API (LiteLLM proxy, Ollama, vLLM, etc.)."""
|
||||||
|
|
||||||
def __init__(self, model: str = "gpt-4o-mini", **kwargs: Any) -> None:
|
def __init__(
|
||||||
|
self,
|
||||||
|
api_key: str | None = None,
|
||||||
|
api_base: str | None = None,
|
||||||
|
model: str = "gpt-4o-mini",
|
||||||
|
**kwargs: Any,
|
||||||
|
) -> None:
|
||||||
|
self._api_key = api_key or "sk-no-key"
|
||||||
|
self._api_base = api_base
|
||||||
self._model = model
|
self._model = model
|
||||||
self._extra_kwargs = kwargs
|
self._extra_kwargs = kwargs
|
||||||
|
|
||||||
def chat(self, messages: list[dict[str, str]], **kwargs: Any) -> str:
|
def chat(self, messages: list[dict[str, str]], **kwargs: Any) -> str:
|
||||||
try:
|
from openai import OpenAI
|
||||||
import litellm
|
|
||||||
except ImportError:
|
|
||||||
raise RuntimeError("litellm package is required. Install with: pip install litellm")
|
|
||||||
|
|
||||||
merged_kwargs = {**self._extra_kwargs, **kwargs}
|
client_kwargs: dict[str, Any] = {"api_key": self._api_key}
|
||||||
response = litellm.completion(
|
if self._api_base:
|
||||||
model=merged_kwargs.get("model", self._model),
|
client_kwargs["base_url"] = self._api_base
|
||||||
|
|
||||||
|
client = OpenAI(**client_kwargs)
|
||||||
|
response = client.chat.completions.create(
|
||||||
|
model=kwargs.get("model", self._model),
|
||||||
messages=messages,
|
messages=messages,
|
||||||
temperature=merged_kwargs.get("temperature", 0.7),
|
temperature=kwargs.get("temperature", 0.7),
|
||||||
max_tokens=merged_kwargs.get("max_tokens", 2048),
|
max_tokens=kwargs.get("max_tokens", 2048),
|
||||||
)
|
)
|
||||||
return response.choices[0].message.content or ""
|
return response.choices[0].message.content or ""
|
||||||
|
|
||||||
def is_available(self) -> bool:
|
def is_available(self) -> bool:
|
||||||
try:
|
try:
|
||||||
import litellm # noqa: F401
|
import openai # noqa: F401
|
||||||
|
return bool(self._api_key and self._api_base)
|
||||||
return True
|
|
||||||
except ImportError:
|
except ImportError:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def name(self) -> str:
|
def name(self) -> str:
|
||||||
return "LiteLLM"
|
return "OpenAI Compatible"
|
||||||
|
|||||||
@@ -92,7 +92,7 @@ class AIProviderService:
|
|||||||
def create_default_service() -> AIProviderService:
|
def create_default_service() -> AIProviderService:
|
||||||
"""Create an AIProviderService with all supported providers registered."""
|
"""Create an AIProviderService with all supported providers registered."""
|
||||||
from voice_to_notes.providers.anthropic_provider import AnthropicProvider
|
from voice_to_notes.providers.anthropic_provider import AnthropicProvider
|
||||||
from voice_to_notes.providers.litellm_provider import LiteLLMProvider
|
from voice_to_notes.providers.litellm_provider import OpenAICompatibleProvider
|
||||||
from voice_to_notes.providers.local_provider import LocalProvider
|
from voice_to_notes.providers.local_provider import LocalProvider
|
||||||
from voice_to_notes.providers.openai_provider import OpenAIProvider
|
from voice_to_notes.providers.openai_provider import OpenAIProvider
|
||||||
|
|
||||||
@@ -100,5 +100,5 @@ def create_default_service() -> AIProviderService:
|
|||||||
service.register_provider("local", LocalProvider())
|
service.register_provider("local", LocalProvider())
|
||||||
service.register_provider("openai", OpenAIProvider())
|
service.register_provider("openai", OpenAIProvider())
|
||||||
service.register_provider("anthropic", AnthropicProvider())
|
service.register_provider("anthropic", AnthropicProvider())
|
||||||
service.register_provider("litellm", LiteLLMProvider())
|
service.register_provider("litellm", OpenAICompatibleProvider())
|
||||||
return service
|
return service
|
||||||
|
|||||||
@@ -2,15 +2,69 @@
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
|
import tempfile
|
||||||
|
import threading
|
||||||
import time
|
import time
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
|
from pathlib import Path
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
|
# Disable pyannote telemetry — it has a bug in v4.0.4 where
|
||||||
|
# np.isfinite(None) crashes when max_speakers is not set.
|
||||||
|
os.environ.setdefault("PYANNOTE_METRICS_ENABLED", "false")
|
||||||
|
|
||||||
|
from voice_to_notes.utils.ffmpeg import get_ffmpeg_path
|
||||||
from voice_to_notes.ipc.messages import progress_message
|
from voice_to_notes.ipc.messages import progress_message
|
||||||
from voice_to_notes.ipc.protocol import write_message
|
from voice_to_notes.ipc.protocol import write_message
|
||||||
|
|
||||||
|
|
||||||
|
def _ensure_wav(file_path: str) -> tuple[str, str | None]:
|
||||||
|
"""Convert audio to 16kHz mono WAV if needed.
|
||||||
|
|
||||||
|
pyannote.audio v4.0.4 has a bug where its AudioDecoder returns
|
||||||
|
duration=None for some formats (FLAC, etc.), causing crashes.
|
||||||
|
Converting to WAV ensures the duration header is always present.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
(path_to_use, temp_path_or_None)
|
||||||
|
If conversion was needed, temp_path is the WAV file to clean up.
|
||||||
|
"""
|
||||||
|
ext = Path(file_path).suffix.lower()
|
||||||
|
if ext == ".wav":
|
||||||
|
return file_path, None
|
||||||
|
|
||||||
|
tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
|
||||||
|
tmp.close()
|
||||||
|
try:
|
||||||
|
subprocess.run(
|
||||||
|
[
|
||||||
|
get_ffmpeg_path(), "-y", "-i", file_path,
|
||||||
|
"-ar", "16000", "-ac", "1", "-c:a", "pcm_s16le",
|
||||||
|
tmp.name,
|
||||||
|
],
|
||||||
|
check=True,
|
||||||
|
capture_output=True,
|
||||||
|
)
|
||||||
|
print(
|
||||||
|
f"[sidecar] Converted {ext} to WAV for diarization",
|
||||||
|
file=sys.stderr,
|
||||||
|
flush=True,
|
||||||
|
)
|
||||||
|
return tmp.name, tmp.name
|
||||||
|
except (subprocess.CalledProcessError, FileNotFoundError) as e:
|
||||||
|
# ffmpeg not available or failed — try original file and hope for the best
|
||||||
|
print(
|
||||||
|
f"[sidecar] WAV conversion failed ({e}), using original file",
|
||||||
|
file=sys.stderr,
|
||||||
|
flush=True,
|
||||||
|
)
|
||||||
|
os.unlink(tmp.name)
|
||||||
|
return file_path, None
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class SpeakerSegment:
|
class SpeakerSegment:
|
||||||
"""A time span assigned to a speaker."""
|
"""A time span assigned to a speaker."""
|
||||||
@@ -35,45 +89,59 @@ class DiarizeService:
|
|||||||
def __init__(self) -> None:
|
def __init__(self) -> None:
|
||||||
self._pipeline: Any = None
|
self._pipeline: Any = None
|
||||||
|
|
||||||
def _ensure_pipeline(self) -> Any:
|
def _ensure_pipeline(self, hf_token: str | None = None) -> Any:
|
||||||
"""Load the pyannote diarization pipeline (lazy)."""
|
"""Load the pyannote diarization pipeline (lazy)."""
|
||||||
if self._pipeline is not None:
|
if self._pipeline is not None:
|
||||||
return self._pipeline
|
return self._pipeline
|
||||||
|
|
||||||
print("[sidecar] Loading pyannote diarization pipeline...", file=sys.stderr, flush=True)
|
print("[sidecar] Loading pyannote diarization pipeline...", file=sys.stderr, flush=True)
|
||||||
|
|
||||||
try:
|
# Use token from argument, fall back to environment variable
|
||||||
from pyannote.audio import Pipeline
|
if not hf_token:
|
||||||
|
hf_token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN") or None
|
||||||
|
|
||||||
self._pipeline = Pipeline.from_pretrained(
|
# Persist token globally so ALL huggingface_hub sub-downloads use auth.
|
||||||
|
# Pyannote has internal dependencies that don't forward the token= param.
|
||||||
|
if hf_token:
|
||||||
|
os.environ["HF_TOKEN"] = hf_token
|
||||||
|
import huggingface_hub
|
||||||
|
huggingface_hub.login(token=hf_token, add_to_git_credential=False)
|
||||||
|
|
||||||
|
models = [
|
||||||
"pyannote/speaker-diarization-3.1",
|
"pyannote/speaker-diarization-3.1",
|
||||||
use_auth_token=False,
|
"pyannote/speaker-diarization",
|
||||||
)
|
]
|
||||||
except Exception:
|
|
||||||
# Fall back to a simpler approach if the model isn't available
|
last_error: Exception | None = None
|
||||||
# pyannote requires HuggingFace token for some models
|
for model_name in models:
|
||||||
# Try the community model first
|
|
||||||
try:
|
try:
|
||||||
from pyannote.audio import Pipeline
|
from pyannote.audio import Pipeline
|
||||||
|
|
||||||
self._pipeline = Pipeline.from_pretrained(
|
self._pipeline = Pipeline.from_pretrained(model_name, token=hf_token)
|
||||||
"pyannote/speaker-diarization",
|
print(f"[sidecar] Loaded diarization model: {model_name}", file=sys.stderr, flush=True)
|
||||||
use_auth_token=False,
|
# Move pipeline to GPU if available
|
||||||
)
|
try:
|
||||||
|
import torch
|
||||||
|
if torch.cuda.is_available():
|
||||||
|
self._pipeline = self._pipeline.to(torch.device("cuda"))
|
||||||
|
print(f"[sidecar] Diarization pipeline moved to GPU", file=sys.stderr, flush=True)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
print(f"[sidecar] GPU not available for diarization: {e}", file=sys.stderr, flush=True)
|
||||||
|
return self._pipeline
|
||||||
|
except Exception as e:
|
||||||
|
last_error = e
|
||||||
print(
|
print(
|
||||||
f"[sidecar] Warning: Could not load pyannote pipeline: {e}",
|
f"[sidecar] Warning: Could not load {model_name}: {e}",
|
||||||
file=sys.stderr,
|
file=sys.stderr,
|
||||||
flush=True,
|
flush=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
raise RuntimeError(
|
raise RuntimeError(
|
||||||
"pyannote.audio pipeline not available. "
|
"pyannote.audio pipeline not available. "
|
||||||
"You may need to accept the model license at "
|
"You may need to accept the model license at "
|
||||||
"https://huggingface.co/pyannote/speaker-diarization-3.1 "
|
"https://huggingface.co/pyannote/speaker-diarization-3.1 "
|
||||||
"and set a HF_TOKEN environment variable."
|
"and set a HF_TOKEN environment variable."
|
||||||
) from e
|
) from last_error
|
||||||
|
|
||||||
return self._pipeline
|
|
||||||
|
|
||||||
def diarize(
|
def diarize(
|
||||||
self,
|
self,
|
||||||
@@ -82,6 +150,8 @@ class DiarizeService:
|
|||||||
num_speakers: int | None = None,
|
num_speakers: int | None = None,
|
||||||
min_speakers: int | None = None,
|
min_speakers: int | None = None,
|
||||||
max_speakers: int | None = None,
|
max_speakers: int | None = None,
|
||||||
|
hf_token: str | None = None,
|
||||||
|
audio_duration_sec: float | None = None,
|
||||||
) -> DiarizationResult:
|
) -> DiarizationResult:
|
||||||
"""Run speaker diarization on an audio file.
|
"""Run speaker diarization on an audio file.
|
||||||
|
|
||||||
@@ -99,7 +169,7 @@ class DiarizeService:
|
|||||||
progress_message(request_id, 0, "loading_diarization", "Loading diarization model...")
|
progress_message(request_id, 0, "loading_diarization", "Loading diarization model...")
|
||||||
)
|
)
|
||||||
|
|
||||||
pipeline = self._ensure_pipeline()
|
pipeline = self._ensure_pipeline(hf_token=hf_token)
|
||||||
|
|
||||||
write_message(
|
write_message(
|
||||||
progress_message(request_id, 20, "diarizing", "Running speaker diarization...")
|
progress_message(request_id, 20, "diarizing", "Running speaker diarization...")
|
||||||
@@ -116,8 +186,55 @@ class DiarizeService:
|
|||||||
if max_speakers is not None:
|
if max_speakers is not None:
|
||||||
kwargs["max_speakers"] = max_speakers
|
kwargs["max_speakers"] = max_speakers
|
||||||
|
|
||||||
# Run diarization
|
# Convert to WAV to work around pyannote v4.0.4 duration bug
|
||||||
diarization = pipeline(file_path, **kwargs)
|
audio_path, temp_wav = _ensure_wav(file_path)
|
||||||
|
|
||||||
|
print(
|
||||||
|
f"[sidecar] Running diarization on {audio_path} with kwargs: {kwargs}",
|
||||||
|
file=sys.stderr,
|
||||||
|
flush=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Run diarization in background thread for progress reporting
|
||||||
|
result_holder: list = [None]
|
||||||
|
error_holder: list[Exception | None] = [None]
|
||||||
|
done_event = threading.Event()
|
||||||
|
|
||||||
|
def _run():
|
||||||
|
try:
|
||||||
|
result_holder[0] = pipeline(audio_path, **kwargs)
|
||||||
|
except Exception as e:
|
||||||
|
error_holder[0] = e
|
||||||
|
finally:
|
||||||
|
done_event.set()
|
||||||
|
|
||||||
|
thread = threading.Thread(target=_run, daemon=True)
|
||||||
|
thread.start()
|
||||||
|
|
||||||
|
elapsed = 0.0
|
||||||
|
estimated_total = max(audio_duration_sec * 0.5, 30.0) if audio_duration_sec else 120.0
|
||||||
|
while not done_event.wait(timeout=2.0):
|
||||||
|
elapsed += 2.0
|
||||||
|
pct = min(20 + int((elapsed / estimated_total) * 65), 85)
|
||||||
|
write_message(progress_message(
|
||||||
|
request_id, pct, "diarizing",
|
||||||
|
f"Analyzing speakers ({int(elapsed)}s elapsed)..."))
|
||||||
|
|
||||||
|
thread.join()
|
||||||
|
|
||||||
|
# Clean up temp file
|
||||||
|
if temp_wav:
|
||||||
|
os.unlink(temp_wav)
|
||||||
|
|
||||||
|
if error_holder[0] is not None:
|
||||||
|
raise error_holder[0]
|
||||||
|
raw_result = result_holder[0]
|
||||||
|
|
||||||
|
# pyannote 4.0+ returns DiarizeOutput; older versions return Annotation directly
|
||||||
|
if hasattr(raw_result, "speaker_diarization"):
|
||||||
|
diarization = raw_result.speaker_diarization
|
||||||
|
else:
|
||||||
|
diarization = raw_result
|
||||||
|
|
||||||
# Convert pyannote output to our format
|
# Convert pyannote output to our format
|
||||||
result = DiarizationResult()
|
result = DiarizationResult()
|
||||||
|
|||||||
@@ -2,13 +2,19 @@
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import concurrent.futures
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from voice_to_notes.ipc.messages import progress_message
|
from voice_to_notes.ipc.messages import (
|
||||||
|
partial_segment_message,
|
||||||
|
progress_message,
|
||||||
|
speaker_update_message,
|
||||||
|
)
|
||||||
from voice_to_notes.ipc.protocol import write_message
|
from voice_to_notes.ipc.protocol import write_message
|
||||||
|
from voice_to_notes.utils.ffmpeg import get_ffprobe_path
|
||||||
from voice_to_notes.services.diarize import DiarizeService, SpeakerSegment
|
from voice_to_notes.services.diarize import DiarizeService, SpeakerSegment
|
||||||
from voice_to_notes.services.transcribe import (
|
from voice_to_notes.services.transcribe import (
|
||||||
SegmentResult,
|
SegmentResult,
|
||||||
@@ -60,6 +66,7 @@ class PipelineService:
|
|||||||
min_speakers: int | None = None,
|
min_speakers: int | None = None,
|
||||||
max_speakers: int | None = None,
|
max_speakers: int | None = None,
|
||||||
skip_diarization: bool = False,
|
skip_diarization: bool = False,
|
||||||
|
hf_token: str | None = None,
|
||||||
) -> PipelineResult:
|
) -> PipelineResult:
|
||||||
"""Run the full transcription + diarization pipeline.
|
"""Run the full transcription + diarization pipeline.
|
||||||
|
|
||||||
@@ -77,22 +84,59 @@ class PipelineService:
|
|||||||
"""
|
"""
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
|
|
||||||
# Step 1: Transcribe
|
# Step 0: Probe audio duration for conditional chunked transcription
|
||||||
write_message(
|
write_message(
|
||||||
progress_message(request_id, 0, "pipeline", "Starting transcription pipeline...")
|
progress_message(request_id, 0, "pipeline", "Starting transcription pipeline...")
|
||||||
)
|
)
|
||||||
|
|
||||||
transcription = self._transcribe_service.transcribe(
|
def _emit_segment(seg: SegmentResult, index: int) -> None:
|
||||||
|
write_message(partial_segment_message(request_id, {
|
||||||
|
"index": index,
|
||||||
|
"text": seg.text,
|
||||||
|
"start_ms": seg.start_ms,
|
||||||
|
"end_ms": seg.end_ms,
|
||||||
|
"words": [{"word": w.word, "start_ms": w.start_ms, "end_ms": w.end_ms, "confidence": w.confidence} for w in seg.words],
|
||||||
|
}))
|
||||||
|
|
||||||
|
audio_duration_sec = None
|
||||||
|
try:
|
||||||
|
import subprocess
|
||||||
|
probe_result = subprocess.run(
|
||||||
|
[get_ffprobe_path(), "-v", "quiet", "-show_entries", "format=duration",
|
||||||
|
"-of", "default=noprint_wrappers=1:nokey=1", file_path],
|
||||||
|
capture_output=True, text=True, check=True,
|
||||||
|
)
|
||||||
|
audio_duration_sec = float(probe_result.stdout.strip())
|
||||||
|
except (subprocess.CalledProcessError, FileNotFoundError, ValueError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def _run_transcription() -> TranscriptionResult:
|
||||||
|
"""Run transcription (chunked or standard based on duration)."""
|
||||||
|
from voice_to_notes.services.transcribe import LARGE_FILE_THRESHOLD_SEC
|
||||||
|
if audio_duration_sec and audio_duration_sec > LARGE_FILE_THRESHOLD_SEC:
|
||||||
|
return self._transcribe_service.transcribe_chunked(
|
||||||
request_id=request_id,
|
request_id=request_id,
|
||||||
file_path=file_path,
|
file_path=file_path,
|
||||||
model_name=model_name,
|
model_name=model_name,
|
||||||
device=device,
|
device=device,
|
||||||
compute_type=compute_type,
|
compute_type=compute_type,
|
||||||
language=language,
|
language=language,
|
||||||
|
on_segment=_emit_segment,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
return self._transcribe_service.transcribe(
|
||||||
|
request_id=request_id,
|
||||||
|
file_path=file_path,
|
||||||
|
model_name=model_name,
|
||||||
|
device=device,
|
||||||
|
compute_type=compute_type,
|
||||||
|
language=language,
|
||||||
|
on_segment=_emit_segment,
|
||||||
)
|
)
|
||||||
|
|
||||||
if skip_diarization:
|
if skip_diarization:
|
||||||
# Convert transcription directly without speaker labels
|
# Sequential: transcribe only, no diarization needed
|
||||||
|
transcription = _run_transcription()
|
||||||
result = PipelineResult(
|
result = PipelineResult(
|
||||||
language=transcription.language,
|
language=transcription.language,
|
||||||
language_probability=transcription.language_probability,
|
language_probability=transcription.language_probability,
|
||||||
@@ -110,37 +154,63 @@ class PipelineService:
|
|||||||
)
|
)
|
||||||
return result
|
return result
|
||||||
|
|
||||||
# Step 2: Diarize (with graceful fallback)
|
# Parallel execution: run transcription (0-45%) and diarization (45-90%)
|
||||||
|
# concurrently, then merge (90-100%).
|
||||||
write_message(
|
write_message(
|
||||||
progress_message(request_id, 50, "pipeline", "Starting speaker diarization...")
|
progress_message(
|
||||||
|
request_id, 0, "pipeline",
|
||||||
|
"Starting transcription and diarization in parallel..."
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
diarization = None
|
diarization = None
|
||||||
try:
|
diarization_error = None
|
||||||
diarization = self._diarize_service.diarize(
|
|
||||||
|
with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
|
||||||
|
transcription_future = executor.submit(_run_transcription)
|
||||||
|
|
||||||
|
# Use probed audio_duration_sec for diarization progress estimation
|
||||||
|
# (transcription hasn't finished yet, so we can't use transcription.duration_ms)
|
||||||
|
diarization_future = executor.submit(
|
||||||
|
self._diarize_service.diarize,
|
||||||
request_id=request_id,
|
request_id=request_id,
|
||||||
file_path=file_path,
|
file_path=file_path,
|
||||||
num_speakers=num_speakers,
|
num_speakers=num_speakers,
|
||||||
min_speakers=min_speakers,
|
min_speakers=min_speakers,
|
||||||
max_speakers=max_speakers,
|
max_speakers=max_speakers,
|
||||||
|
hf_token=hf_token,
|
||||||
|
audio_duration_sec=audio_duration_sec,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Wait for both futures. We need the transcription result regardless,
|
||||||
|
# but diarization may fail gracefully.
|
||||||
|
transcription = transcription_future.result()
|
||||||
|
write_message(
|
||||||
|
progress_message(request_id, 45, "pipeline", "Transcription complete")
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
diarization = diarization_future.result()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
import traceback
|
||||||
|
diarization_error = e
|
||||||
print(
|
print(
|
||||||
f"[sidecar] Diarization failed, falling back to transcription-only: {e}",
|
f"[sidecar] Diarization failed, falling back to transcription-only: {e}",
|
||||||
file=sys.stderr,
|
file=sys.stderr,
|
||||||
flush=True,
|
flush=True,
|
||||||
)
|
)
|
||||||
|
traceback.print_exc(file=sys.stderr)
|
||||||
write_message(
|
write_message(
|
||||||
progress_message(
|
progress_message(
|
||||||
request_id, 80, "pipeline",
|
request_id, 80, "pipeline",
|
||||||
"Diarization unavailable, using transcription only..."
|
f"Diarization failed ({e}), using transcription only..."
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
# Step 3: Merge (or skip if diarization failed)
|
# Step 3: Merge (or skip if diarization failed)
|
||||||
if diarization is not None:
|
if diarization is not None:
|
||||||
write_message(
|
write_message(
|
||||||
progress_message(request_id, 90, "pipeline", "Merging transcript with speakers...")
|
progress_message(request_id, 90, "merging", "Merging transcript with speakers...")
|
||||||
)
|
)
|
||||||
result = self._merge_results(transcription, diarization.speaker_segments)
|
result = self._merge_results(transcription, diarization.speaker_segments)
|
||||||
result.speakers = diarization.speakers
|
result.speakers = diarization.speakers
|
||||||
@@ -170,6 +240,10 @@ class PipelineService:
|
|||||||
flush=True,
|
flush=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
updates = [{"index": i, "speaker": seg.speaker} for i, seg in enumerate(result.segments) if seg.speaker]
|
||||||
|
if updates:
|
||||||
|
write_message(speaker_update_message(request_id, updates))
|
||||||
|
|
||||||
write_message(
|
write_message(
|
||||||
progress_message(request_id, 100, "done", "Pipeline complete")
|
progress_message(request_id, 100, "done", "Pipeline complete")
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ from __future__ import annotations
|
|||||||
|
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
|
from collections.abc import Callable
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
@@ -11,6 +12,10 @@ from faster_whisper import WhisperModel
|
|||||||
|
|
||||||
from voice_to_notes.ipc.messages import progress_message
|
from voice_to_notes.ipc.messages import progress_message
|
||||||
from voice_to_notes.ipc.protocol import write_message
|
from voice_to_notes.ipc.protocol import write_message
|
||||||
|
from voice_to_notes.utils.ffmpeg import get_ffmpeg_path, get_ffprobe_path
|
||||||
|
|
||||||
|
CHUNK_REPORT_SIZE = 10
|
||||||
|
LARGE_FILE_THRESHOLD_SEC = 3600 # 1 hour
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@@ -90,6 +95,7 @@ class TranscribeService:
|
|||||||
device: str = "cpu",
|
device: str = "cpu",
|
||||||
compute_type: str = "int8",
|
compute_type: str = "int8",
|
||||||
language: str | None = None,
|
language: str | None = None,
|
||||||
|
on_segment: Callable[[SegmentResult, int], None] | None = None,
|
||||||
) -> TranscriptionResult:
|
) -> TranscriptionResult:
|
||||||
"""Transcribe an audio file with word-level timestamps.
|
"""Transcribe an audio file with word-level timestamps.
|
||||||
|
|
||||||
@@ -145,17 +151,24 @@ class TranscribeService:
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
# Send progress every few segments
|
if on_segment:
|
||||||
if segment_count % 5 == 0:
|
on_segment(result.segments[-1], segment_count - 1)
|
||||||
|
|
||||||
write_message(
|
write_message(
|
||||||
progress_message(
|
progress_message(
|
||||||
request_id,
|
request_id,
|
||||||
progress_pct,
|
progress_pct,
|
||||||
"transcribing",
|
"transcribing",
|
||||||
f"Processed {segment_count} segments...",
|
f"Transcribing segment {segment_count} ({progress_pct}% of audio)...",
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if segment_count % CHUNK_REPORT_SIZE == 0:
|
||||||
|
write_message(progress_message(
|
||||||
|
request_id, progress_pct, "transcribing",
|
||||||
|
f"Completed chunk of {CHUNK_REPORT_SIZE} segments "
|
||||||
|
f"({segment_count} total, {progress_pct}% of audio)..."))
|
||||||
|
|
||||||
elapsed = time.time() - start_time
|
elapsed = time.time() - start_time
|
||||||
print(
|
print(
|
||||||
f"[sidecar] Transcription complete: {segment_count} segments in {elapsed:.1f}s",
|
f"[sidecar] Transcription complete: {segment_count} segments in {elapsed:.1f}s",
|
||||||
@@ -166,6 +179,113 @@ class TranscribeService:
|
|||||||
write_message(progress_message(request_id, 100, "done", "Transcription complete"))
|
write_message(progress_message(request_id, 100, "done", "Transcription complete"))
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
def transcribe_chunked(
|
||||||
|
self,
|
||||||
|
request_id: str,
|
||||||
|
file_path: str,
|
||||||
|
model_name: str = "base",
|
||||||
|
device: str = "cpu",
|
||||||
|
compute_type: str = "int8",
|
||||||
|
language: str | None = None,
|
||||||
|
on_segment: Callable[[SegmentResult, int], None] | None = None,
|
||||||
|
chunk_duration_sec: int = 300,
|
||||||
|
) -> TranscriptionResult:
|
||||||
|
"""Transcribe a large audio file by splitting into chunks.
|
||||||
|
|
||||||
|
Uses ffmpeg to split the file into chunks, transcribes each chunk,
|
||||||
|
then merges the results with corrected timestamps.
|
||||||
|
|
||||||
|
Falls back to standard transcribe() if ffmpeg is not available.
|
||||||
|
"""
|
||||||
|
import subprocess
|
||||||
|
import tempfile
|
||||||
|
|
||||||
|
# Get total duration via ffprobe
|
||||||
|
try:
|
||||||
|
probe_result = subprocess.run(
|
||||||
|
[get_ffprobe_path(), "-v", "quiet", "-show_entries", "format=duration",
|
||||||
|
"-of", "default=noprint_wrappers=1:nokey=1", file_path],
|
||||||
|
capture_output=True, text=True, check=True,
|
||||||
|
)
|
||||||
|
total_duration = float(probe_result.stdout.strip())
|
||||||
|
except (subprocess.CalledProcessError, FileNotFoundError, ValueError):
|
||||||
|
# ffprobe not available or failed — fall back to standard transcription
|
||||||
|
write_message(progress_message(
|
||||||
|
request_id, 5, "transcribing",
|
||||||
|
"ffmpeg not available, using standard transcription..."))
|
||||||
|
return self.transcribe(request_id, file_path, model_name, device,
|
||||||
|
compute_type, language, on_segment=on_segment)
|
||||||
|
|
||||||
|
num_chunks = max(1, int(total_duration / chunk_duration_sec) + 1)
|
||||||
|
write_message(progress_message(
|
||||||
|
request_id, 5, "transcribing",
|
||||||
|
f"Splitting {total_duration:.0f}s file into {num_chunks} chunks..."))
|
||||||
|
|
||||||
|
merged_result = TranscriptionResult()
|
||||||
|
global_segment_index = 0
|
||||||
|
|
||||||
|
for chunk_idx in range(num_chunks):
|
||||||
|
chunk_start = chunk_idx * chunk_duration_sec
|
||||||
|
if chunk_start >= total_duration:
|
||||||
|
break
|
||||||
|
|
||||||
|
chunk_start_ms = int(chunk_start * 1000)
|
||||||
|
|
||||||
|
# Extract chunk to temp file
|
||||||
|
tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
|
||||||
|
tmp.close()
|
||||||
|
try:
|
||||||
|
subprocess.run(
|
||||||
|
[get_ffmpeg_path(), "-y", "-ss", str(chunk_start),
|
||||||
|
"-t", str(chunk_duration_sec),
|
||||||
|
"-i", file_path,
|
||||||
|
"-ar", "16000", "-ac", "1", "-c:a", "pcm_s16le",
|
||||||
|
tmp.name],
|
||||||
|
capture_output=True, check=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Wrap on_segment to offset the index
|
||||||
|
chunk_on_segment = None
|
||||||
|
if on_segment:
|
||||||
|
base_index = global_segment_index
|
||||||
|
def chunk_on_segment(seg: SegmentResult, idx: int, _base=base_index) -> None:
|
||||||
|
on_segment(seg, _base + idx)
|
||||||
|
|
||||||
|
chunk_result = self.transcribe(
|
||||||
|
request_id, tmp.name, model_name, device,
|
||||||
|
compute_type, language, on_segment=chunk_on_segment,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Offset timestamps and merge
|
||||||
|
for seg in chunk_result.segments:
|
||||||
|
seg.start_ms += chunk_start_ms
|
||||||
|
seg.end_ms += chunk_start_ms
|
||||||
|
for word in seg.words:
|
||||||
|
word.start_ms += chunk_start_ms
|
||||||
|
word.end_ms += chunk_start_ms
|
||||||
|
merged_result.segments.append(seg)
|
||||||
|
|
||||||
|
global_segment_index += len(chunk_result.segments)
|
||||||
|
|
||||||
|
# Take language from first chunk
|
||||||
|
if chunk_idx == 0:
|
||||||
|
merged_result.language = chunk_result.language
|
||||||
|
merged_result.language_probability = chunk_result.language_probability
|
||||||
|
|
||||||
|
finally:
|
||||||
|
import os
|
||||||
|
os.unlink(tmp.name)
|
||||||
|
|
||||||
|
# Chunk progress
|
||||||
|
chunk_pct = min(10 + int(((chunk_idx + 1) / num_chunks) * 80), 90)
|
||||||
|
write_message(progress_message(
|
||||||
|
request_id, chunk_pct, "transcribing",
|
||||||
|
f"Completed chunk {chunk_idx + 1}/{num_chunks}..."))
|
||||||
|
|
||||||
|
merged_result.duration_ms = int(total_duration * 1000)
|
||||||
|
write_message(progress_message(request_id, 100, "done", "Transcription complete"))
|
||||||
|
return merged_result
|
||||||
|
|
||||||
|
|
||||||
def result_to_payload(result: TranscriptionResult) -> dict[str, Any]:
|
def result_to_payload(result: TranscriptionResult) -> dict[str, Any]:
|
||||||
"""Convert TranscriptionResult to IPC payload dict."""
|
"""Convert TranscriptionResult to IPC payload dict."""
|
||||||
|
|||||||
43
python/voice_to_notes/utils/ffmpeg.py
Normal file
43
python/voice_to_notes/utils/ffmpeg.py
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
"""Resolve ffmpeg/ffprobe paths for both frozen and development builds."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
|
||||||
|
def get_ffmpeg_path() -> str:
|
||||||
|
"""Return the path to the ffmpeg binary.
|
||||||
|
|
||||||
|
When running as a frozen PyInstaller bundle, looks next to sys.executable.
|
||||||
|
Otherwise falls back to the system PATH.
|
||||||
|
"""
|
||||||
|
if getattr(sys, "frozen", False):
|
||||||
|
# Frozen PyInstaller bundle — ffmpeg is next to the sidecar binary
|
||||||
|
bundle_dir = os.path.dirname(sys.executable)
|
||||||
|
candidates = [
|
||||||
|
os.path.join(bundle_dir, "ffmpeg.exe" if sys.platform == "win32" else "ffmpeg"),
|
||||||
|
os.path.join(bundle_dir, "ffmpeg"),
|
||||||
|
]
|
||||||
|
for path in candidates:
|
||||||
|
if os.path.isfile(path):
|
||||||
|
return path
|
||||||
|
return "ffmpeg"
|
||||||
|
|
||||||
|
|
||||||
|
def get_ffprobe_path() -> str:
|
||||||
|
"""Return the path to the ffprobe binary.
|
||||||
|
|
||||||
|
When running as a frozen PyInstaller bundle, looks next to sys.executable.
|
||||||
|
Otherwise falls back to the system PATH.
|
||||||
|
"""
|
||||||
|
if getattr(sys, "frozen", False):
|
||||||
|
bundle_dir = os.path.dirname(sys.executable)
|
||||||
|
candidates = [
|
||||||
|
os.path.join(bundle_dir, "ffprobe.exe" if sys.platform == "win32" else "ffprobe"),
|
||||||
|
os.path.join(bundle_dir, "ffprobe"),
|
||||||
|
]
|
||||||
|
for path in candidates:
|
||||||
|
if os.path.isfile(path):
|
||||||
|
return path
|
||||||
|
return "ffprobe"
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "voice-to-notes"
|
name = "voice-to-notes"
|
||||||
version = "0.1.0"
|
version = "0.2.5"
|
||||||
description = "Voice to Notes — desktop transcription with speaker identification"
|
description = "Voice to Notes — desktop transcription with speaker identification"
|
||||||
authors = ["Voice to Notes Contributors"]
|
authors = ["Voice to Notes Contributors"]
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
@@ -20,6 +20,7 @@ serde = { version = "1", features = ["derive"] }
|
|||||||
serde_json = "1"
|
serde_json = "1"
|
||||||
rusqlite = { version = "0.31", features = ["bundled"] }
|
rusqlite = { version = "0.31", features = ["bundled"] }
|
||||||
uuid = { version = "1", features = ["v4", "serde"] }
|
uuid = { version = "1", features = ["v4", "serde"] }
|
||||||
|
zip = { version = "2", default-features = false, features = ["deflate"] }
|
||||||
thiserror = "1"
|
thiserror = "1"
|
||||||
chrono = { version = "0.4", features = ["serde"] }
|
chrono = { version = "0.4", features = ["serde"] }
|
||||||
tauri-plugin-dialog = "2.6.0"
|
tauri-plugin-dialog = "2.6.0"
|
||||||
|
|||||||
0
src-tauri/binaries/.gitkeep
Normal file
0
src-tauri/binaries/.gitkeep
Normal file
@@ -1,3 +1,21 @@
|
|||||||
fn main() {
|
fn main() {
|
||||||
|
// Ensure sidecar.zip exists so tauri-build doesn't fail.
|
||||||
|
// CI replaces this placeholder with the real PyInstaller sidecar archive.
|
||||||
|
let zip_path = std::path::Path::new("sidecar.zip");
|
||||||
|
if !zip_path.exists() {
|
||||||
|
// Minimal valid zip (empty archive): end-of-central-directory record
|
||||||
|
let empty_zip: [u8; 22] = [
|
||||||
|
0x50, 0x4b, 0x05, 0x06, // EOCD signature
|
||||||
|
0x00, 0x00, // disk number
|
||||||
|
0x00, 0x00, // disk with central dir
|
||||||
|
0x00, 0x00, // entries on this disk
|
||||||
|
0x00, 0x00, // total entries
|
||||||
|
0x00, 0x00, 0x00, 0x00, // central dir size
|
||||||
|
0x00, 0x00, 0x00, 0x00, // central dir offset
|
||||||
|
0x00, 0x00, // comment length
|
||||||
|
];
|
||||||
|
std::fs::write(zip_path, empty_zip).expect("Failed to create placeholder sidecar.zip");
|
||||||
|
}
|
||||||
|
|
||||||
tauri_build::build()
|
tauri_build::build()
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -39,7 +39,11 @@ pub fn ai_chat(
|
|||||||
if response.msg_type == "error" {
|
if response.msg_type == "error" {
|
||||||
return Err(format!(
|
return Err(format!(
|
||||||
"AI error: {}",
|
"AI error: {}",
|
||||||
response.payload.get("message").and_then(|v| v.as_str()).unwrap_or("unknown")
|
response
|
||||||
|
.payload
|
||||||
|
.get("message")
|
||||||
|
.and_then(|v| v.as_str())
|
||||||
|
.unwrap_or("unknown")
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -33,7 +33,11 @@ pub fn export_transcript(
|
|||||||
if response.msg_type == "error" {
|
if response.msg_type == "error" {
|
||||||
return Err(format!(
|
return Err(format!(
|
||||||
"Export error: {}",
|
"Export error: {}",
|
||||||
response.payload.get("message").and_then(|v| v.as_str()).unwrap_or("unknown")
|
response
|
||||||
|
.payload
|
||||||
|
.get("message")
|
||||||
|
.and_then(|v| v.as_str())
|
||||||
|
.unwrap_or("unknown")
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,9 +1,110 @@
|
|||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use std::fs;
|
||||||
use tauri::State;
|
use tauri::State;
|
||||||
|
|
||||||
use crate::db::models::Project;
|
use crate::db::models::Project;
|
||||||
use crate::db::queries;
|
use crate::db::queries;
|
||||||
use crate::state::AppState;
|
use crate::state::AppState;
|
||||||
|
|
||||||
|
// ── File-based project types ────────────────────────────────────
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize)]
|
||||||
|
pub struct ProjectFile {
|
||||||
|
pub version: u32,
|
||||||
|
pub name: String,
|
||||||
|
pub audio_file: String,
|
||||||
|
pub created_at: String,
|
||||||
|
pub segments: Vec<ProjectFileSegment>,
|
||||||
|
pub speakers: Vec<ProjectFileSpeaker>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize)]
|
||||||
|
pub struct ProjectFileSegment {
|
||||||
|
pub text: String,
|
||||||
|
pub start_ms: i64,
|
||||||
|
pub end_ms: i64,
|
||||||
|
pub speaker: Option<String>,
|
||||||
|
pub is_edited: bool,
|
||||||
|
pub words: Vec<ProjectFileWord>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize)]
|
||||||
|
pub struct ProjectFileWord {
|
||||||
|
pub word: String,
|
||||||
|
pub start_ms: i64,
|
||||||
|
pub end_ms: i64,
|
||||||
|
pub confidence: f64,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize)]
|
||||||
|
pub struct ProjectFileSpeaker {
|
||||||
|
pub label: String,
|
||||||
|
pub display_name: Option<String>,
|
||||||
|
pub color: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Input types for save_project_transcript ──────────────────────
|
||||||
|
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
pub struct WordInput {
|
||||||
|
pub word: String,
|
||||||
|
pub start_ms: i64,
|
||||||
|
pub end_ms: i64,
|
||||||
|
pub confidence: f64,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
pub struct SegmentInput {
|
||||||
|
pub text: String,
|
||||||
|
pub start_ms: i64,
|
||||||
|
pub end_ms: i64,
|
||||||
|
pub speaker: Option<String>, // speaker label, not id
|
||||||
|
pub words: Vec<WordInput>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
pub struct SpeakerInput {
|
||||||
|
pub label: String,
|
||||||
|
pub color: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Output types for load_project_transcript ─────────────────────
|
||||||
|
|
||||||
|
#[derive(Serialize)]
|
||||||
|
pub struct WordOutput {
|
||||||
|
pub word: String,
|
||||||
|
pub start_ms: i64,
|
||||||
|
pub end_ms: i64,
|
||||||
|
pub confidence: Option<f64>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize)]
|
||||||
|
pub struct SegmentOutput {
|
||||||
|
pub id: String,
|
||||||
|
pub text: String,
|
||||||
|
pub start_ms: i64,
|
||||||
|
pub end_ms: i64,
|
||||||
|
pub speaker: Option<String>, // speaker label
|
||||||
|
pub words: Vec<WordOutput>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize)]
|
||||||
|
pub struct SpeakerOutput {
|
||||||
|
pub id: String,
|
||||||
|
pub label: String,
|
||||||
|
pub display_name: Option<String>,
|
||||||
|
pub color: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize)]
|
||||||
|
pub struct ProjectTranscript {
|
||||||
|
pub file_path: String,
|
||||||
|
pub segments: Vec<SegmentOutput>,
|
||||||
|
pub speakers: Vec<SpeakerOutput>,
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Commands ─────────────────────────────────────────────────────
|
||||||
|
|
||||||
#[tauri::command]
|
#[tauri::command]
|
||||||
pub fn create_project(name: String, state: State<AppState>) -> Result<Project, String> {
|
pub fn create_project(name: String, state: State<AppState>) -> Result<Project, String> {
|
||||||
let conn = state.db.lock().map_err(|e| e.to_string())?;
|
let conn = state.db.lock().map_err(|e| e.to_string())?;
|
||||||
@@ -21,3 +122,176 @@ pub fn list_projects(state: State<AppState>) -> Result<Vec<Project>, String> {
|
|||||||
let conn = state.db.lock().map_err(|e| e.to_string())?;
|
let conn = state.db.lock().map_err(|e| e.to_string())?;
|
||||||
queries::list_projects(&conn).map_err(|e| e.to_string())
|
queries::list_projects(&conn).map_err(|e| e.to_string())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[tauri::command]
|
||||||
|
pub fn delete_project(id: String, state: State<AppState>) -> Result<(), String> {
|
||||||
|
let conn = state.db.lock().map_err(|e| e.to_string())?;
|
||||||
|
queries::delete_project(&conn, &id).map_err(|e| e.to_string())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tauri::command]
|
||||||
|
pub fn update_segment(
|
||||||
|
segment_id: String,
|
||||||
|
new_text: String,
|
||||||
|
state: State<AppState>,
|
||||||
|
) -> Result<(), String> {
|
||||||
|
let conn = state.db.lock().map_err(|e| e.to_string())?;
|
||||||
|
queries::update_segment_text(&conn, &segment_id, &new_text).map_err(|e| e.to_string())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tauri::command]
|
||||||
|
pub fn save_project_transcript(
|
||||||
|
project_id: String,
|
||||||
|
file_path: String,
|
||||||
|
segments: Vec<SegmentInput>,
|
||||||
|
speakers: Vec<SpeakerInput>,
|
||||||
|
state: State<AppState>,
|
||||||
|
) -> Result<Project, String> {
|
||||||
|
let conn = state.db.lock().map_err(|e| e.to_string())?;
|
||||||
|
|
||||||
|
// 1. Create media file entry
|
||||||
|
let media_file =
|
||||||
|
queries::create_media_file(&conn, &project_id, &file_path).map_err(|e| e.to_string())?;
|
||||||
|
|
||||||
|
// 2. Create speaker entries and build label -> id map
|
||||||
|
let mut speaker_map = std::collections::HashMap::new();
|
||||||
|
for speaker_input in &speakers {
|
||||||
|
let speaker = queries::create_speaker(
|
||||||
|
&conn,
|
||||||
|
&project_id,
|
||||||
|
&speaker_input.label,
|
||||||
|
Some(&speaker_input.color),
|
||||||
|
)
|
||||||
|
.map_err(|e| e.to_string())?;
|
||||||
|
speaker_map.insert(speaker_input.label.clone(), speaker.id);
|
||||||
|
}
|
||||||
|
|
||||||
|
// 3. Create segments with words
|
||||||
|
for (index, seg_input) in segments.iter().enumerate() {
|
||||||
|
let speaker_id = seg_input
|
||||||
|
.speaker
|
||||||
|
.as_ref()
|
||||||
|
.and_then(|label| speaker_map.get(label));
|
||||||
|
|
||||||
|
let segment = queries::create_segment(
|
||||||
|
&conn,
|
||||||
|
&project_id,
|
||||||
|
&media_file.id,
|
||||||
|
speaker_id.map(|s| s.as_str()),
|
||||||
|
seg_input.start_ms,
|
||||||
|
seg_input.end_ms,
|
||||||
|
&seg_input.text,
|
||||||
|
index as i32,
|
||||||
|
)
|
||||||
|
.map_err(|e| e.to_string())?;
|
||||||
|
|
||||||
|
// Create words for this segment
|
||||||
|
for (word_index, word_input) in seg_input.words.iter().enumerate() {
|
||||||
|
queries::create_word(
|
||||||
|
&conn,
|
||||||
|
&segment.id,
|
||||||
|
&word_input.word,
|
||||||
|
word_input.start_ms,
|
||||||
|
word_input.end_ms,
|
||||||
|
Some(word_input.confidence),
|
||||||
|
word_index as i32,
|
||||||
|
)
|
||||||
|
.map_err(|e| e.to_string())?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 4. Return updated project info
|
||||||
|
queries::get_project(&conn, &project_id)
|
||||||
|
.map_err(|e| e.to_string())?
|
||||||
|
.ok_or_else(|| "Project not found".to_string())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tauri::command]
|
||||||
|
pub fn load_project_transcript(
|
||||||
|
project_id: String,
|
||||||
|
state: State<AppState>,
|
||||||
|
) -> Result<Option<ProjectTranscript>, String> {
|
||||||
|
let conn = state.db.lock().map_err(|e| e.to_string())?;
|
||||||
|
|
||||||
|
// 1. Get media files for the project
|
||||||
|
let media_files =
|
||||||
|
queries::get_media_files_for_project(&conn, &project_id).map_err(|e| e.to_string())?;
|
||||||
|
|
||||||
|
let media_file = match media_files.first() {
|
||||||
|
Some(mf) => mf,
|
||||||
|
None => return Ok(None),
|
||||||
|
};
|
||||||
|
|
||||||
|
// 2. Get speakers for the project and build id -> label map
|
||||||
|
let speakers =
|
||||||
|
queries::get_speakers_for_project(&conn, &project_id).map_err(|e| e.to_string())?;
|
||||||
|
let speaker_label_map: std::collections::HashMap<String, String> = speakers
|
||||||
|
.iter()
|
||||||
|
.map(|s| (s.id.clone(), s.label.clone()))
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
// 3. Get segments for the media file
|
||||||
|
let db_segments =
|
||||||
|
queries::get_segments_for_media(&conn, &media_file.id).map_err(|e| e.to_string())?;
|
||||||
|
|
||||||
|
// 4. Build output segments with nested words
|
||||||
|
let mut segment_outputs = Vec::with_capacity(db_segments.len());
|
||||||
|
for seg in &db_segments {
|
||||||
|
let words = queries::get_words_for_segment(&conn, &seg.id).map_err(|e| e.to_string())?;
|
||||||
|
let word_outputs: Vec<WordOutput> = words
|
||||||
|
.into_iter()
|
||||||
|
.map(|w| WordOutput {
|
||||||
|
word: w.word,
|
||||||
|
start_ms: w.start_ms,
|
||||||
|
end_ms: w.end_ms,
|
||||||
|
confidence: w.confidence,
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let speaker_label = seg
|
||||||
|
.speaker_id
|
||||||
|
.as_ref()
|
||||||
|
.and_then(|sid| speaker_label_map.get(sid))
|
||||||
|
.cloned();
|
||||||
|
|
||||||
|
segment_outputs.push(SegmentOutput {
|
||||||
|
id: seg.id.clone(),
|
||||||
|
text: seg.text.clone(),
|
||||||
|
start_ms: seg.start_ms,
|
||||||
|
end_ms: seg.end_ms,
|
||||||
|
speaker: speaker_label,
|
||||||
|
words: word_outputs,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// 5. Build speaker outputs
|
||||||
|
let speaker_outputs: Vec<SpeakerOutput> = speakers
|
||||||
|
.into_iter()
|
||||||
|
.map(|s| SpeakerOutput {
|
||||||
|
id: s.id,
|
||||||
|
label: s.label,
|
||||||
|
display_name: s.display_name,
|
||||||
|
color: s.color,
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
Ok(Some(ProjectTranscript {
|
||||||
|
file_path: media_file.file_path.clone(),
|
||||||
|
segments: segment_outputs,
|
||||||
|
speakers: speaker_outputs,
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── File-based project commands ─────────────────────────────────
|
||||||
|
|
||||||
|
#[tauri::command]
|
||||||
|
pub fn save_project_file(path: String, project: ProjectFile) -> Result<(), String> {
|
||||||
|
let json = serde_json::to_string_pretty(&project).map_err(|e| e.to_string())?;
|
||||||
|
fs::write(&path, json).map_err(|e| format!("Failed to save project: {e}"))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tauri::command]
|
||||||
|
pub fn load_project_file(path: String) -> Result<ProjectFile, String> {
|
||||||
|
let json = fs::read_to_string(&path).map_err(|e| format!("Failed to read project: {e}"))?;
|
||||||
|
serde_json::from_str(&json).map_err(|e| format!("Failed to parse project: {e}"))
|
||||||
|
}
|
||||||
|
|||||||
@@ -22,9 +22,7 @@ pub fn llama_start(
|
|||||||
threads: Option<u32>,
|
threads: Option<u32>,
|
||||||
) -> Result<LlamaStatus, String> {
|
) -> Result<LlamaStatus, String> {
|
||||||
let config = LlamaConfig {
|
let config = LlamaConfig {
|
||||||
binary_path: PathBuf::from(
|
binary_path: PathBuf::from(binary_path.unwrap_or_else(|| "llama-server".to_string())),
|
||||||
binary_path.unwrap_or_else(|| "llama-server".to_string()),
|
|
||||||
),
|
|
||||||
model_path: PathBuf::from(model_path),
|
model_path: PathBuf::from(model_path),
|
||||||
port: port.unwrap_or(0),
|
port: port.unwrap_or(0),
|
||||||
n_gpu_layers: n_gpu_layers.unwrap_or(0),
|
n_gpu_layers: n_gpu_layers.unwrap_or(0),
|
||||||
|
|||||||
@@ -33,16 +33,47 @@ pub fn transcribe_file(
|
|||||||
if response.msg_type == "error" {
|
if response.msg_type == "error" {
|
||||||
return Err(format!(
|
return Err(format!(
|
||||||
"Transcription error: {}",
|
"Transcription error: {}",
|
||||||
response.payload.get("message").and_then(|v| v.as_str()).unwrap_or("unknown")
|
response
|
||||||
|
.payload
|
||||||
|
.get("message")
|
||||||
|
.and_then(|v| v.as_str())
|
||||||
|
.unwrap_or("unknown")
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(response.payload)
|
Ok(response.payload)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Download and validate the diarization model via the Python sidecar.
|
||||||
|
#[tauri::command]
|
||||||
|
pub fn download_diarize_model(hf_token: String) -> Result<Value, String> {
|
||||||
|
let manager = sidecar();
|
||||||
|
manager.ensure_running()?;
|
||||||
|
|
||||||
|
let request_id = uuid::Uuid::new_v4().to_string();
|
||||||
|
let msg = IPCMessage::new(
|
||||||
|
&request_id,
|
||||||
|
"diarize.download",
|
||||||
|
json!({
|
||||||
|
"hf_token": hf_token,
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
|
||||||
|
let response = manager.send_and_receive(&msg)?;
|
||||||
|
|
||||||
|
if response.msg_type == "error" {
|
||||||
|
return Ok(json!({
|
||||||
|
"ok": false,
|
||||||
|
"error": response.payload.get("message").and_then(|v| v.as_str()).unwrap_or("unknown"),
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(json!({ "ok": true }))
|
||||||
|
}
|
||||||
|
|
||||||
/// Run the full transcription + diarization pipeline via the Python sidecar.
|
/// Run the full transcription + diarization pipeline via the Python sidecar.
|
||||||
#[tauri::command]
|
#[tauri::command]
|
||||||
pub fn run_pipeline(
|
pub async fn run_pipeline(
|
||||||
app: AppHandle,
|
app: AppHandle,
|
||||||
file_path: String,
|
file_path: String,
|
||||||
model: Option<String>,
|
model: Option<String>,
|
||||||
@@ -52,6 +83,7 @@ pub fn run_pipeline(
|
|||||||
min_speakers: Option<u32>,
|
min_speakers: Option<u32>,
|
||||||
max_speakers: Option<u32>,
|
max_speakers: Option<u32>,
|
||||||
skip_diarization: Option<bool>,
|
skip_diarization: Option<bool>,
|
||||||
|
hf_token: Option<String>,
|
||||||
) -> Result<Value, String> {
|
) -> Result<Value, String> {
|
||||||
let manager = sidecar();
|
let manager = sidecar();
|
||||||
manager.ensure_running()?;
|
manager.ensure_running()?;
|
||||||
@@ -70,19 +102,38 @@ pub fn run_pipeline(
|
|||||||
"min_speakers": min_speakers,
|
"min_speakers": min_speakers,
|
||||||
"max_speakers": max_speakers,
|
"max_speakers": max_speakers,
|
||||||
"skip_diarization": skip_diarization.unwrap_or(false),
|
"skip_diarization": skip_diarization.unwrap_or(false),
|
||||||
|
"hf_token": hf_token,
|
||||||
}),
|
}),
|
||||||
);
|
);
|
||||||
|
|
||||||
let response = manager.send_and_receive_with_progress(&msg, |progress| {
|
// Run the blocking sidecar I/O on a separate thread so the async runtime
|
||||||
let _ = app.emit("pipeline-progress", &progress.payload);
|
// can deliver emitted events to the webview while processing is ongoing.
|
||||||
|
let app_handle = app.clone();
|
||||||
|
tauri::async_runtime::spawn_blocking(move || {
|
||||||
|
let response = manager.send_and_receive_with_progress(&msg, |msg| {
|
||||||
|
let event_name = match msg.msg_type.as_str() {
|
||||||
|
"pipeline.segment" => "pipeline-segment",
|
||||||
|
"pipeline.speaker_update" => "pipeline-speaker-update",
|
||||||
|
_ => "pipeline-progress",
|
||||||
|
};
|
||||||
|
if let Err(e) = app_handle.emit(event_name, &msg.payload) {
|
||||||
|
eprintln!("[sidecar-rs] Failed to emit {event_name}: {e}");
|
||||||
|
}
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
if response.msg_type == "error" {
|
if response.msg_type == "error" {
|
||||||
return Err(format!(
|
return Err(format!(
|
||||||
"Pipeline error: {}",
|
"Pipeline error: {}",
|
||||||
response.payload.get("message").and_then(|v| v.as_str()).unwrap_or("unknown")
|
response
|
||||||
|
.payload
|
||||||
|
.get("message")
|
||||||
|
.and_then(|v| v.as_str())
|
||||||
|
.unwrap_or("unknown")
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(response.payload)
|
Ok(response.payload)
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.map_err(|e| format!("Pipeline task failed: {e}"))?
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -85,6 +85,57 @@ pub fn delete_project(conn: &Connection, id: &str) -> Result<(), DatabaseError>
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── Media Files ──────────────────────────────────────────────────
|
||||||
|
|
||||||
|
pub fn create_media_file(
|
||||||
|
conn: &Connection,
|
||||||
|
project_id: &str,
|
||||||
|
file_path: &str,
|
||||||
|
) -> Result<MediaFile, DatabaseError> {
|
||||||
|
let id = Uuid::new_v4().to_string();
|
||||||
|
let now = Utc::now().to_rfc3339();
|
||||||
|
conn.execute(
|
||||||
|
"INSERT INTO media_files (id, project_id, file_path, created_at) VALUES (?1, ?2, ?3, ?4)",
|
||||||
|
params![id, project_id, file_path, now],
|
||||||
|
)?;
|
||||||
|
Ok(MediaFile {
|
||||||
|
id,
|
||||||
|
project_id: project_id.to_string(),
|
||||||
|
file_path: file_path.to_string(),
|
||||||
|
file_hash: None,
|
||||||
|
duration_ms: None,
|
||||||
|
sample_rate: None,
|
||||||
|
channels: None,
|
||||||
|
format: None,
|
||||||
|
file_size: None,
|
||||||
|
created_at: now,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn get_media_files_for_project(
|
||||||
|
conn: &Connection,
|
||||||
|
project_id: &str,
|
||||||
|
) -> Result<Vec<MediaFile>, DatabaseError> {
|
||||||
|
let mut stmt = conn.prepare(
|
||||||
|
"SELECT id, project_id, file_path, file_hash, duration_ms, sample_rate, channels, format, file_size, created_at FROM media_files WHERE project_id = ?1 ORDER BY created_at",
|
||||||
|
)?;
|
||||||
|
let rows = stmt.query_map(params![project_id], |row| {
|
||||||
|
Ok(MediaFile {
|
||||||
|
id: row.get(0)?,
|
||||||
|
project_id: row.get(1)?,
|
||||||
|
file_path: row.get(2)?,
|
||||||
|
file_hash: row.get(3)?,
|
||||||
|
duration_ms: row.get(4)?,
|
||||||
|
sample_rate: row.get(5)?,
|
||||||
|
channels: row.get(6)?,
|
||||||
|
format: row.get(7)?,
|
||||||
|
file_size: row.get(8)?,
|
||||||
|
created_at: row.get(9)?,
|
||||||
|
})
|
||||||
|
})?;
|
||||||
|
Ok(rows.collect::<Result<Vec<_>, _>>()?)
|
||||||
|
}
|
||||||
|
|
||||||
// ── Speakers ──────────────────────────────────────────────────────
|
// ── Speakers ──────────────────────────────────────────────────────
|
||||||
|
|
||||||
pub fn create_speaker(
|
pub fn create_speaker(
|
||||||
@@ -194,6 +245,39 @@ pub fn reassign_speaker(
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── Segments (create) ────────────────────────────────────────────
|
||||||
|
|
||||||
|
pub fn create_segment(
|
||||||
|
conn: &Connection,
|
||||||
|
project_id: &str,
|
||||||
|
media_file_id: &str,
|
||||||
|
speaker_id: Option<&str>,
|
||||||
|
start_ms: i64,
|
||||||
|
end_ms: i64,
|
||||||
|
text: &str,
|
||||||
|
segment_index: i32,
|
||||||
|
) -> Result<Segment, DatabaseError> {
|
||||||
|
let id = Uuid::new_v4().to_string();
|
||||||
|
conn.execute(
|
||||||
|
"INSERT INTO segments (id, project_id, media_file_id, speaker_id, start_ms, end_ms, text, is_edited, segment_index) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, 0, ?8)",
|
||||||
|
params![id, project_id, media_file_id, speaker_id, start_ms, end_ms, text, segment_index],
|
||||||
|
)?;
|
||||||
|
Ok(Segment {
|
||||||
|
id,
|
||||||
|
project_id: project_id.to_string(),
|
||||||
|
media_file_id: media_file_id.to_string(),
|
||||||
|
speaker_id: speaker_id.map(String::from),
|
||||||
|
start_ms,
|
||||||
|
end_ms,
|
||||||
|
text: text.to_string(),
|
||||||
|
original_text: None,
|
||||||
|
confidence: None,
|
||||||
|
is_edited: false,
|
||||||
|
edited_at: None,
|
||||||
|
segment_index,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
// ── Words ─────────────────────────────────────────────────────────
|
// ── Words ─────────────────────────────────────────────────────────
|
||||||
|
|
||||||
pub fn get_words_for_segment(
|
pub fn get_words_for_segment(
|
||||||
@@ -217,6 +301,31 @@ pub fn get_words_for_segment(
|
|||||||
Ok(rows.collect::<Result<Vec<_>, _>>()?)
|
Ok(rows.collect::<Result<Vec<_>, _>>()?)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn create_word(
|
||||||
|
conn: &Connection,
|
||||||
|
segment_id: &str,
|
||||||
|
word: &str,
|
||||||
|
start_ms: i64,
|
||||||
|
end_ms: i64,
|
||||||
|
confidence: Option<f64>,
|
||||||
|
word_index: i32,
|
||||||
|
) -> Result<Word, DatabaseError> {
|
||||||
|
let id = Uuid::new_v4().to_string();
|
||||||
|
conn.execute(
|
||||||
|
"INSERT INTO words (id, segment_id, word, start_ms, end_ms, confidence, word_index) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)",
|
||||||
|
params![id, segment_id, word, start_ms, end_ms, confidence, word_index],
|
||||||
|
)?;
|
||||||
|
Ok(Word {
|
||||||
|
id,
|
||||||
|
segment_id: segment_id.to_string(),
|
||||||
|
word: word.to_string(),
|
||||||
|
start_ms,
|
||||||
|
end_ms,
|
||||||
|
confidence,
|
||||||
|
word_index,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|||||||
@@ -96,11 +96,7 @@ pub fn create_tables(conn: &Connection) -> Result<(), DatabaseError> {
|
|||||||
)?;
|
)?;
|
||||||
|
|
||||||
// Initialize schema version if empty
|
// Initialize schema version if empty
|
||||||
let count: i32 = conn.query_row(
|
let count: i32 = conn.query_row("SELECT COUNT(*) FROM schema_version", [], |row| row.get(0))?;
|
||||||
"SELECT COUNT(*) FROM schema_version",
|
|
||||||
[],
|
|
||||||
|row| row.get(0),
|
|
||||||
)?;
|
|
||||||
if count == 0 {
|
if count == 0 {
|
||||||
conn.execute(
|
conn.execute(
|
||||||
"INSERT INTO schema_version (version) VALUES (?1)",
|
"INSERT INTO schema_version (version) VALUES (?1)",
|
||||||
|
|||||||
@@ -4,12 +4,18 @@ pub mod llama;
|
|||||||
pub mod sidecar;
|
pub mod sidecar;
|
||||||
pub mod state;
|
pub mod state;
|
||||||
|
|
||||||
|
use tauri::window::Color;
|
||||||
|
use tauri::Manager;
|
||||||
|
|
||||||
use commands::ai::{ai_chat, ai_configure, ai_list_providers};
|
use commands::ai::{ai_chat, ai_configure, ai_list_providers};
|
||||||
use commands::export::export_transcript;
|
use commands::export::export_transcript;
|
||||||
use commands::project::{create_project, get_project, list_projects};
|
use commands::project::{
|
||||||
|
create_project, delete_project, get_project, list_projects, load_project_file,
|
||||||
|
load_project_transcript, save_project_file, save_project_transcript, update_segment,
|
||||||
|
};
|
||||||
use commands::settings::{load_settings, save_settings};
|
use commands::settings::{load_settings, save_settings};
|
||||||
use commands::system::{get_data_dir, llama_list_models, llama_start, llama_status, llama_stop};
|
use commands::system::{get_data_dir, llama_list_models, llama_start, llama_status, llama_stop};
|
||||||
use commands::transcribe::{run_pipeline, transcribe_file};
|
use commands::transcribe::{download_diarize_model, run_pipeline, transcribe_file};
|
||||||
use state::AppState;
|
use state::AppState;
|
||||||
|
|
||||||
#[cfg_attr(mobile, tauri::mobile_entry_point)]
|
#[cfg_attr(mobile, tauri::mobile_entry_point)]
|
||||||
@@ -20,12 +26,34 @@ pub fn run() {
|
|||||||
.plugin(tauri_plugin_opener::init())
|
.plugin(tauri_plugin_opener::init())
|
||||||
.plugin(tauri_plugin_dialog::init())
|
.plugin(tauri_plugin_dialog::init())
|
||||||
.manage(app_state)
|
.manage(app_state)
|
||||||
|
.setup(|app| {
|
||||||
|
// Tell the sidecar manager where Tauri placed bundled resources
|
||||||
|
// and where to extract the sidecar archive
|
||||||
|
if let (Ok(resource_dir), Ok(data_dir)) =
|
||||||
|
(app.path().resource_dir(), app.path().app_local_data_dir())
|
||||||
|
{
|
||||||
|
sidecar::init_dirs(resource_dir, data_dir);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set the webview background to match the app's dark theme
|
||||||
|
if let Some(window) = app.get_webview_window("main") {
|
||||||
|
let _ = window.set_background_color(Some(Color(10, 10, 35, 255)));
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
})
|
||||||
.invoke_handler(tauri::generate_handler![
|
.invoke_handler(tauri::generate_handler![
|
||||||
create_project,
|
create_project,
|
||||||
get_project,
|
get_project,
|
||||||
list_projects,
|
list_projects,
|
||||||
|
delete_project,
|
||||||
|
save_project_transcript,
|
||||||
|
load_project_transcript,
|
||||||
|
update_segment,
|
||||||
|
save_project_file,
|
||||||
|
load_project_file,
|
||||||
transcribe_file,
|
transcribe_file,
|
||||||
run_pipeline,
|
run_pipeline,
|
||||||
|
download_diarize_model,
|
||||||
export_transcript,
|
export_transcript,
|
||||||
ai_chat,
|
ai_chat,
|
||||||
ai_list_providers,
|
ai_list_providers,
|
||||||
|
|||||||
@@ -237,11 +237,7 @@ impl LlamaManager {
|
|||||||
|
|
||||||
/// Get the current status.
|
/// Get the current status.
|
||||||
pub fn status(&self) -> LlamaStatus {
|
pub fn status(&self) -> LlamaStatus {
|
||||||
let running = self
|
let running = self.process.lock().ok().map_or(false, |p| p.is_some());
|
||||||
.process
|
|
||||||
.lock()
|
|
||||||
.ok()
|
|
||||||
.map_or(false, |p| p.is_some());
|
|
||||||
let port = self.port.lock().ok().map_or(0, |p| *p);
|
let port = self.port.lock().ok().map_or(0, |p| *p);
|
||||||
let model = self
|
let model = self
|
||||||
.model_path
|
.model_path
|
||||||
|
|||||||
@@ -2,19 +2,40 @@ pub mod ipc;
|
|||||||
pub mod messages;
|
pub mod messages;
|
||||||
|
|
||||||
use std::io::{BufRead, BufReader, Write};
|
use std::io::{BufRead, BufReader, Write};
|
||||||
|
use std::path::{Path, PathBuf};
|
||||||
use std::process::{Child, ChildStdin, Command, Stdio};
|
use std::process::{Child, ChildStdin, Command, Stdio};
|
||||||
use std::sync::{Mutex, OnceLock};
|
use std::sync::{Mutex, OnceLock};
|
||||||
|
|
||||||
|
#[cfg(target_os = "windows")]
|
||||||
|
use std::os::windows::process::CommandExt;
|
||||||
|
|
||||||
use crate::sidecar::messages::IPCMessage;
|
use crate::sidecar::messages::IPCMessage;
|
||||||
|
|
||||||
|
/// Resource directory set by the Tauri app during setup.
|
||||||
|
static RESOURCE_DIR: OnceLock<PathBuf> = OnceLock::new();
|
||||||
|
/// App data directory for extracting the sidecar archive.
|
||||||
|
static DATA_DIR: OnceLock<PathBuf> = OnceLock::new();
|
||||||
|
|
||||||
|
/// Initialize directories for sidecar resolution.
|
||||||
|
/// Must be called from the Tauri setup before any sidecar operations.
|
||||||
|
pub fn init_dirs(resource_dir: PathBuf, data_dir: PathBuf) {
|
||||||
|
RESOURCE_DIR.set(resource_dir).ok();
|
||||||
|
DATA_DIR.set(data_dir).ok();
|
||||||
|
}
|
||||||
|
|
||||||
/// Get the global sidecar manager singleton.
|
/// Get the global sidecar manager singleton.
|
||||||
pub fn sidecar() -> &'static SidecarManager {
|
pub fn sidecar() -> &'static SidecarManager {
|
||||||
static INSTANCE: OnceLock<SidecarManager> = OnceLock::new();
|
static INSTANCE: OnceLock<SidecarManager> = OnceLock::new();
|
||||||
INSTANCE.get_or_init(SidecarManager::new)
|
INSTANCE.get_or_init(SidecarManager::new)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Manages the Python sidecar process lifecycle.
|
/// Manages the sidecar process lifecycle.
|
||||||
/// Uses separated stdin/stdout ownership to avoid BufReader conflicts.
|
///
|
||||||
|
/// Supports two modes:
|
||||||
|
/// - **Production**: spawns a frozen PyInstaller binary (no Python required)
|
||||||
|
/// - **Dev mode**: spawns system Python with `-m voice_to_notes.main`
|
||||||
|
///
|
||||||
|
/// Dev mode is active when compiled in debug mode or when `VOICE_TO_NOTES_DEV=1`.
|
||||||
pub struct SidecarManager {
|
pub struct SidecarManager {
|
||||||
process: Mutex<Option<Child>>,
|
process: Mutex<Option<Child>>,
|
||||||
stdin: Mutex<Option<ChildStdin>>,
|
stdin: Mutex<Option<ChildStdin>>,
|
||||||
@@ -30,38 +51,251 @@ impl SidecarManager {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Check if we should use dev mode (system Python).
///
/// Returns `true` in debug builds, or when the `VOICE_TO_NOTES_DEV`
/// environment variable is set to a truthy value. An empty value, `0`,
/// `false`, `no` and `off` (case-insensitive, surrounding whitespace
/// ignored) count as "not set", so `VOICE_TO_NOTES_DEV=0` does not force dev
/// mode in a release build. Any other value (e.g. `1`, `true`) enables it.
fn is_dev_mode() -> bool {
    if cfg!(debug_assertions) {
        return true;
    }

    match std::env::var("VOICE_TO_NOTES_DEV") {
        Ok(raw) => {
            let flag = raw.trim().to_ascii_lowercase();
            !matches!(flag.as_str(), "" | "0" | "false" | "no" | "off")
        }
        // Unset or not valid Unicode: treat as absent.
        Err(_) => false,
    }
}
|
||||||
|
|
||||||
|
/// Resolve the frozen sidecar binary path (production mode).
|
||||||
|
///
|
||||||
|
/// First checks if the sidecar is already extracted to the app data directory.
|
||||||
|
/// If not, looks for `sidecar.zip` in the Tauri resource directory and extracts it.
|
||||||
|
fn resolve_sidecar_path() -> Result<PathBuf, String> {
|
||||||
|
let binary_name = if cfg!(target_os = "windows") {
|
||||||
|
"voice-to-notes-sidecar.exe"
|
||||||
|
} else {
|
||||||
|
"voice-to-notes-sidecar"
|
||||||
|
};
|
||||||
|
|
||||||
|
// Versioned extraction directory prevents stale sidecar after app updates
|
||||||
|
let extract_dir = DATA_DIR
|
||||||
|
.get()
|
||||||
|
.ok_or("App data directory not initialized")?
|
||||||
|
.join(format!("sidecar-{}", env!("CARGO_PKG_VERSION")));
|
||||||
|
|
||||||
|
let binary_path = extract_dir.join(binary_name);
|
||||||
|
|
||||||
|
// Already extracted — use it directly
|
||||||
|
if binary_path.exists() {
|
||||||
|
return Ok(binary_path);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find sidecar.zip in resource dir or next to exe
|
||||||
|
let zip_path = Self::find_sidecar_zip()?;
|
||||||
|
Self::extract_zip(&zip_path, &extract_dir)?;
|
||||||
|
|
||||||
|
if !binary_path.exists() {
|
||||||
|
return Err(format!(
|
||||||
|
"Sidecar binary not found after extraction at {}",
|
||||||
|
binary_path.display()
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Make executable on Unix
|
||||||
|
#[cfg(unix)]
|
||||||
|
{
|
||||||
|
use std::os::unix::fs::PermissionsExt;
|
||||||
|
if let Ok(meta) = std::fs::metadata(&binary_path) {
|
||||||
|
let mut perms = meta.permissions();
|
||||||
|
perms.set_mode(0o755);
|
||||||
|
let _ = std::fs::set_permissions(&binary_path, perms);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(binary_path)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Locate the bundled sidecar.zip archive.
|
||||||
|
fn find_sidecar_zip() -> Result<PathBuf, String> {
|
||||||
|
let mut candidates: Vec<PathBuf> = Vec::new();
|
||||||
|
|
||||||
|
if let Some(resource_dir) = RESOURCE_DIR.get() {
|
||||||
|
candidates.push(resource_dir.join("sidecar.zip"));
|
||||||
|
}
|
||||||
|
if let Ok(exe) = std::env::current_exe() {
|
||||||
|
if let Some(exe_dir) = exe.parent() {
|
||||||
|
candidates.push(exe_dir.join("sidecar.zip"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for path in &candidates {
|
||||||
|
if path.exists() {
|
||||||
|
return Ok(path.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Err(format!(
|
||||||
|
"Sidecar archive not found. Checked:\n{}",
|
||||||
|
candidates
|
||||||
|
.iter()
|
||||||
|
.map(|p| format!(" {}", p.display()))
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
.join("\n"),
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Extract a zip archive to the given directory.
|
||||||
|
fn extract_zip(zip_path: &Path, dest: &Path) -> Result<(), String> {
|
||||||
|
eprintln!(
|
||||||
|
"[sidecar-rs] Extracting sidecar from {} to {}",
|
||||||
|
zip_path.display(),
|
||||||
|
dest.display()
|
||||||
|
);
|
||||||
|
|
||||||
|
// Clean destination so we don't mix old and new files
|
||||||
|
if dest.exists() {
|
||||||
|
std::fs::remove_dir_all(dest)
|
||||||
|
.map_err(|e| format!("Failed to clean extraction dir: {e}"))?;
|
||||||
|
}
|
||||||
|
std::fs::create_dir_all(dest)
|
||||||
|
.map_err(|e| format!("Failed to create extraction dir: {e}"))?;
|
||||||
|
|
||||||
|
let file =
|
||||||
|
std::fs::File::open(zip_path).map_err(|e| format!("Cannot open sidecar zip: {e}"))?;
|
||||||
|
let mut archive =
|
||||||
|
zip::ZipArchive::new(file).map_err(|e| format!("Invalid sidecar zip: {e}"))?;
|
||||||
|
|
||||||
|
for i in 0..archive.len() {
|
||||||
|
let mut entry = archive
|
||||||
|
.by_index(i)
|
||||||
|
.map_err(|e| format!("Zip entry error: {e}"))?;
|
||||||
|
|
||||||
|
let name = entry.name().to_string();
|
||||||
|
let outpath = dest.join(&name);
|
||||||
|
|
||||||
|
if entry.is_dir() {
|
||||||
|
std::fs::create_dir_all(&outpath)
|
||||||
|
.map_err(|e| format!("Cannot create dir {}: {e}", outpath.display()))?;
|
||||||
|
} else {
|
||||||
|
if let Some(parent) = outpath.parent() {
|
||||||
|
std::fs::create_dir_all(parent)
|
||||||
|
.map_err(|e| format!("Cannot create dir {}: {e}", parent.display()))?;
|
||||||
|
}
|
||||||
|
let mut outfile = std::fs::File::create(&outpath)
|
||||||
|
.map_err(|e| format!("Cannot create {}: {e}", outpath.display()))?;
|
||||||
|
std::io::copy(&mut entry, &mut outfile)
|
||||||
|
.map_err(|e| format!("Write error for {}: {e}", name))?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
eprintln!("[sidecar-rs] Sidecar extracted successfully");
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Name of the Python interpreter command for the current platform:
/// `python` on Windows, `python3` everywhere else.
///
/// This is a compile-time choice; it does not check that the command exists.
fn find_python_command() -> &'static str {
    match cfg!(target_os = "windows") {
        true => "python",
        false => "python3",
    }
}
|
||||||
|
|
||||||
|
/// Resolve the Python sidecar directory for dev mode.
|
||||||
|
fn resolve_python_dir() -> Result<std::path::PathBuf, String> {
|
||||||
|
let manifest_dir = env!("CARGO_MANIFEST_DIR");
|
||||||
|
let python_dir = std::path::Path::new(manifest_dir)
|
||||||
|
.join("../python")
|
||||||
|
.canonicalize()
|
||||||
|
.map_err(|e| format!("Cannot find python directory: {e}"))?;
|
||||||
|
|
||||||
|
if python_dir.exists() {
|
||||||
|
return Ok(python_dir);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fallback: relative to current exe
|
||||||
|
let exe = std::env::current_exe().map_err(|e| e.to_string())?;
|
||||||
|
let alt = exe
|
||||||
|
.parent()
|
||||||
|
.ok_or_else(|| "No parent dir".to_string())?
|
||||||
|
.join("../python")
|
||||||
|
.canonicalize()
|
||||||
|
.map_err(|e| format!("Cannot find python directory: {e}"))?;
|
||||||
|
|
||||||
|
Ok(alt)
|
||||||
|
}
|
||||||
|
|
||||||
/// Ensure the sidecar is running, starting it if needed.
|
/// Ensure the sidecar is running, starting it if needed.
|
||||||
pub fn ensure_running(&self) -> Result<(), String> {
|
pub fn ensure_running(&self) -> Result<(), String> {
|
||||||
if self.is_running() {
|
if self.is_running() {
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
|
|
||||||
let python_path = std::env::current_dir()
|
if Self::is_dev_mode() {
|
||||||
.map_err(|e| e.to_string())?
|
self.start_python_dev()
|
||||||
.join("../python")
|
} else {
|
||||||
.canonicalize()
|
let path = Self::resolve_sidecar_path()?;
|
||||||
.map_err(|e| format!("Cannot find python directory: {e}"))?;
|
self.start_binary(&path)
|
||||||
|
}
|
||||||
self.start(&python_path.to_string_lossy())
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Spawn the Python sidecar process.
|
/// Spawn the frozen sidecar binary (production mode).
|
||||||
pub fn start(&self, python_path: &str) -> Result<(), String> {
|
fn start_binary(&self, path: &std::path::Path) -> Result<(), String> {
|
||||||
// Stop existing process if any
|
|
||||||
self.stop().ok();
|
self.stop().ok();
|
||||||
|
eprintln!("[sidecar-rs] Starting frozen sidecar: {}", path.display());
|
||||||
|
|
||||||
let mut child = Command::new("python3")
|
// Log sidecar stderr to a file for diagnostics
|
||||||
|
let stderr_cfg = if let Some(data_dir) = DATA_DIR.get() {
|
||||||
|
let _ = std::fs::create_dir_all(data_dir);
|
||||||
|
let log_path = data_dir.join("sidecar.log");
|
||||||
|
eprintln!("[sidecar-rs] Sidecar stderr → {}", log_path.display());
|
||||||
|
match std::fs::File::create(&log_path) {
|
||||||
|
Ok(f) => Stdio::from(f),
|
||||||
|
Err(e) => {
|
||||||
|
eprintln!("[sidecar-rs] Failed to create sidecar.log: {e}");
|
||||||
|
Stdio::inherit()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
eprintln!("[sidecar-rs] DATA_DIR not set, sidecar stderr will not be logged");
|
||||||
|
Stdio::inherit()
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut cmd = Command::new(path);
|
||||||
|
cmd.stdin(Stdio::piped())
|
||||||
|
.stdout(Stdio::piped())
|
||||||
|
.stderr(stderr_cfg);
|
||||||
|
|
||||||
|
// Hide the console window on Windows (CREATE_NO_WINDOW = 0x08000000)
|
||||||
|
#[cfg(target_os = "windows")]
|
||||||
|
cmd.creation_flags(0x08000000);
|
||||||
|
|
||||||
|
let child = cmd
|
||||||
|
.spawn()
|
||||||
|
.map_err(|e| format!("Failed to start sidecar binary: {e}"))?;
|
||||||
|
|
||||||
|
self.attach(child)?;
|
||||||
|
self.wait_for_ready()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Spawn the Python sidecar in dev mode (system Python).
|
||||||
|
fn start_python_dev(&self) -> Result<(), String> {
|
||||||
|
self.stop().ok();
|
||||||
|
let python_dir = Self::resolve_python_dir()?;
|
||||||
|
let python_cmd = Self::find_python_command();
|
||||||
|
eprintln!(
|
||||||
|
"[sidecar-rs] Starting dev sidecar: {} -m voice_to_notes.main ({})",
|
||||||
|
python_cmd,
|
||||||
|
python_dir.display()
|
||||||
|
);
|
||||||
|
|
||||||
|
let child = Command::new(python_cmd)
|
||||||
.arg("-m")
|
.arg("-m")
|
||||||
.arg("voice_to_notes.main")
|
.arg("voice_to_notes.main")
|
||||||
.current_dir(python_path)
|
.current_dir(&python_dir)
|
||||||
.env("PYTHONPATH", python_path)
|
.env("PYTHONPATH", &python_dir)
|
||||||
.stdin(Stdio::piped())
|
.stdin(Stdio::piped())
|
||||||
.stdout(Stdio::piped())
|
.stdout(Stdio::piped())
|
||||||
.stderr(Stdio::inherit())
|
.stderr(Stdio::inherit())
|
||||||
.spawn()
|
.spawn()
|
||||||
.map_err(|e| format!("Failed to start sidecar: {e}"))?;
|
.map_err(|e| format!("Failed to start Python sidecar: {e}"))?;
|
||||||
|
|
||||||
// Take ownership of stdin and stdout separately
|
self.attach(child)?;
|
||||||
|
self.wait_for_ready()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Take ownership of a spawned child's stdin/stdout and store the process handle.
|
||||||
|
fn attach(&self, mut child: Child) -> Result<(), String> {
|
||||||
let stdin = child.stdin.take().ok_or("Failed to get sidecar stdin")?;
|
let stdin = child.stdin.take().ok_or("Failed to get sidecar stdin")?;
|
||||||
let stdout = child.stdout.take().ok_or("Failed to get sidecar stdout")?;
|
let stdout = child.stdout.take().ok_or("Failed to get sidecar stdout")?;
|
||||||
let buf_reader = BufReader::new(stdout);
|
let buf_reader = BufReader::new(stdout);
|
||||||
@@ -78,10 +312,6 @@ impl SidecarManager {
|
|||||||
let mut r = self.reader.lock().map_err(|e| e.to_string())?;
|
let mut r = self.reader.lock().map_err(|e| e.to_string())?;
|
||||||
*r = Some(buf_reader);
|
*r = Some(buf_reader);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Wait for the "ready" message
|
|
||||||
self.wait_for_ready()?;
|
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -96,7 +326,22 @@ impl SidecarManager {
|
|||||||
.read_line(&mut line)
|
.read_line(&mut line)
|
||||||
.map_err(|e| format!("Read error: {e}"))?;
|
.map_err(|e| format!("Read error: {e}"))?;
|
||||||
if bytes == 0 {
|
if bytes == 0 {
|
||||||
return Err("Sidecar closed stdout before sending ready".to_string());
|
// Try to get the exit code for diagnostics
|
||||||
|
let exit_info = {
|
||||||
|
let mut proc = self.process.lock().map_err(|e| e.to_string())?;
|
||||||
|
if let Some(ref mut child) = *proc {
|
||||||
|
match child.try_wait() {
|
||||||
|
Ok(Some(status)) => format!(" (exit status: {status})"),
|
||||||
|
_ => String::new(),
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
String::new()
|
||||||
|
}
|
||||||
|
};
|
||||||
|
return Err(format!(
|
||||||
|
"Sidecar closed stdout before sending ready{exit_info}. \
|
||||||
|
The Python sidecar may have crashed on startup — check app logs for details."
|
||||||
|
));
|
||||||
}
|
}
|
||||||
let trimmed = line.trim();
|
let trimmed = line.trim();
|
||||||
if trimmed.is_empty() {
|
if trimmed.is_empty() {
|
||||||
@@ -107,8 +352,12 @@ impl SidecarManager {
|
|||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Non-ready message: something is wrong
|
// Non-JSON or non-ready line — skip and keep waiting
|
||||||
break;
|
eprintln!(
|
||||||
|
"[sidecar-rs] Skipping pre-ready line: {}",
|
||||||
|
&trimmed[..trimmed.len().min(200)]
|
||||||
|
);
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Err("Sidecar did not send ready message".to_string())
|
Err("Sidecar did not send ready message".to_string())
|
||||||
@@ -120,12 +369,51 @@ impl SidecarManager {
|
|||||||
self.send_and_receive_with_progress(msg, |_| {})
|
self.send_and_receive_with_progress(msg, |_| {})
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Send a message and read the response, calling on_progress for each progress message.
|
/// Send a message and receive the response, calling a callback for intermediate messages.
|
||||||
pub fn send_and_receive_with_progress(
|
/// Intermediate messages include progress, pipeline.segment, and pipeline.speaker_update.
|
||||||
|
///
|
||||||
|
/// If the sidecar has crashed (broken pipe), automatically restarts it and retries once.
|
||||||
|
pub fn send_and_receive_with_progress<F>(
|
||||||
&self,
|
&self,
|
||||||
msg: &IPCMessage,
|
msg: &IPCMessage,
|
||||||
on_progress: impl Fn(&IPCMessage),
|
on_intermediate: F,
|
||||||
) -> Result<IPCMessage, String> {
|
) -> Result<IPCMessage, String>
|
||||||
|
where
|
||||||
|
F: Fn(&IPCMessage),
|
||||||
|
{
|
||||||
|
match self.send_and_receive_inner(msg, &on_intermediate) {
|
||||||
|
Ok(response) => Ok(response),
|
||||||
|
Err(e)
|
||||||
|
if e.contains("Write error")
|
||||||
|
|| e.contains("closed stdout")
|
||||||
|
|| e.contains("not available") =>
|
||||||
|
{
|
||||||
|
eprintln!("[sidecar-rs] Sidecar communication failed ({e}), restarting...");
|
||||||
|
self.cleanup_handles();
|
||||||
|
// Stop any zombie process
|
||||||
|
{
|
||||||
|
let mut proc = self.process.lock().map_err(|e| e.to_string())?;
|
||||||
|
if let Some(ref mut child) = proc.take() {
|
||||||
|
let _ = child.kill();
|
||||||
|
let _ = child.wait();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
self.ensure_running()?;
|
||||||
|
self.send_and_receive_inner(msg, &on_intermediate)
|
||||||
|
}
|
||||||
|
Err(e) => Err(e),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Inner implementation of send_and_receive.
|
||||||
|
fn send_and_receive_inner<F>(
|
||||||
|
&self,
|
||||||
|
msg: &IPCMessage,
|
||||||
|
on_intermediate: &F,
|
||||||
|
) -> Result<IPCMessage, String>
|
||||||
|
where
|
||||||
|
F: Fn(&IPCMessage),
|
||||||
|
{
|
||||||
// Write to stdin
|
// Write to stdin
|
||||||
{
|
{
|
||||||
let mut stdin_guard = self.stdin.lock().map_err(|e| e.to_string())?;
|
let mut stdin_guard = self.stdin.lock().map_err(|e| e.to_string())?;
|
||||||
@@ -160,15 +448,20 @@ impl SidecarManager {
|
|||||||
if trimmed.is_empty() {
|
if trimmed.is_empty() {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
let response: IPCMessage = serde_json::from_str(trimmed)
|
let response: IPCMessage =
|
||||||
.map_err(|e| format!("Parse error: {e}"))?;
|
serde_json::from_str(trimmed).map_err(|e| format!("Parse error: {e}"))?;
|
||||||
|
|
||||||
if response.msg_type == "progress" {
|
// Forward intermediate messages via callback, return the final result/error
|
||||||
on_progress(&response);
|
let is_intermediate = matches!(
|
||||||
continue;
|
response.msg_type.as_str(),
|
||||||
}
|
"progress" | "pipeline.segment" | "pipeline.speaker_update"
|
||||||
|
);
|
||||||
|
if is_intermediate {
|
||||||
|
on_intermediate(&response);
|
||||||
|
} else {
|
||||||
return Ok(response);
|
return Ok(response);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
Err("Sidecar stdout not available".to_string())
|
Err("Sidecar stdout not available".to_string())
|
||||||
}
|
}
|
||||||
@@ -203,8 +496,39 @@ impl SidecarManager {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn is_running(&self) -> bool {
|
pub fn is_running(&self) -> bool {
|
||||||
let proc = self.process.lock().ok();
|
let mut proc = match self.process.lock() {
|
||||||
proc.map_or(false, |p| p.is_some())
|
Ok(p) => p,
|
||||||
|
Err(_) => return false,
|
||||||
|
};
|
||||||
|
if let Some(ref mut child) = *proc {
|
||||||
|
// Check if the process has exited
|
||||||
|
match child.try_wait() {
|
||||||
|
Ok(Some(_status)) => {
|
||||||
|
// Process has exited — clean up handles
|
||||||
|
eprintln!("[sidecar-rs] Sidecar process has exited");
|
||||||
|
drop(proc);
|
||||||
|
let _ = self.cleanup_handles();
|
||||||
|
false
|
||||||
|
}
|
||||||
|
Ok(None) => true, // Still running
|
||||||
|
Err(_) => false,
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Clean up stdin/stdout/process handles after the sidecar has exited.
|
||||||
|
fn cleanup_handles(&self) {
|
||||||
|
if let Ok(mut s) = self.stdin.lock() {
|
||||||
|
*s = None;
|
||||||
|
}
|
||||||
|
if let Ok(mut r) = self.reader.lock() {
|
||||||
|
*r = None;
|
||||||
|
}
|
||||||
|
if let Ok(mut p) = self.process.lock() {
|
||||||
|
*p = None;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -15,12 +15,10 @@ pub struct AppState {
|
|||||||
impl AppState {
|
impl AppState {
|
||||||
pub fn new() -> Result<Self, String> {
|
pub fn new() -> Result<Self, String> {
|
||||||
let data_dir = LlamaManager::data_dir();
|
let data_dir = LlamaManager::data_dir();
|
||||||
std::fs::create_dir_all(&data_dir)
|
std::fs::create_dir_all(&data_dir).map_err(|e| format!("Cannot create data dir: {e}"))?;
|
||||||
.map_err(|e| format!("Cannot create data dir: {e}"))?;
|
|
||||||
|
|
||||||
let db_path = data_dir.join("voice_to_notes.db");
|
let db_path = data_dir.join("voice_to_notes.db");
|
||||||
let conn = db::open_database(&db_path)
|
let conn = db::open_database(&db_path).map_err(|e| format!("Cannot open database: {e}"))?;
|
||||||
.map_err(|e| format!("Cannot open database: {e}"))?;
|
|
||||||
|
|
||||||
Ok(Self {
|
Ok(Self {
|
||||||
db: Mutex::new(conn),
|
db: Mutex::new(conn),
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"$schema": "https://schema.tauri.app/config/2",
|
"$schema": "https://schema.tauri.app/config/2",
|
||||||
"productName": "Voice to Notes",
|
"productName": "Voice to Notes",
|
||||||
"version": "0.1.0",
|
"version": "0.2.5",
|
||||||
"identifier": "com.voicetonotes.app",
|
"identifier": "com.voicetonotes.app",
|
||||||
"build": {
|
"build": {
|
||||||
"beforeDevCommand": "npm run dev",
|
"beforeDevCommand": "npm run dev",
|
||||||
@@ -16,7 +16,9 @@
|
|||||||
"width": 1200,
|
"width": 1200,
|
||||||
"height": 800,
|
"height": 800,
|
||||||
"minWidth": 800,
|
"minWidth": 800,
|
||||||
"minHeight": 600
|
"minHeight": 600,
|
||||||
|
"decorations": true,
|
||||||
|
"transparent": false
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"security": {
|
"security": {
|
||||||
@@ -29,7 +31,7 @@
|
|||||||
},
|
},
|
||||||
"bundle": {
|
"bundle": {
|
||||||
"active": true,
|
"active": true,
|
||||||
"targets": "all",
|
"targets": ["deb", "nsis", "msi", "dmg"],
|
||||||
"icon": [
|
"icon": [
|
||||||
"icons/32x32.png",
|
"icons/32x32.png",
|
||||||
"icons/128x128.png",
|
"icons/128x128.png",
|
||||||
@@ -40,14 +42,12 @@
|
|||||||
"category": "Utility",
|
"category": "Utility",
|
||||||
"shortDescription": "Transcribe audio/video with speaker identification",
|
"shortDescription": "Transcribe audio/video with speaker identification",
|
||||||
"longDescription": "Voice to Notes is a desktop application that transcribes audio and video recordings with speaker identification, synchronized playback, and AI-powered analysis. Export to SRT, WebVTT, ASS captions, or plain text.",
|
"longDescription": "Voice to Notes is a desktop application that transcribes audio and video recordings with speaker identification, synchronized playback, and AI-powered analysis. Export to SRT, WebVTT, ASS captions, or plain text.",
|
||||||
|
"resources": ["sidecar.zip"],
|
||||||
"copyright": "Voice to Notes Contributors",
|
"copyright": "Voice to Notes Contributors",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"linux": {
|
"linux": {
|
||||||
"deb": {
|
"deb": {
|
||||||
"depends": ["python3", "python3-pip"]
|
"depends": []
|
||||||
},
|
|
||||||
"appimage": {
|
|
||||||
"bundleMediaFramework": true
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"windows": {
|
"windows": {
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
<!doctype html>
|
<!doctype html>
|
||||||
<html lang="en">
|
<html lang="en" style="margin:0;padding:0;background:#0a0a23;height:100%;">
|
||||||
<head>
|
<head>
|
||||||
<meta charset="utf-8" />
|
<meta charset="utf-8" />
|
||||||
<link rel="icon" href="%sveltekit.assets%/favicon.png" />
|
<link rel="icon" href="%sveltekit.assets%/favicon.png" />
|
||||||
@@ -7,7 +7,7 @@
|
|||||||
<title>Voice to Notes</title>
|
<title>Voice to Notes</title>
|
||||||
%sveltekit.head%
|
%sveltekit.head%
|
||||||
</head>
|
</head>
|
||||||
<body data-sveltekit-preload-data="hover">
|
<body data-sveltekit-preload-data="hover" style="margin:0;padding:0;background:#0a0a23;overflow:hidden;">
|
||||||
<div style="display: contents">%sveltekit.body%</div>
|
<div style="display: contents">%sveltekit.body%</div>
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
<script lang="ts">
|
<script lang="ts">
|
||||||
import { invoke } from '@tauri-apps/api/core';
|
import { invoke } from '@tauri-apps/api/core';
|
||||||
import { segments, speakers } from '$lib/stores/transcript';
|
import { segments, speakers } from '$lib/stores/transcript';
|
||||||
|
import { settings } from '$lib/stores/settings';
|
||||||
|
|
||||||
interface ChatMessage {
|
interface ChatMessage {
|
||||||
role: 'user' | 'assistant';
|
role: 'user' | 'assistant';
|
||||||
@@ -43,9 +44,23 @@
|
|||||||
content: m.content,
|
content: m.content,
|
||||||
}));
|
}));
|
||||||
|
|
||||||
|
// Ensure the provider is configured with current credentials before chatting
|
||||||
|
const s = $settings;
|
||||||
|
const configMap: Record<string, Record<string, string>> = {
|
||||||
|
openai: { api_key: s.openai_api_key, model: s.openai_model },
|
||||||
|
anthropic: { api_key: s.anthropic_api_key, model: s.anthropic_model },
|
||||||
|
litellm: { api_key: s.litellm_api_key, api_base: s.litellm_api_base, model: s.litellm_model },
|
||||||
|
local: { model: s.local_model_path, base_url: 'http://localhost:8080' },
|
||||||
|
};
|
||||||
|
const config = configMap[s.ai_provider];
|
||||||
|
if (config) {
|
||||||
|
await invoke('ai_configure', { provider: s.ai_provider, config });
|
||||||
|
}
|
||||||
|
|
||||||
const result = await invoke<{ response: string }>('ai_chat', {
|
const result = await invoke<{ response: string }>('ai_chat', {
|
||||||
messages: chatMessages,
|
messages: chatMessages,
|
||||||
transcriptContext: getTranscriptContext(),
|
transcriptContext: getTranscriptContext(),
|
||||||
|
provider: s.ai_provider,
|
||||||
});
|
});
|
||||||
|
|
||||||
messages = [...messages, { role: 'assistant', content: result.response }];
|
messages = [...messages, { role: 'assistant', content: result.response }];
|
||||||
@@ -73,6 +88,88 @@
|
|||||||
messages = [];
|
messages = [];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Convert a small Markdown subset into an HTML string.
 *
 * Supported block syntax, matched at the very start of a line:
 *   - `# `, `## `, `### ` headings, rendered as <h2>, <h3>, <h4>
 *   - `- ` / `* ` bullet items, grouped into a <ul>
 *   - `1. ` style numbered items, grouped into an <ol>
 *   - blank lines, rendered as <br>
 * Every other line is emitted as inline-formatted text.
 *
 * This function does not escape HTML itself; all line text is passed
 * through applyInlineFormatting before being placed in the output.
 *
 * @param text Raw Markdown text.
 * @returns HTML fragments joined with newlines.
 */
function formatMarkdown(text: string): string {
  const lines = text.split('\n');
  const result: string[] = [];

  // Track WHICH list is open, not merely whether one is, so the closing tag
  // always matches and a bullet list followed by a numbered list (or the
  // reverse) starts a new container instead of reusing the old one.
  let openList: 'ul' | 'ol' | null = null;

  const closeList = () => {
    if (openList) {
      result.push(`</${openList}>`);
      openList = null;
    }
  };

  const ensureList = (kind: 'ul' | 'ol') => {
    if (openList !== kind) {
      closeList();
      result.push(`<${kind}>`);
      openList = kind;
    }
  };

  // Longest prefix first so '### ' is never mistaken for '# '.
  const headings: Array<[string, string]> = [
    ['### ', 'h4'],
    ['## ', 'h3'],
    ['# ', 'h2'],
  ];

  for (const line of lines) {
    // Headers
    const heading = headings.find(([prefix]) => line.startsWith(prefix));
    if (heading) {
      const [prefix, tag] = heading;
      closeList();
      const content = applyInlineFormatting(line.slice(prefix.length));
      result.push(`<${tag}>${content}</${tag}>`);
      continue;
    }

    // List items (- or *)
    if (/^[\-\*] /.test(line)) {
      ensureList('ul');
      result.push(`<li>${applyInlineFormatting(line.slice(2))}</li>`);
      continue;
    }

    // Numbered list items
    if (/^\d+\.\s/.test(line)) {
      ensureList('ol');
      result.push(`<li>${applyInlineFormatting(line.replace(/^\d+\.\s/, ''))}</li>`);
      continue;
    }

    // Non-list line: close any open list
    closeList();

    // Empty line = paragraph break
    if (line.trim() === '') {
      result.push('<br>');
      continue;
    }

    // Regular text line
    result.push(applyInlineFormatting(line));
  }

  // Close any trailing open list
  closeList();

  return result.join('\n');
}
|
||||||
|
|
||||||
|
/**
 * Apply inline Markdown formatting (`code`, **bold**, *italic*) to one line
 * of text and return HTML that is safe to inject as markup.
 *
 * The input is HTML-escaped first, so any markup characters in the text
 * (for example in a model response) are shown literally rather than being
 * interpreted as tags. Code spans are set aside before the emphasis rules
 * run, so asterisks inside backticks are left untouched.
 *
 * @param text A single line of raw text.
 * @returns The escaped text with <code>, <strong> and <em> tags applied.
 */
function applyInlineFormatting(text: string): string {
  // Strip NULs (reserved below as placeholder delimiters), then escape HTML.
  const escaped = text
    .replace(/\u0000/g, '')
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');

  // Inline code (backticks): swap each span for a placeholder so the bold and
  // italic rules below cannot rewrite its contents.
  const codeSpans: string[] = [];
  let html = escaped.replace(/`([^`]+)`/g, (_match, code: string) => {
    codeSpans.push(code);
    return `\u0000${codeSpans.length - 1}\u0000`;
  });

  // Bold (**text**)
  html = html.replace(/\*\*([^*]+)\*\*/g, '<strong>$1</strong>');
  // Italic (*text*): only single asterisks not already consumed by bold
  html = html.replace(/\*([^*]+)\*/g, '<em>$1</em>');

  // Put the code spans back, wrapped in <code>.
  return html.replace(
    /\u0000(\d+)\u0000/g,
    (_match, index: string) => `<code>${codeSpans[Number(index)]}</code>`,
  );
}
|
||||||
|
|
||||||
// Quick action buttons
|
// Quick action buttons
|
||||||
async function summarize() {
|
async function summarize() {
|
||||||
inputText = 'Please summarize this transcript in bullet points.';
|
inputText = 'Please summarize this transcript in bullet points.';
|
||||||
@@ -107,7 +204,11 @@
|
|||||||
{:else}
|
{:else}
|
||||||
{#each messages as msg}
|
{#each messages as msg}
|
||||||
<div class="message {msg.role}">
|
<div class="message {msg.role}">
|
||||||
|
{#if msg.role === 'assistant'}
|
||||||
|
<div class="message-content">{@html formatMarkdown(msg.content)}</div>
|
||||||
|
{:else}
|
||||||
<div class="message-content">{msg.content}</div>
|
<div class="message-content">{msg.content}</div>
|
||||||
|
{/if}
|
||||||
</div>
|
</div>
|
||||||
{/each}
|
{/each}
|
||||||
{#if isLoading}
|
{#if isLoading}
|
||||||
@@ -177,47 +278,101 @@
|
|||||||
}
|
}
|
||||||
.empty-state {
|
.empty-state {
|
||||||
text-align: center;
|
text-align: center;
|
||||||
color: #666;
|
color: #888;
|
||||||
font-size: 0.8rem;
|
font-size: 0.85rem;
|
||||||
padding: 1rem 0;
|
padding: 2rem 1rem;
|
||||||
|
}
|
||||||
|
.empty-state p {
|
||||||
|
margin-bottom: 1rem;
|
||||||
}
|
}
|
||||||
.quick-actions {
|
.quick-actions {
|
||||||
display: flex;
|
display: flex;
|
||||||
gap: 0.5rem;
|
gap: 0.75rem;
|
||||||
justify-content: center;
|
justify-content: center;
|
||||||
margin-top: 0.5rem;
|
margin-top: 1rem;
|
||||||
}
|
}
|
||||||
.quick-btn {
|
.quick-btn {
|
||||||
background: rgba(233, 69, 96, 0.15);
|
background: rgba(233, 69, 96, 0.15);
|
||||||
border: 1px solid rgba(233, 69, 96, 0.3);
|
border: 1px solid rgba(233, 69, 96, 0.3);
|
||||||
color: #e94560;
|
color: #e94560;
|
||||||
padding: 0.3rem 0.6rem;
|
padding: 0.45rem 0.85rem;
|
||||||
border-radius: 4px;
|
border-radius: 6px;
|
||||||
cursor: pointer;
|
cursor: pointer;
|
||||||
font-size: 0.75rem;
|
font-size: 0.8rem;
|
||||||
|
transition: background 0.15s;
|
||||||
}
|
}
|
||||||
.quick-btn:hover {
|
.quick-btn:hover {
|
||||||
background: rgba(233, 69, 96, 0.25);
|
background: rgba(233, 69, 96, 0.25);
|
||||||
}
|
}
|
||||||
.message {
|
.message {
|
||||||
margin-bottom: 0.5rem;
|
margin-bottom: 0.75rem;
|
||||||
padding: 0.5rem 0.75rem;
|
padding: 0.75rem 1rem;
|
||||||
border-radius: 6px;
|
border-radius: 8px;
|
||||||
font-size: 0.8rem;
|
font-size: 0.8rem;
|
||||||
line-height: 1.4;
|
line-height: 1.55;
|
||||||
}
|
}
|
||||||
.message.user {
|
.message.user {
|
||||||
background: rgba(233, 69, 96, 0.15);
|
background: rgba(233, 69, 96, 0.15);
|
||||||
margin-left: 1rem;
|
border-left: 3px solid rgba(233, 69, 96, 0.4);
|
||||||
}
|
}
|
||||||
.message.assistant {
|
.message.assistant {
|
||||||
background: rgba(255, 255, 255, 0.05);
|
background: rgba(255, 255, 255, 0.05);
|
||||||
margin-right: 1rem;
|
border-left: 3px solid rgba(255, 255, 255, 0.1);
|
||||||
}
|
}
|
||||||
.message.loading {
|
.message.loading {
|
||||||
opacity: 0.6;
|
opacity: 0.6;
|
||||||
font-style: italic;
|
font-style: italic;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Markdown styles inside assistant messages */
|
||||||
|
.message.assistant :global(h2) {
|
||||||
|
font-size: 1rem;
|
||||||
|
font-weight: 600;
|
||||||
|
margin: 0.6rem 0 0.3rem;
|
||||||
|
color: #f0f0f0;
|
||||||
|
}
|
||||||
|
.message.assistant :global(h3) {
|
||||||
|
font-size: 0.9rem;
|
||||||
|
font-weight: 600;
|
||||||
|
margin: 0.5rem 0 0.25rem;
|
||||||
|
color: #e8e8e8;
|
||||||
|
}
|
||||||
|
.message.assistant :global(h4) {
|
||||||
|
font-size: 0.85rem;
|
||||||
|
font-weight: 600;
|
||||||
|
margin: 0.4rem 0 0.2rem;
|
||||||
|
color: #e0e0e0;
|
||||||
|
}
|
||||||
|
.message.assistant :global(strong) {
|
||||||
|
color: #f0f0f0;
|
||||||
|
font-weight: 600;
|
||||||
|
}
|
||||||
|
.message.assistant :global(em) {
|
||||||
|
color: #ccc;
|
||||||
|
font-style: italic;
|
||||||
|
}
|
||||||
|
.message.assistant :global(code) {
|
||||||
|
background: rgba(0, 0, 0, 0.3);
|
||||||
|
color: #e94560;
|
||||||
|
padding: 0.1rem 0.35rem;
|
||||||
|
border-radius: 3px;
|
||||||
|
font-size: 0.75rem;
|
||||||
|
font-family: 'Fira Code', 'Cascadia Code', 'Consolas', monospace;
|
||||||
|
}
|
||||||
|
.message.assistant :global(ul),
|
||||||
|
.message.assistant :global(ol) {
|
||||||
|
margin: 0.35rem 0;
|
||||||
|
padding-left: 1.3rem;
|
||||||
|
}
|
||||||
|
.message.assistant :global(li) {
|
||||||
|
margin-bottom: 0.25rem;
|
||||||
|
line-height: 1.5;
|
||||||
|
}
|
||||||
|
.message.assistant :global(br) {
|
||||||
|
display: block;
|
||||||
|
content: '';
|
||||||
|
margin-top: 0.35rem;
|
||||||
|
}
|
||||||
.chat-input {
|
.chat-input {
|
||||||
display: flex;
|
display: flex;
|
||||||
gap: 0.5rem;
|
gap: 0.5rem;
|
||||||
|
|||||||
@@ -7,16 +7,88 @@
|
|||||||
}
|
}
|
||||||
|
|
||||||
let { visible = false, percent = 0, stage = '', message = '' }: Props = $props();
|
let { visible = false, percent = 0, stage = '', message = '' }: Props = $props();
|
||||||
|
|
||||||
|
// Pipeline steps in order
|
||||||
|
const pipelineSteps = [
|
||||||
|
{ key: 'loading_model', label: 'Load transcription model' },
|
||||||
|
{ key: 'transcribing', label: 'Transcribe audio' },
|
||||||
|
{ key: 'loading_diarization', label: 'Load speaker detection model' },
|
||||||
|
{ key: 'diarizing', label: 'Identify speakers' },
|
||||||
|
{ key: 'merging', label: 'Merge results' },
|
||||||
|
];
|
||||||
|
|
||||||
|
const stepOrder = pipelineSteps.map(s => s.key);
|
||||||
|
|
||||||
|
// Track the highest step index we've reached (never goes backward)
|
||||||
|
let highestStepIdx = $state(-1);
|
||||||
|
|
||||||
|
// Map non-step stages to step indices for progress tracking
|
||||||
|
/**
 * Look up the position of a stage key within `stepOrder`.
 *
 * @param s Stage key to look up.
 * @returns The zero-based step index, or -1 when the stage is not one of the
 *          listed steps.
 */
function stageToStepIdx(s: string): number {
  const position = stepOrder.indexOf(s);
  // Stages that are not steps (e.g. 'pipeline') map to -1 so they never
  // move the high-water mark.
  return position < 0 ? -1 : position;
}
|
||||||
|
|
||||||
|
// Keep `highestStepIdx` in sync with the incoming stage: reset it while the
// overlay is hidden, otherwise only ever move it forward.
$effect(() => {
  if (visible) {
    const reachedIdx = stageToStepIdx(stage);
    if (highestStepIdx < reachedIdx) {
      highestStepIdx = reachedIdx;
    }
  } else {
    highestStepIdx = -1;
  }
});
|
||||||
|
|
||||||
|
/**
 * Classify a pipeline step relative to the furthest step reached so far.
 *
 * @param stepIdx Zero-based index of the step.
 * @returns 'active' for the step at `highestStepIdx`, 'done' for steps
 *          before it, and 'pending' for everything else.
 */
function getStepStatus(stepIdx: number): 'pending' | 'active' | 'done' {
  if (stepIdx === highestStepIdx) {
    return 'active';
  }
  return stepIdx < highestStepIdx ? 'done' : 'pending';
}
|
||||||
|
|
||||||
|
// User-friendly display of current stage
|
||||||
|
const stageLabels: Record<string, string> = {
|
||||||
|
'pipeline': 'Initializing...',
|
||||||
|
'loading_model': 'Loading Model',
|
||||||
|
'transcribing': 'Transcribing',
|
||||||
|
'loading_diarization': 'Loading Diarization',
|
||||||
|
'diarizing': 'Speaker Detection',
|
||||||
|
'merging': 'Merging Results',
|
||||||
|
'done': 'Complete',
|
||||||
|
};
|
||||||
|
|
||||||
|
let displayStage = $derived(stageLabels[stage] || stage || 'Processing...');
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
{#if visible}
|
{#if visible}
|
||||||
<div class="overlay">
|
<div class="overlay">
|
||||||
<div class="progress-card">
|
<div class="progress-card">
|
||||||
<h3>{stage}</h3>
|
<div class="spinner-row">
|
||||||
<div class="bar-track">
|
<div class="spinner"></div>
|
||||||
<div class="bar-fill" style="width: {percent}%"></div>
|
<h3>{displayStage}</h3>
|
||||||
</div>
|
</div>
|
||||||
<p>{percent}% — {message}</p>
|
|
||||||
|
<div class="steps">
|
||||||
|
{#each pipelineSteps as step, idx}
|
||||||
|
{@const status = getStepStatus(idx)}
|
||||||
|
<div class="step" class:step-done={status === 'done'} class:step-active={status === 'active'}>
|
||||||
|
<span class="step-icon">
|
||||||
|
{#if status === 'done'}
|
||||||
|
✓
|
||||||
|
{:else if status === 'active'}
|
||||||
|
⟳
|
||||||
|
{:else}
|
||||||
|
·
|
||||||
|
{/if}
|
||||||
|
</span>
|
||||||
|
<span class="step-label">{step.label}</span>
|
||||||
|
</div>
|
||||||
|
{/each}
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<p class="status-text">{message || 'Please wait...'}</p>
|
||||||
|
<p class="hint-text">This may take several minutes for large files</p>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
{/if}
|
{/if}
|
||||||
@@ -25,34 +97,81 @@
|
|||||||
.overlay {
|
.overlay {
|
||||||
position: fixed;
|
position: fixed;
|
||||||
inset: 0;
|
inset: 0;
|
||||||
background: rgba(0, 0, 0, 0.7);
|
background: rgba(0, 0, 0, 0.8);
|
||||||
display: flex;
|
display: flex;
|
||||||
align-items: center;
|
align-items: center;
|
||||||
justify-content: center;
|
justify-content: center;
|
||||||
z-index: 1000;
|
z-index: 9999;
|
||||||
}
|
}
|
||||||
.progress-card {
|
.progress-card {
|
||||||
background: #16213e;
|
background: #16213e;
|
||||||
padding: 2rem;
|
padding: 2rem 2.5rem;
|
||||||
border-radius: 12px;
|
border-radius: 12px;
|
||||||
min-width: 400px;
|
min-width: 380px;
|
||||||
|
max-width: 440px;
|
||||||
color: #e0e0e0;
|
color: #e0e0e0;
|
||||||
|
border: 1px solid #2a3a5e;
|
||||||
|
box-shadow: 0 8px 32px rgba(0, 0, 0, 0.5);
|
||||||
}
|
}
|
||||||
h3 { margin: 0 0 1rem; text-transform: capitalize; }
|
.spinner-row {
|
||||||
.bar-track {
|
display: flex;
|
||||||
height: 8px;
|
align-items: center;
|
||||||
background: #0f3460;
|
gap: 0.75rem;
|
||||||
border-radius: 4px;
|
margin-bottom: 1.25rem;
|
||||||
overflow: hidden;
|
|
||||||
}
|
}
|
||||||
.bar-fill {
|
.spinner {
|
||||||
height: 100%;
|
width: 20px;
|
||||||
background: #e94560;
|
height: 20px;
|
||||||
transition: width 0.3s;
|
border: 3px solid #2a3a5e;
|
||||||
|
border-top-color: #e94560;
|
||||||
|
border-radius: 50%;
|
||||||
|
animation: spin 0.8s linear infinite;
|
||||||
|
flex-shrink: 0;
|
||||||
}
|
}
|
||||||
p {
|
@keyframes spin {
|
||||||
|
to { transform: rotate(360deg); }
|
||||||
|
}
|
||||||
|
h3 {
|
||||||
|
margin: 0;
|
||||||
|
font-size: 1.1rem;
|
||||||
|
}
|
||||||
|
.steps {
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
gap: 0.4rem;
|
||||||
|
margin-bottom: 1rem;
|
||||||
|
}
|
||||||
|
.step {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 0.5rem;
|
||||||
|
font-size: 0.85rem;
|
||||||
|
color: #555;
|
||||||
|
}
|
||||||
|
.step-done {
|
||||||
|
color: #4ecdc4;
|
||||||
|
}
|
||||||
|
.step-active {
|
||||||
|
color: #e0e0e0;
|
||||||
|
font-weight: 500;
|
||||||
|
}
|
||||||
|
.step-icon {
|
||||||
|
width: 1.2rem;
|
||||||
|
text-align: center;
|
||||||
|
flex-shrink: 0;
|
||||||
|
}
|
||||||
|
.step-active .step-icon {
|
||||||
|
animation: spin 1.5s linear infinite;
|
||||||
|
display: inline-block;
|
||||||
|
}
|
||||||
|
.status-text {
|
||||||
|
margin: 0.75rem 0 0;
|
||||||
|
font-size: 0.85rem;
|
||||||
|
color: #b0b0b0;
|
||||||
|
}
|
||||||
|
.hint-text {
|
||||||
margin: 0.5rem 0 0;
|
margin: 0.5rem 0 0;
|
||||||
font-size: 0.875rem;
|
font-size: 0.75rem;
|
||||||
color: #999;
|
color: #555;
|
||||||
}
|
}
|
||||||
</style>
|
</style>
|
||||||
|
|||||||
@@ -1,4 +1,6 @@
|
|||||||
<script lang="ts">
|
<script lang="ts">
|
||||||
|
import { invoke } from '@tauri-apps/api/core';
|
||||||
|
import { openUrl } from '@tauri-apps/plugin-opener';
|
||||||
import { settings, saveSettings, type AppSettings } from '$lib/stores/settings';
|
import { settings, saveSettings, type AppSettings } from '$lib/stores/settings';
|
||||||
|
|
||||||
interface Props {
|
interface Props {
|
||||||
@@ -9,7 +11,34 @@
|
|||||||
let { visible, onClose }: Props = $props();
|
let { visible, onClose }: Props = $props();
|
||||||
|
|
||||||
let localSettings = $state<AppSettings>({ ...$settings });
|
let localSettings = $state<AppSettings>({ ...$settings });
|
||||||
let activeTab = $state<'transcription' | 'ai' | 'local'>('transcription');
|
let activeTab = $state<'transcription' | 'speakers' | 'ai' | 'local'>('transcription');
|
||||||
|
let modelStatus = $state<'idle' | 'downloading' | 'success' | 'error'>('idle');
|
||||||
|
let modelError = $state('');
|
||||||
|
let revealedFields = $state<Set<string>>(new Set());
|
||||||
|
|
||||||
|
/**
 * Invoke the `download_diarize_model` command with the HuggingFace token
 * from the settings form, and reflect the outcome in `modelStatus` and
 * `modelError`.
 *
 * With no token entered, an error is recorded and nothing is invoked.
 */
async function testAndDownloadModel() {
  // Single place that records a failure and its message.
  const fail = (reason: string) => {
    modelStatus = 'error';
    modelError = reason;
  };

  if (!localSettings.hf_token) {
    fail('Please enter a HuggingFace token first.');
    return;
  }

  modelStatus = 'downloading';
  modelError = '';

  try {
    const outcome = await invoke<{ ok: boolean; error?: string }>('download_diarize_model', {
      hfToken: localSettings.hf_token,
    });
    if (!outcome.ok) {
      fail(outcome.error || 'Unknown error');
    } else {
      modelStatus = 'success';
    }
  } catch (err) {
    // A rejected invoke (or a malformed result) is surfaced as text.
    fail(String(err));
  }
}
|
||||||
|
|
||||||
// Sync when settings store changes
|
// Sync when settings store changes
|
||||||
$effect(() => {
|
$effect(() => {
|
||||||
@@ -46,6 +75,9 @@
|
|||||||
<button class="tab" class:active={activeTab === 'transcription'} onclick={() => activeTab = 'transcription'}>
|
<button class="tab" class:active={activeTab === 'transcription'} onclick={() => activeTab = 'transcription'}>
|
||||||
Transcription
|
Transcription
|
||||||
</button>
|
</button>
|
||||||
|
<button class="tab" class:active={activeTab === 'speakers'} onclick={() => activeTab = 'speakers'}>
|
||||||
|
Speakers
|
||||||
|
</button>
|
||||||
<button class="tab" class:active={activeTab === 'ai'} onclick={() => activeTab = 'ai'}>
|
<button class="tab" class:active={activeTab === 'ai'} onclick={() => activeTab = 'ai'}>
|
||||||
AI Provider
|
AI Provider
|
||||||
</button>
|
</button>
|
||||||
@@ -77,10 +109,72 @@
|
|||||||
<label for="stt-lang">Language (blank = auto-detect)</label>
|
<label for="stt-lang">Language (blank = auto-detect)</label>
|
||||||
<input id="stt-lang" type="text" bind:value={localSettings.transcription_language} placeholder="e.g., en, es, fr" />
|
<input id="stt-lang" type="text" bind:value={localSettings.transcription_language} placeholder="e.g., en, es, fr" />
|
||||||
</div>
|
</div>
|
||||||
<div class="field checkbox">
|
{:else if activeTab === 'speakers'}
|
||||||
|
<div class="field">
|
||||||
|
<label for="hf-token">HuggingFace Token</label>
|
||||||
|
<div class="input-reveal">
|
||||||
|
<input id="hf-token" type={revealedFields.has('hf-token') ? 'text' : 'password'} bind:value={localSettings.hf_token} placeholder="hf_..." />
|
||||||
|
<button type="button" class="reveal-btn" onclick={() => { const s = new Set(revealedFields); s.has('hf-token') ? s.delete('hf-token') : s.add('hf-token'); revealedFields = s; }}>{revealedFields.has('hf-token') ? 'Hide' : 'Show'}</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="info-box">
|
||||||
|
<p class="info-title">Setup (one-time)</p>
|
||||||
|
<p>Speaker detection uses <strong>pyannote.audio</strong> models hosted on HuggingFace. You must accept the license for each model:</p>
|
||||||
|
<ol>
|
||||||
|
<li>Create a free account at <!-- svelte-ignore a11y_no_static_element_interactions --><a class="ext-link" onclick={() => openUrl('https://huggingface.co/join')}>huggingface.co</a></li>
|
||||||
|
<li>Accept the license on <strong>all three</strong> of these pages:
|
||||||
|
<ul>
|
||||||
|
<!-- svelte-ignore a11y_no_static_element_interactions -->
|
||||||
|
<li><a class="ext-link" onclick={() => openUrl('https://huggingface.co/pyannote/speaker-diarization-3.1')}>pyannote/speaker-diarization-3.1</a></li>
|
||||||
|
<!-- svelte-ignore a11y_no_static_element_interactions -->
|
||||||
|
<li><a class="ext-link" onclick={() => openUrl('https://huggingface.co/pyannote/segmentation-3.0')}>pyannote/segmentation-3.0</a></li>
|
||||||
|
<!-- svelte-ignore a11y_no_static_element_interactions -->
|
||||||
|
<li><a class="ext-link" onclick={() => openUrl('https://huggingface.co/pyannote/speaker-diarization-community-1')}>pyannote/speaker-diarization-community-1</a></li>
|
||||||
|
</ul>
|
||||||
|
</li>
|
||||||
|
<!-- svelte-ignore a11y_no_static_element_interactions -->
|
||||||
|
<li>Create a token at <a class="ext-link" onclick={() => openUrl('https://huggingface.co/settings/tokens')}>huggingface.co/settings/tokens</a> (read access)</li>
|
||||||
|
<li>Paste the token above and click <strong>Test & Download</strong></li>
|
||||||
|
</ol>
|
||||||
|
</div>
|
||||||
|
<button
|
||||||
|
class="btn-download"
|
||||||
|
onclick={testAndDownloadModel}
|
||||||
|
disabled={modelStatus === 'downloading'}
|
||||||
|
>
|
||||||
|
{#if modelStatus === 'downloading'}
|
||||||
|
Downloading model...
|
||||||
|
{:else}
|
||||||
|
Test & Download Model
|
||||||
|
{/if}
|
||||||
|
</button>
|
||||||
|
{#if modelStatus === 'success'}
|
||||||
|
<p class="status-success">Model downloaded successfully. Speaker detection is ready.</p>
|
||||||
|
{/if}
|
||||||
|
{#if modelStatus === 'error'}
|
||||||
|
<p class="status-error">{modelError}</p>
|
||||||
|
{/if}
|
||||||
|
<div class="field" style="margin-top: 1rem;">
|
||||||
|
<label for="num-speakers">Number of speakers</label>
|
||||||
|
<select
|
||||||
|
id="num-speakers"
|
||||||
|
value={localSettings.num_speakers === null || localSettings.num_speakers === 0 ? '0' : String(localSettings.num_speakers)}
|
||||||
|
onchange={(e) => {
|
||||||
|
const v = parseInt((e.target as HTMLSelectElement).value, 10);
|
||||||
|
localSettings.num_speakers = v === 0 ? null : v;
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
<option value="0">Auto-detect</option>
|
||||||
|
{#each Array.from({ length: 20 }, (_, i) => i + 1) as n}
|
||||||
|
<option value={String(n)}>{n}</option>
|
||||||
|
{/each}
|
||||||
|
</select>
|
||||||
|
<p class="hint">Hint the expected number of speakers to speed up diarization clustering.</p>
|
||||||
|
</div>
|
||||||
|
<div class="field checkbox" style="margin-top: 1rem;">
|
||||||
<label>
|
<label>
|
||||||
<input type="checkbox" bind:checked={localSettings.skip_diarization} />
|
<input type="checkbox" bind:checked={localSettings.skip_diarization} />
|
||||||
Skip speaker diarization (faster, no speaker labels)
|
Skip speaker detection (faster, no speaker labels)
|
||||||
</label>
|
</label>
|
||||||
</div>
|
</div>
|
||||||
{:else if activeTab === 'ai'}
|
{:else if activeTab === 'ai'}
|
||||||
@@ -90,14 +184,17 @@
|
|||||||
<option value="local">Local (llama-server)</option>
|
<option value="local">Local (llama-server)</option>
|
||||||
<option value="openai">OpenAI</option>
|
<option value="openai">OpenAI</option>
|
||||||
<option value="anthropic">Anthropic</option>
|
<option value="anthropic">Anthropic</option>
|
||||||
<option value="litellm">LiteLLM</option>
|
<option value="litellm">OpenAI Compatible</option>
|
||||||
</select>
|
</select>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{#if localSettings.ai_provider === 'openai'}
|
{#if localSettings.ai_provider === 'openai'}
|
||||||
<div class="field">
|
<div class="field">
|
||||||
<label for="openai-key">OpenAI API Key</label>
|
<label for="openai-key">OpenAI API Key</label>
|
||||||
<input id="openai-key" type="password" bind:value={localSettings.openai_api_key} placeholder="sk-..." />
|
<div class="input-reveal">
|
||||||
|
<input id="openai-key" type={revealedFields.has('openai-key') ? 'text' : 'password'} bind:value={localSettings.openai_api_key} placeholder="sk-..." />
|
||||||
|
<button type="button" class="reveal-btn" onclick={() => { const s = new Set(revealedFields); s.has('openai-key') ? s.delete('openai-key') : s.add('openai-key'); revealedFields = s; }}>{revealedFields.has('openai-key') ? 'Hide' : 'Show'}</button>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div class="field">
|
<div class="field">
|
||||||
<label for="openai-model">Model</label>
|
<label for="openai-model">Model</label>
|
||||||
@@ -106,13 +203,27 @@
|
|||||||
{:else if localSettings.ai_provider === 'anthropic'}
|
{:else if localSettings.ai_provider === 'anthropic'}
|
||||||
<div class="field">
|
<div class="field">
|
||||||
<label for="anthropic-key">Anthropic API Key</label>
|
<label for="anthropic-key">Anthropic API Key</label>
|
||||||
<input id="anthropic-key" type="password" bind:value={localSettings.anthropic_api_key} placeholder="sk-ant-..." />
|
<div class="input-reveal">
|
||||||
|
<input id="anthropic-key" type={revealedFields.has('anthropic-key') ? 'text' : 'password'} bind:value={localSettings.anthropic_api_key} placeholder="sk-ant-..." />
|
||||||
|
<button type="button" class="reveal-btn" onclick={() => { const s = new Set(revealedFields); s.has('anthropic-key') ? s.delete('anthropic-key') : s.add('anthropic-key'); revealedFields = s; }}>{revealedFields.has('anthropic-key') ? 'Hide' : 'Show'}</button>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div class="field">
|
<div class="field">
|
||||||
<label for="anthropic-model">Model</label>
|
<label for="anthropic-model">Model</label>
|
||||||
<input id="anthropic-model" type="text" bind:value={localSettings.anthropic_model} />
|
<input id="anthropic-model" type="text" bind:value={localSettings.anthropic_model} />
|
||||||
</div>
|
</div>
|
||||||
{:else if localSettings.ai_provider === 'litellm'}
|
{:else if localSettings.ai_provider === 'litellm'}
|
||||||
|
<div class="field">
|
||||||
|
<label for="litellm-base">API Base URL</label>
|
||||||
|
<input id="litellm-base" type="text" bind:value={localSettings.litellm_api_base} placeholder="https://your-litellm-proxy.example.com" />
|
||||||
|
</div>
|
||||||
|
<div class="field">
|
||||||
|
<label for="litellm-key">API Key</label>
|
||||||
|
<div class="input-reveal">
|
||||||
|
<input id="litellm-key" type={revealedFields.has('litellm-key') ? 'text' : 'password'} bind:value={localSettings.litellm_api_key} placeholder="sk-..." />
|
||||||
|
<button type="button" class="reveal-btn" onclick={() => { const s = new Set(revealedFields); s.has('litellm-key') ? s.delete('litellm-key') : s.add('litellm-key'); revealedFields = s; }}>{revealedFields.has('litellm-key') ? 'Hide' : 'Show'}</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
<div class="field">
|
<div class="field">
|
||||||
<label for="litellm-model">Model</label>
|
<label for="litellm-model">Model</label>
|
||||||
<input id="litellm-model" type="text" bind:value={localSettings.litellm_model} placeholder="provider/model-name" />
|
<input id="litellm-model" type="text" bind:value={localSettings.litellm_model} placeholder="provider/model-name" />
|
||||||
@@ -220,11 +331,36 @@
|
|||||||
color: #aaa;
|
color: #aaa;
|
||||||
margin-bottom: 0.3rem;
|
margin-bottom: 0.3rem;
|
||||||
}
|
}
|
||||||
|
.input-reveal {
|
||||||
|
display: flex;
|
||||||
|
gap: 0;
|
||||||
|
}
|
||||||
|
.input-reveal input {
|
||||||
|
flex: 1;
|
||||||
|
border-top-right-radius: 0;
|
||||||
|
border-bottom-right-radius: 0;
|
||||||
|
}
|
||||||
|
.reveal-btn {
|
||||||
|
background: #0f3460;
|
||||||
|
border: 1px solid #4a5568;
|
||||||
|
border-left: none;
|
||||||
|
color: #aaa;
|
||||||
|
padding: 0.5rem 0.6rem;
|
||||||
|
border-radius: 0 4px 4px 0;
|
||||||
|
cursor: pointer;
|
||||||
|
font-size: 0.75rem;
|
||||||
|
white-space: nowrap;
|
||||||
|
}
|
||||||
|
.reveal-btn:hover {
|
||||||
|
color: #e0e0e0;
|
||||||
|
background: #1a4a7a;
|
||||||
|
}
|
||||||
.field input,
|
.field input,
|
||||||
.field select {
|
.field select {
|
||||||
width: 100%;
|
width: 100%;
|
||||||
background: #1a1a2e;
|
background: #1a1a2e;
|
||||||
color: #e0e0e0;
|
color: #e0e0e0;
|
||||||
|
color-scheme: dark;
|
||||||
border: 1px solid #4a5568;
|
border: 1px solid #4a5568;
|
||||||
border-radius: 4px;
|
border-radius: 4px;
|
||||||
padding: 0.5rem;
|
padding: 0.5rem;
|
||||||
@@ -252,6 +388,79 @@
|
|||||||
color: #666;
|
color: #666;
|
||||||
line-height: 1.4;
|
line-height: 1.4;
|
||||||
}
|
}
|
||||||
|
.info-box {
|
||||||
|
background: rgba(233, 69, 96, 0.05);
|
||||||
|
border: 1px solid #2a3a5e;
|
||||||
|
border-radius: 6px;
|
||||||
|
padding: 0.75rem 1rem;
|
||||||
|
margin-bottom: 1rem;
|
||||||
|
font-size: 0.8rem;
|
||||||
|
color: #b0b0b0;
|
||||||
|
line-height: 1.5;
|
||||||
|
}
|
||||||
|
.info-box p {
|
||||||
|
margin: 0 0 0.5rem;
|
||||||
|
}
|
||||||
|
.info-box p:last-child {
|
||||||
|
margin-bottom: 0;
|
||||||
|
}
|
||||||
|
.info-box .info-title {
|
||||||
|
color: #e0e0e0;
|
||||||
|
font-weight: 600;
|
||||||
|
font-size: 0.8rem;
|
||||||
|
}
|
||||||
|
.info-box ol {
|
||||||
|
margin: 0.25rem 0 0.5rem;
|
||||||
|
padding-left: 1.25rem;
|
||||||
|
}
|
||||||
|
.info-box li {
|
||||||
|
margin-bottom: 0.25rem;
|
||||||
|
}
|
||||||
|
.info-box strong {
|
||||||
|
color: #e0e0e0;
|
||||||
|
}
|
||||||
|
.ext-link {
|
||||||
|
color: #e94560;
|
||||||
|
cursor: pointer;
|
||||||
|
text-decoration: underline;
|
||||||
|
}
|
||||||
|
.ext-link:hover {
|
||||||
|
color: #ff6b81;
|
||||||
|
}
|
||||||
|
.info-box ul {
|
||||||
|
margin: 0.25rem 0;
|
||||||
|
padding-left: 1.25rem;
|
||||||
|
}
|
||||||
|
.btn-download {
|
||||||
|
background: #0f3460;
|
||||||
|
border: 1px solid #4a5568;
|
||||||
|
color: #e0e0e0;
|
||||||
|
padding: 0.5rem 1rem;
|
||||||
|
border-radius: 6px;
|
||||||
|
cursor: pointer;
|
||||||
|
font-size: 0.85rem;
|
||||||
|
width: 100%;
|
||||||
|
margin-bottom: 0.5rem;
|
||||||
|
}
|
||||||
|
.btn-download:hover:not(:disabled) {
|
||||||
|
background: #1a4a7a;
|
||||||
|
border-color: #e94560;
|
||||||
|
}
|
||||||
|
.btn-download:disabled {
|
||||||
|
opacity: 0.6;
|
||||||
|
cursor: not-allowed;
|
||||||
|
}
|
||||||
|
.status-success {
|
||||||
|
color: #4ecdc4;
|
||||||
|
font-size: 0.8rem;
|
||||||
|
margin: 0.25rem 0;
|
||||||
|
}
|
||||||
|
.status-error {
|
||||||
|
color: #e94560;
|
||||||
|
font-size: 0.8rem;
|
||||||
|
margin: 0.25rem 0;
|
||||||
|
word-break: break-word;
|
||||||
|
}
|
||||||
.modal-footer {
|
.modal-footer {
|
||||||
display: flex;
|
display: flex;
|
||||||
justify-content: flex-end;
|
justify-content: flex-end;
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
<script lang="ts">
|
<script lang="ts">
|
||||||
import { speakers } from '$lib/stores/transcript';
|
import { speakers } from '$lib/stores/transcript';
|
||||||
|
import { settings } from '$lib/stores/settings';
|
||||||
import type { Speaker } from '$lib/types/transcript';
|
import type { Speaker } from '$lib/types/transcript';
|
||||||
|
|
||||||
let editingSpeakerId = $state<string | null>(null);
|
let editingSpeakerId = $state<string | null>(null);
|
||||||
@@ -34,7 +35,14 @@
|
|||||||
<div class="speaker-manager">
|
<div class="speaker-manager">
|
||||||
<h3>Speakers</h3>
|
<h3>Speakers</h3>
|
||||||
{#if $speakers.length === 0}
|
{#if $speakers.length === 0}
|
||||||
<p class="empty-hint">No speakers detected yet</p>
|
<p class="empty-hint">No speakers detected</p>
|
||||||
|
{#if $settings.skip_diarization}
|
||||||
|
<p class="setup-hint">Speaker detection is disabled. Enable it in Settings > Speakers.</p>
|
||||||
|
{:else if !$settings.hf_token}
|
||||||
|
<p class="setup-hint">Speaker detection requires a HuggingFace token. Configure it in Settings > Speakers.</p>
|
||||||
|
{:else}
|
||||||
|
<p class="setup-hint">Speaker detection ran but found no distinct speakers, or the model may need to be downloaded. Check Settings > Speakers.</p>
|
||||||
|
{/if}
|
||||||
{:else}
|
{:else}
|
||||||
<ul class="speaker-list">
|
<ul class="speaker-list">
|
||||||
{#each $speakers as speaker (speaker.id)}
|
{#each $speakers as speaker (speaker.id)}
|
||||||
@@ -78,6 +86,19 @@
|
|||||||
.empty-hint {
|
.empty-hint {
|
||||||
color: #666;
|
color: #666;
|
||||||
font-size: 0.875rem;
|
font-size: 0.875rem;
|
||||||
|
margin-bottom: 0.25rem;
|
||||||
|
}
|
||||||
|
.setup-hint {
|
||||||
|
color: #555;
|
||||||
|
font-size: 0.75rem;
|
||||||
|
line-height: 1.4;
|
||||||
|
}
|
||||||
|
.setup-hint code {
|
||||||
|
background: rgba(233, 69, 96, 0.15);
|
||||||
|
color: #e94560;
|
||||||
|
padding: 0.1rem 0.3rem;
|
||||||
|
border-radius: 3px;
|
||||||
|
font-size: 0.7rem;
|
||||||
}
|
}
|
||||||
.speaker-list {
|
.speaker-list {
|
||||||
list-style: none;
|
list-style: none;
|
||||||
|
|||||||
@@ -60,12 +60,14 @@
|
|||||||
function finishEditing(segmentId: string) {
|
function finishEditing(segmentId: string) {
|
||||||
const trimmed = editText.trim();
|
const trimmed = editText.trim();
|
||||||
if (trimmed) {
|
if (trimmed) {
|
||||||
// Update the segment text in the store
|
|
||||||
segments.update(segs => segs.map(s => {
|
segments.update(segs => segs.map(s => {
|
||||||
if (s.id !== segmentId) return s;
|
if (s.id !== segmentId) return s;
|
||||||
|
const newWordTexts = trimmed.split(/\s+/);
|
||||||
|
const newWords = redistributeWords(s, newWordTexts);
|
||||||
return {
|
return {
|
||||||
...s,
|
...s,
|
||||||
text: trimmed,
|
text: trimmed,
|
||||||
|
words: newWords,
|
||||||
original_text: s.original_text ?? s.text,
|
original_text: s.original_text ?? s.text,
|
||||||
is_edited: true,
|
is_edited: true,
|
||||||
edited_at: new Date().toISOString(),
|
edited_at: new Date().toISOString(),
|
||||||
@@ -76,6 +78,106 @@
|
|||||||
editingSegmentId = null;
|
editingSegmentId = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Redistribute word timing after a segment's text has been edited.
 *
 * Old and new word lists are aligned with a longest-common-subsequence
 * (case-insensitive) so that an edit in one place cannot disturb the timing
 * of words elsewhere in the segment:
 * - Same word count: treated as in-place spelling fixes, every word keeps
 *   its slot's timing.
 * - Unchanged words (alignment anchors) keep their original timing and id.
 * - Replaced/split words (N old → M new between two anchors) share the time
 *   range the old words covered, divided evenly.
 * - Pure insertions (no old word between two anchors) are interpolated
 *   between the previous word's end and the next anchor's start.
 * - Deleted words are simply dropped.
 *
 * @param segment - segment being edited; only `id`, `start_ms`, `end_ms`
 *   and `words` are read.
 * @param newWordTexts - the edited text, already split on whitespace.
 * @returns a new word list with one entry per element of `newWordTexts`.
 */
function redistributeWords(segment: Segment, newWordTexts: string[]): Word[] {
  const oldWords = segment.words;

  // Same word count — preserve per-word timing (spelling fixes).
  if (newWordTexts.length === oldWords.length) {
    return oldWords.map((w, i) => ({ ...w, word: newWordTexts[i] }));
  }

  const oldTexts = oldWords.map(w => w.word.toLowerCase());
  const newTexts = newWordTexts.map(w => w.toLowerCase());
  const oldCount = oldTexts.length;
  const newCount = newTexts.length;

  // lcs[i][j] = length of the longest common subsequence of
  // oldTexts[i..] and newTexts[j..]. Segments are short, so the
  // O(old × new) table is cheap.
  const lcs: number[][] = Array.from({ length: oldCount + 1 }, () =>
    new Array<number>(newCount + 1).fill(0)
  );
  for (let i = oldCount - 1; i >= 0; i--) {
    for (let j = newCount - 1; j >= 0; j--) {
      lcs[i][j] = oldTexts[i] === newTexts[j]
        ? lcs[i + 1][j + 1] + 1
        : Math.max(lcs[i + 1][j], lcs[i][j + 1]);
    }
  }

  // Recover the anchors: pairs [oldIndex, newIndex] of words that survived.
  const anchors: Array<[number, number]> = [];
  let oi = 0;
  let ni = 0;
  while (oi < oldCount && ni < newCount) {
    if (oldTexts[oi] === newTexts[ni]) {
      anchors.push([oi, ni]);
      oi++;
      ni++;
    } else if (lcs[oi + 1][ni] >= lcs[oi][ni + 1]) {
      oi++; // old word was deleted or replaced
    } else {
      ni++; // new word was inserted or is a replacement
    }
  }
  // Sentinel so the trailing gap is handled by the same loop as the others.
  anchors.push([oldCount, newCount]);

  // Ids of surviving words are reserved up front so that freshly generated
  // ids can never collide with one kept from an earlier edit.
  const usedIds = new Set<string>();
  for (const [anchorOld] of anchors) {
    if (anchorOld < oldCount) usedIds.add(oldWords[anchorOld].id);
  }
  const makeId = (index: number): string => {
    const base = `${segment.id}-word-${index}`;
    let candidate = base;
    for (let suffix = 1; usedIds.has(candidate); suffix++) {
      candidate = `${base}-${suffix}`;
    }
    usedIds.add(candidate);
    return candidate;
  };

  const result: Word[] = [];
  let gapOldStart = 0;
  let gapNewStart = 0;

  for (const [anchorOld, anchorNew] of anchors) {
    const newInGap = anchorNew - gapNewStart;

    if (newInGap > 0) {
      const replaced = oldWords.slice(gapOldStart, anchorOld);
      let rangeStart: number;
      let rangeEnd: number;

      if (replaced.length > 0) {
        // Replacement / split: reuse the time the old words occupied.
        rangeStart = replaced[0].start_ms;
        rangeEnd = replaced[replaced.length - 1].end_ms;
      } else {
        // Pure insertion: interpolate between the neighbours.
        rangeStart = result.length > 0 ? result[result.length - 1].end_ms : segment.start_ms;
        rangeEnd = anchorOld < oldCount ? oldWords[anchorOld].start_ms : segment.end_ms;
      }

      // Never produce a negative duration if neighbours overlap.
      const span = Math.max(0, rangeEnd - rangeStart);
      // A single old word split into several keeps its confidence;
      // anything else is user-typed text.
      const confidence = replaced.length === 1 ? replaced[0].confidence : 1.0;

      for (let k = 0; k < newInGap; k++) {
        const index = gapNewStart + k;
        result.push({
          id: makeId(index),
          segment_id: segment.id,
          word: newWordTexts[index],
          start_ms: Math.round(rangeStart + (span * k) / newInGap),
          end_ms: Math.round(rangeStart + (span * (k + 1)) / newInGap),
          confidence,
          word_index: index,
        });
      }
    }

    if (anchorOld < oldCount) {
      // Exact match — keep original timing, take the user's casing.
      result.push({ ...oldWords[anchorOld], word: newWordTexts[anchorNew], word_index: anchorNew });
    }

    gapOldStart = anchorOld + 1;
    gapNewStart = anchorNew + 1;
  }

  return result;
}
|
||||||
|
|
||||||
function handleEditKeydown(e: KeyboardEvent, segmentId: string) {
|
function handleEditKeydown(e: KeyboardEvent, segmentId: string) {
|
||||||
if (e.key === 'Escape') {
|
if (e.key === 'Escape') {
|
||||||
editingSegmentId = null;
|
editingSegmentId = null;
|
||||||
@@ -217,6 +319,8 @@
|
|||||||
.segment-text {
|
.segment-text {
|
||||||
line-height: 1.6;
|
line-height: 1.6;
|
||||||
padding-left: 0.75rem;
|
padding-left: 0.75rem;
|
||||||
|
word-wrap: break-word;
|
||||||
|
overflow-wrap: break-word;
|
||||||
}
|
}
|
||||||
.word {
|
.word {
|
||||||
cursor: pointer;
|
cursor: pointer;
|
||||||
|
|||||||
@@ -12,6 +12,8 @@
|
|||||||
|
|
||||||
let container: HTMLDivElement;
|
let container: HTMLDivElement;
|
||||||
let wavesurfer: WaveSurfer | null = $state(null);
|
let wavesurfer: WaveSurfer | null = $state(null);
|
||||||
|
let isReady = $state(false);
|
||||||
|
let isLoading = $state(false);
|
||||||
let currentTime = $state('0:00');
|
let currentTime = $state('0:00');
|
||||||
let totalTime = $state('0:00');
|
let totalTime = $state('0:00');
|
||||||
|
|
||||||
@@ -31,6 +33,7 @@
|
|||||||
barWidth: 2,
|
barWidth: 2,
|
||||||
barGap: 1,
|
barGap: 1,
|
||||||
barRadius: 2,
|
barRadius: 2,
|
||||||
|
backend: 'WebAudio',
|
||||||
});
|
});
|
||||||
|
|
||||||
wavesurfer.on('timeupdate', (time: number) => {
|
wavesurfer.on('timeupdate', (time: number) => {
|
||||||
@@ -39,6 +42,8 @@
|
|||||||
});
|
});
|
||||||
|
|
||||||
wavesurfer.on('ready', () => {
|
wavesurfer.on('ready', () => {
|
||||||
|
isReady = true;
|
||||||
|
isLoading = false;
|
||||||
const dur = wavesurfer!.getDuration();
|
const dur = wavesurfer!.getDuration();
|
||||||
durationMs.set(Math.round(dur * 1000));
|
durationMs.set(Math.round(dur * 1000));
|
||||||
totalTime = formatTime(dur);
|
totalTime = formatTime(dur);
|
||||||
@@ -48,8 +53,12 @@
|
|||||||
wavesurfer.on('pause', () => isPlaying.set(false));
|
wavesurfer.on('pause', () => isPlaying.set(false));
|
||||||
wavesurfer.on('finish', () => isPlaying.set(false));
|
wavesurfer.on('finish', () => isPlaying.set(false));
|
||||||
|
|
||||||
|
wavesurfer.on('loading', () => {
|
||||||
|
isReady = false;
|
||||||
|
});
|
||||||
|
|
||||||
if (audioUrl) {
|
if (audioUrl) {
|
||||||
wavesurfer.load(audioUrl);
|
loadAudio(audioUrl);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -57,20 +66,21 @@
|
|||||||
wavesurfer?.destroy();
|
wavesurfer?.destroy();
|
||||||
});
|
});
|
||||||
|
|
||||||
/** Toggle play/pause. Exposed for keyboard shortcuts. */
|
/** Toggle play/pause from current position. Exposed for keyboard shortcuts. */
|
||||||
export function togglePlayPause() {
|
export function togglePlayPause() {
|
||||||
wavesurfer?.playPause();
|
if (!wavesurfer || !isReady) return;
|
||||||
|
wavesurfer.playPause();
|
||||||
}
|
}
|
||||||
|
|
||||||
function skipBack() {
|
function skipBack() {
|
||||||
if (wavesurfer) {
|
if (wavesurfer && isReady) {
|
||||||
const time = Math.max(0, wavesurfer.getCurrentTime() - 5);
|
const time = Math.max(0, wavesurfer.getCurrentTime() - 5);
|
||||||
wavesurfer.setTime(time);
|
wavesurfer.setTime(time);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function skipForward() {
|
function skipForward() {
|
||||||
if (wavesurfer) {
|
if (wavesurfer && isReady) {
|
||||||
const time = Math.min(wavesurfer.getDuration(), wavesurfer.getCurrentTime() + 5);
|
const time = Math.min(wavesurfer.getDuration(), wavesurfer.getCurrentTime() + 5);
|
||||||
wavesurfer.setTime(time);
|
wavesurfer.setTime(time);
|
||||||
}
|
}
|
||||||
@@ -78,16 +88,17 @@
|
|||||||
|
|
||||||
/** Seek to a specific time in milliseconds. Called from transcript click-to-seek. */
|
/** Seek to a specific time in milliseconds. Called from transcript click-to-seek. */
|
||||||
export function seekTo(timeMs: number) {
|
export function seekTo(timeMs: number) {
|
||||||
if (wavesurfer) {
|
if (!wavesurfer || !isReady) {
|
||||||
|
console.warn('[voice-to-notes] seekTo ignored — audio not ready yet');
|
||||||
|
return;
|
||||||
|
}
|
||||||
wavesurfer.setTime(timeMs / 1000);
|
wavesurfer.setTime(timeMs / 1000);
|
||||||
if (!wavesurfer.isPlaying()) {
|
|
||||||
wavesurfer.play();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Load a new audio file. */
|
/** Load a new audio file. */
|
||||||
export function loadAudio(url: string) {
|
export function loadAudio(url: string) {
|
||||||
|
isReady = false;
|
||||||
|
isLoading = true;
|
||||||
wavesurfer?.load(url);
|
wavesurfer?.load(url);
|
||||||
}
|
}
|
||||||
</script>
|
</script>
|
||||||
@@ -95,11 +106,17 @@
|
|||||||
<div class="waveform-player">
|
<div class="waveform-player">
|
||||||
<div class="waveform-container" bind:this={container}></div>
|
<div class="waveform-container" bind:this={container}></div>
|
||||||
<div class="controls">
|
<div class="controls">
|
||||||
<button class="control-btn" onclick={skipBack} title="Back 5s">⏪</button>
|
<button class="control-btn" onclick={skipBack} title="Back 5s" disabled={!isReady}>⏪</button>
|
||||||
<button class="control-btn play-btn" onclick={togglePlayPause} title="Play/Pause">
|
<button class="control-btn play-btn" onclick={togglePlayPause} title="Play/Pause" disabled={!isReady}>
|
||||||
{#if $isPlaying}⏸{:else}▶{/if}
|
{#if !isReady}
|
||||||
|
⏳
|
||||||
|
{:else if $isPlaying}
|
||||||
|
⏸
|
||||||
|
{:else}
|
||||||
|
▶
|
||||||
|
{/if}
|
||||||
</button>
|
</button>
|
||||||
<button class="control-btn" onclick={skipForward} title="Forward 5s">⏩</button>
|
<button class="control-btn" onclick={skipForward} title="Forward 5s" disabled={!isReady}>⏩</button>
|
||||||
<span class="time">{currentTime} / {totalTime}</span>
|
<span class="time">{currentTime} / {totalTime}</span>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@@ -129,9 +146,13 @@
|
|||||||
cursor: pointer;
|
cursor: pointer;
|
||||||
font-size: 1rem;
|
font-size: 1rem;
|
||||||
}
|
}
|
||||||
.control-btn:hover {
|
.control-btn:hover:not(:disabled) {
|
||||||
background: #1a4a7a;
|
background: #1a4a7a;
|
||||||
}
|
}
|
||||||
|
.control-btn:disabled {
|
||||||
|
opacity: 0.4;
|
||||||
|
cursor: not-allowed;
|
||||||
|
}
|
||||||
.play-btn {
|
.play-btn {
|
||||||
padding: 0.4rem 1rem;
|
padding: 0.4rem 1rem;
|
||||||
font-size: 1.2rem;
|
font-size: 1.2rem;
|
||||||
|
|||||||
@@ -8,12 +8,16 @@ export interface AppSettings {
|
|||||||
openai_model: string;
|
openai_model: string;
|
||||||
anthropic_model: string;
|
anthropic_model: string;
|
||||||
litellm_model: string;
|
litellm_model: string;
|
||||||
|
litellm_api_key: string;
|
||||||
|
litellm_api_base: string;
|
||||||
local_model_path: string;
|
local_model_path: string;
|
||||||
local_binary_path: string;
|
local_binary_path: string;
|
||||||
transcription_model: string;
|
transcription_model: string;
|
||||||
transcription_device: string;
|
transcription_device: string;
|
||||||
transcription_language: string;
|
transcription_language: string;
|
||||||
skip_diarization: boolean;
|
skip_diarization: boolean;
|
||||||
|
hf_token: string;
|
||||||
|
num_speakers: number | null;
|
||||||
}
|
}
|
||||||
|
|
||||||
const defaults: AppSettings = {
|
const defaults: AppSettings = {
|
||||||
@@ -23,12 +27,16 @@ const defaults: AppSettings = {
|
|||||||
openai_model: 'gpt-4o-mini',
|
openai_model: 'gpt-4o-mini',
|
||||||
anthropic_model: 'claude-sonnet-4-6',
|
anthropic_model: 'claude-sonnet-4-6',
|
||||||
litellm_model: 'gpt-4o-mini',
|
litellm_model: 'gpt-4o-mini',
|
||||||
|
litellm_api_key: '',
|
||||||
|
litellm_api_base: '',
|
||||||
local_model_path: '',
|
local_model_path: '',
|
||||||
local_binary_path: 'llama-server',
|
local_binary_path: 'llama-server',
|
||||||
transcription_model: 'base',
|
transcription_model: 'base',
|
||||||
transcription_device: 'cpu',
|
transcription_device: 'cpu',
|
||||||
transcription_language: '',
|
transcription_language: '',
|
||||||
skip_diarization: false,
|
skip_diarization: false,
|
||||||
|
hf_token: '',
|
||||||
|
num_speakers: null,
|
||||||
};
|
};
|
||||||
|
|
||||||
export const settings = writable<AppSettings>({ ...defaults });
|
export const settings = writable<AppSettings>({ ...defaults });
|
||||||
@@ -45,4 +53,20 @@ export async function loadSettings(): Promise<void> {
|
|||||||
export async function saveSettings(s: AppSettings): Promise<void> {
|
export async function saveSettings(s: AppSettings): Promise<void> {
|
||||||
settings.set(s);
|
settings.set(s);
|
||||||
await invoke('save_settings', { settings: s });
|
await invoke('save_settings', { settings: s });
|
||||||
|
|
||||||
|
// Configure the AI provider in the Python sidecar
|
||||||
|
const configMap: Record<string, Record<string, string>> = {
|
||||||
|
openai: { api_key: s.openai_api_key, model: s.openai_model },
|
||||||
|
anthropic: { api_key: s.anthropic_api_key, model: s.anthropic_model },
|
||||||
|
litellm: { api_key: s.litellm_api_key, api_base: s.litellm_api_base, model: s.litellm_model },
|
||||||
|
local: { model: s.local_model_path, base_url: 'http://localhost:8080' },
|
||||||
|
};
|
||||||
|
const config = configMap[s.ai_provider];
|
||||||
|
if (config) {
|
||||||
|
try {
|
||||||
|
await invoke('ai_configure', { provider: s.ai_provider, config });
|
||||||
|
} catch {
|
||||||
|
// Sidecar may not be running yet — provider will be configured on first use
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -11,12 +11,18 @@
|
|||||||
import { segments, speakers } from '$lib/stores/transcript';
|
import { segments, speakers } from '$lib/stores/transcript';
|
||||||
import { settings, loadSettings } from '$lib/stores/settings';
|
import { settings, loadSettings } from '$lib/stores/settings';
|
||||||
import type { Segment, Speaker } from '$lib/types/transcript';
|
import type { Segment, Speaker } from '$lib/types/transcript';
|
||||||
import { onMount } from 'svelte';
|
import { onMount, tick } from 'svelte';
|
||||||
|
|
||||||
|
let appReady = $state(false);
|
||||||
let waveformPlayer: WaveformPlayer;
|
let waveformPlayer: WaveformPlayer;
|
||||||
let audioUrl = $state('');
|
let audioUrl = $state('');
|
||||||
let showSettings = $state(false);
|
let showSettings = $state(false);
|
||||||
|
|
||||||
|
// Project management state
|
||||||
|
let currentProjectPath = $state<string | null>(null);
|
||||||
|
let currentProjectName = $state('');
|
||||||
|
let audioFilePath = $state('');
|
||||||
|
|
||||||
onMount(() => {
|
onMount(() => {
|
||||||
loadSettings();
|
loadSettings();
|
||||||
|
|
||||||
@@ -43,8 +49,8 @@
|
|||||||
|
|
||||||
// Close export dropdown on outside click
|
// Close export dropdown on outside click
|
||||||
function handleClickOutside(e: MouseEvent) {
|
function handleClickOutside(e: MouseEvent) {
|
||||||
if (showExportMenu) {
|
|
||||||
const target = e.target as HTMLElement;
|
const target = e.target as HTMLElement;
|
||||||
|
if (showExportMenu) {
|
||||||
if (!target.closest('.export-dropdown')) {
|
if (!target.closest('.export-dropdown')) {
|
||||||
showExportMenu = false;
|
showExportMenu = false;
|
||||||
}
|
}
|
||||||
@@ -54,6 +60,8 @@
|
|||||||
document.addEventListener('keydown', handleKeyDown);
|
document.addEventListener('keydown', handleKeyDown);
|
||||||
document.addEventListener('click', handleClickOutside);
|
document.addEventListener('click', handleClickOutside);
|
||||||
|
|
||||||
|
appReady = true;
|
||||||
|
|
||||||
return () => {
|
return () => {
|
||||||
document.removeEventListener('keydown', handleKeyDown);
|
document.removeEventListener('keydown', handleKeyDown);
|
||||||
document.removeEventListener('click', handleClickOutside);
|
document.removeEventListener('click', handleClickOutside);
|
||||||
@@ -67,10 +75,136 @@
|
|||||||
// Speaker color palette for auto-assignment
|
// Speaker color palette for auto-assignment
|
||||||
const speakerColors = ['#e94560', '#4ecdc4', '#ffe66d', '#a8e6cf', '#ff8b94', '#c7ceea', '#ffd93d', '#6bcb77'];
|
const speakerColors = ['#e94560', '#4ecdc4', '#ffe66d', '#a8e6cf', '#ff8b94', '#c7ceea', '#ffd93d', '#6bcb77'];
|
||||||
|
|
||||||
|
/**
 * Ask the user for a destination and write the current transcript to a
 * `.vtn` project file via the `save_project_file` command.
 *
 * The project name is the chosen file name without its `.vtn` extension.
 * Speakers are stored by label; each segment records the label of its
 * speaker (or null) rather than the in-memory speaker id.
 * On success the current project path/name are updated; on failure the
 * error is logged and shown to the user.
 */
async function saveProject() {
  const defaultName = currentProjectName || 'Untitled';
  const outputPath = await save({
    defaultPath: `${defaultName}.vtn`,
    filters: [{ name: 'Voice to Notes Project', extensions: ['vtn'] }],
  });
  // Dialog was cancelled.
  if (!outputPath) return;

  // Strip only a trailing ".vtn": a plain string pattern would remove the
  // first occurrence anywhere in the name (e.g. "a.vtnotes.vtn").
  const chosenName = outputPath.split(/[\\/]/).pop()?.replace(/\.vtn$/i, '');

  const projectData = {
    version: 1,
    name: chosenName || defaultName,
    audio_file: audioFilePath,
    created_at: new Date().toISOString(),
    segments: $segments.map(seg => {
      const speaker = $speakers.find(s => s.id === seg.speaker_id);
      return {
        text: seg.text,
        start_ms: seg.start_ms,
        end_ms: seg.end_ms,
        speaker: speaker?.label ?? null,
        is_edited: seg.is_edited,
        words: seg.words.map(w => ({
          word: w.word,
          start_ms: w.start_ms,
          end_ms: w.end_ms,
          confidence: w.confidence ?? 0,
        })),
      };
    }),
    speakers: $speakers.map(s => ({
      label: s.label,
      display_name: s.display_name,
      color: s.color || '#e94560',
    })),
  };

  try {
    await invoke('save_project_file', { path: outputPath, project: projectData });
    currentProjectPath = outputPath;
    currentProjectName = projectData.name;
  } catch (err) {
    console.error('Failed to save project:', err);
    alert(`Failed to save: ${err}`);
  }
}
|
||||||
|
|
||||||
|
/**
 * Ask the user for a `.vtn` project file and load it into the editor.
 *
 * The file is read through the `load_project_file` command. Speakers and
 * segments are rebuilt with fresh in-memory ids (`speaker-N`, `seg-N`,
 * `word-N-M`), segments are re-linked to speakers by label, and the
 * referenced audio file is loaded into the waveform player.
 *
 * Both collections are fully converted before any store is written, so a
 * malformed file that throws part-way through leaves the currently open
 * transcript untouched. Failures are logged and shown to the user.
 */
async function openProject() {
  const filePath = await open({
    filters: [{ name: 'Voice to Notes Project', extensions: ['vtn'] }],
    multiple: false,
  });
  // Dialog was cancelled.
  if (!filePath) return;

  try {
    const project = await invoke<{
      version: number;
      name: string;
      audio_file: string;
      segments: Array<{
        text: string;
        start_ms: number;
        end_ms: number;
        speaker: string | null;
        is_edited: boolean;
        words: Array<{ word: string; start_ms: number; end_ms: number; confidence: number }>;
      }>;
      speakers: Array<{ label: string; display_name: string | null; color: string }>;
    }>('load_project_file', { path: filePath });

    // Rebuild speakers
    const newSpeakers: Speaker[] = project.speakers.map((s, idx) => ({
      id: `speaker-${idx}`,
      project_id: '',
      label: s.label,
      display_name: s.display_name,
      color: s.color,
    }));

    // The file references speakers by label; map labels back to the new ids.
    const speakerLookup = new Map(newSpeakers.map(s => [s.label, s.id]));

    // Rebuild segments
    const newSegments: Segment[] = project.segments.map((seg, idx) => ({
      id: `seg-${idx}`,
      project_id: '',
      media_file_id: '',
      speaker_id: seg.speaker ? (speakerLookup.get(seg.speaker) ?? null) : null,
      start_ms: seg.start_ms,
      end_ms: seg.end_ms,
      text: seg.text,
      original_text: null,
      confidence: null,
      is_edited: seg.is_edited,
      edited_at: null,
      segment_index: idx,
      words: seg.words.map((w, widx) => ({
        id: `word-${idx}-${widx}`,
        segment_id: `seg-${idx}`,
        word: w.word,
        start_ms: w.start_ms,
        end_ms: w.end_ms,
        confidence: w.confidence,
        word_index: widx,
      })),
    }));

    // Commit only after everything converted cleanly, so the two stores
    // can never end up describing different projects.
    speakers.set(newSpeakers);
    segments.set(newSegments);

    // Load audio
    audioFilePath = project.audio_file;
    audioUrl = convertFileSrc(project.audio_file);
    waveformPlayer?.loadAudio(audioUrl);

    currentProjectPath = filePath as string;
    currentProjectName = project.name;
  } catch (err) {
    console.error('Failed to load project:', err);
    alert(`Failed to load project: ${err}`);
  }
}
|
||||||
|
|
||||||
function handleWordClick(timeMs: number) {
|
function handleWordClick(timeMs: number) {
|
||||||
|
console.log('[voice-to-notes] Word clicked, seeking to', timeMs, 'ms');
|
||||||
waveformPlayer?.seekTo(timeMs);
|
waveformPlayer?.seekTo(timeMs);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Callback passed to TranscriptEditor as `onTextEdit`.
 *
 * Intentionally a no-op: both arguments are ignored and nothing is written.
 *
 * @param segmentId - presumably the id of the edited segment (unused here)
 * @param newText - presumably the edited segment text (unused here)
 */
function handleTextEdit(segmentId: string, newText: string) {
|
||||||
|
// In-memory store is already updated by TranscriptEditor.
|
||||||
|
// Changes persist when user saves the project file.
|
||||||
|
}
|
||||||
|
|
||||||
async function handleFileImport() {
|
async function handleFileImport() {
|
||||||
const filePath = await open({
|
const filePath = await open({
|
||||||
multiple: false,
|
multiple: false,
|
||||||
@@ -82,28 +216,99 @@
|
|||||||
});
|
});
|
||||||
if (!filePath) return;
|
if (!filePath) return;
|
||||||
|
|
||||||
// Convert file path to asset URL for wavesurfer
|
// Track the original file path and convert to asset URL for wavesurfer
|
||||||
|
audioFilePath = filePath;
|
||||||
audioUrl = convertFileSrc(filePath);
|
audioUrl = convertFileSrc(filePath);
|
||||||
waveformPlayer?.loadAudio(audioUrl);
|
waveformPlayer?.loadAudio(audioUrl);
|
||||||
|
|
||||||
|
// Clear previous results
|
||||||
|
segments.set([]);
|
||||||
|
speakers.set([]);
|
||||||
|
|
||||||
// Start pipeline (transcription + diarization)
|
// Start pipeline (transcription + diarization)
|
||||||
isTranscribing = true;
|
isTranscribing = true;
|
||||||
transcriptionProgress = 0;
|
transcriptionProgress = 0;
|
||||||
transcriptionStage = 'Starting...';
|
transcriptionStage = 'Starting...';
|
||||||
transcriptionMessage = 'Initializing pipeline...';
|
transcriptionMessage = 'Initializing pipeline...';
|
||||||
|
|
||||||
|
// Flush DOM so the progress overlay renders before the blocking invoke
|
||||||
|
await tick();
|
||||||
|
|
||||||
// Listen for progress events from the sidecar
|
// Listen for progress events from the sidecar
|
||||||
const unlisten = await listen<{
|
const unlisten = await listen<{
|
||||||
percent: number;
|
percent: number;
|
||||||
stage: string;
|
stage: string;
|
||||||
message: string;
|
message: string;
|
||||||
}>('pipeline-progress', (event) => {
|
}>('pipeline-progress', (event) => {
|
||||||
|
console.log('[voice-to-notes] Progress event:', event.payload);
|
||||||
const { percent, stage, message } = event.payload;
|
const { percent, stage, message } = event.payload;
|
||||||
if (typeof percent === 'number') transcriptionProgress = percent;
|
if (typeof percent === 'number') transcriptionProgress = percent;
|
||||||
if (typeof stage === 'string') transcriptionStage = stage;
|
if (typeof stage === 'string') transcriptionStage = stage;
|
||||||
if (typeof message === 'string') transcriptionMessage = message;
|
if (typeof message === 'string') transcriptionMessage = message;
|
||||||
});
|
});
|
||||||
|
|
||||||
|
const unlistenSegment = await listen<{
|
||||||
|
index: number;
|
||||||
|
text: string;
|
||||||
|
start_ms: number;
|
||||||
|
end_ms: number;
|
||||||
|
words: Array<{ word: string; start_ms: number; end_ms: number; confidence: number }>;
|
||||||
|
}>('pipeline-segment', (event) => {
|
||||||
|
const seg = event.payload;
|
||||||
|
const newSeg: Segment = {
|
||||||
|
id: `seg-${seg.index}`,
|
||||||
|
project_id: '',
|
||||||
|
media_file_id: '',
|
||||||
|
speaker_id: null,
|
||||||
|
start_ms: seg.start_ms,
|
||||||
|
end_ms: seg.end_ms,
|
||||||
|
text: seg.text,
|
||||||
|
original_text: null,
|
||||||
|
confidence: null,
|
||||||
|
is_edited: false,
|
||||||
|
edited_at: null,
|
||||||
|
segment_index: seg.index,
|
||||||
|
words: seg.words.map((w, widx) => ({
|
||||||
|
id: `word-${seg.index}-${widx}`,
|
||||||
|
segment_id: `seg-${seg.index}`,
|
||||||
|
word: w.word,
|
||||||
|
start_ms: w.start_ms,
|
||||||
|
end_ms: w.end_ms,
|
||||||
|
confidence: w.confidence,
|
||||||
|
word_index: widx,
|
||||||
|
})),
|
||||||
|
};
|
||||||
|
segments.update(segs => [...segs, newSeg]);
|
||||||
|
});
|
||||||
|
|
||||||
|
const unlistenSpeaker = await listen<{
|
||||||
|
updates: Array<{ index: number; speaker: string }>;
|
||||||
|
}>('pipeline-speaker-update', (event) => {
|
||||||
|
const { updates } = event.payload;
|
||||||
|
// Build speakers from unique labels
|
||||||
|
const uniqueLabels = [...new Set(updates.map(u => u.speaker))].sort();
|
||||||
|
const newSpeakers: Speaker[] = uniqueLabels.map((label, idx) => ({
|
||||||
|
id: `speaker-${idx}`,
|
||||||
|
project_id: '',
|
||||||
|
label,
|
||||||
|
display_name: null,
|
||||||
|
color: speakerColors[idx % speakerColors.length],
|
||||||
|
}));
|
||||||
|
speakers.set(newSpeakers);
|
||||||
|
|
||||||
|
// Update existing segments with speaker assignments
|
||||||
|
const speakerLookup = new Map(newSpeakers.map(s => [s.label, s.id]));
|
||||||
|
segments.update(segs =>
|
||||||
|
segs.map((seg, i) => {
|
||||||
|
const update = updates.find(u => u.index === i);
|
||||||
|
if (update) {
|
||||||
|
return { ...seg, speaker_id: speakerLookup.get(update.speaker) ?? null };
|
||||||
|
}
|
||||||
|
return seg;
|
||||||
|
})
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const result = await invoke<{
|
const result = await invoke<{
|
||||||
segments: Array<{
|
segments: Array<{
|
||||||
@@ -128,6 +333,8 @@
|
|||||||
device: $settings.transcription_device || undefined,
|
device: $settings.transcription_device || undefined,
|
||||||
language: $settings.transcription_language || undefined,
|
language: $settings.transcription_language || undefined,
|
||||||
skipDiarization: $settings.skip_diarization || undefined,
|
skipDiarization: $settings.skip_diarization || undefined,
|
||||||
|
hfToken: $settings.hf_token || undefined,
|
||||||
|
numSpeakers: $settings.num_speakers && $settings.num_speakers > 0 ? $settings.num_speakers : undefined,
|
||||||
});
|
});
|
||||||
|
|
||||||
// Create speaker entries from pipeline result
|
// Create speaker entries from pipeline result
|
||||||
@@ -169,11 +376,18 @@
|
|||||||
}));
|
}));
|
||||||
|
|
||||||
segments.set(newSegments);
|
segments.set(newSegments);
|
||||||
|
|
||||||
|
// Set project name from audio file name (user can save explicitly)
|
||||||
|
const fileName = filePath.split(/[\\/]/).pop() || 'Untitled';
|
||||||
|
currentProjectName = fileName.replace(/\.[^.]+$/, '');
|
||||||
|
currentProjectPath = null;
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
console.error('Pipeline failed:', err);
|
console.error('Pipeline failed:', err);
|
||||||
alert(`Pipeline failed: ${err}`);
|
alert(`Pipeline failed: ${err}`);
|
||||||
} finally {
|
} finally {
|
||||||
unlisten();
|
unlisten();
|
||||||
|
unlistenSegment();
|
||||||
|
unlistenSpeaker();
|
||||||
isTranscribing = false;
|
isTranscribing = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -229,11 +443,30 @@
|
|||||||
}
|
}
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
<div class="app-header">
|
{#if !appReady}
|
||||||
<h1>Voice to Notes</h1>
|
<div class="splash-screen">
|
||||||
|
<h1 class="splash-title">Voice to Notes</h1>
|
||||||
|
<p class="splash-subtitle">Loading...</p>
|
||||||
|
<div class="splash-spinner"></div>
|
||||||
|
</div>
|
||||||
|
{:else}
|
||||||
|
<div class="app-shell">
|
||||||
|
<div class="app-header">
|
||||||
<div class="header-actions">
|
<div class="header-actions">
|
||||||
<button class="import-btn" onclick={handleFileImport}>
|
<button class="settings-btn" onclick={openProject} disabled={isTranscribing}>
|
||||||
|
Open Project
|
||||||
|
</button>
|
||||||
|
{#if $segments.length > 0}
|
||||||
|
<button class="settings-btn" onclick={saveProject}>
|
||||||
|
Save Project
|
||||||
|
</button>
|
||||||
|
{/if}
|
||||||
|
<button class="import-btn" onclick={handleFileImport} disabled={isTranscribing}>
|
||||||
|
{#if isTranscribing}
|
||||||
|
Processing...
|
||||||
|
{:else}
|
||||||
Import Audio/Video
|
Import Audio/Video
|
||||||
|
{/if}
|
||||||
</button>
|
</button>
|
||||||
<button class="settings-btn" onclick={() => showSettings = true} title="Settings">
|
<button class="settings-btn" onclick={() => showSettings = true} title="Settings">
|
||||||
Settings
|
Settings
|
||||||
@@ -255,30 +488,32 @@
|
|||||||
</div>
|
</div>
|
||||||
{/if}
|
{/if}
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="workspace">
|
<div class="workspace">
|
||||||
<div class="main-content">
|
<div class="main-content">
|
||||||
<WaveformPlayer bind:this={waveformPlayer} {audioUrl} />
|
<WaveformPlayer bind:this={waveformPlayer} {audioUrl} />
|
||||||
<TranscriptEditor onWordClick={handleWordClick} />
|
<TranscriptEditor onWordClick={handleWordClick} onTextEdit={handleTextEdit} />
|
||||||
</div>
|
</div>
|
||||||
<div class="sidebar-right">
|
<div class="sidebar-right">
|
||||||
<SpeakerManager />
|
<SpeakerManager />
|
||||||
<AIChatPanel />
|
<AIChatPanel />
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
<ProgressOverlay
|
<ProgressOverlay
|
||||||
visible={isTranscribing}
|
visible={isTranscribing}
|
||||||
percent={transcriptionProgress}
|
percent={transcriptionProgress}
|
||||||
stage={transcriptionStage}
|
stage={transcriptionStage}
|
||||||
message={transcriptionMessage}
|
message={transcriptionMessage}
|
||||||
/>
|
/>
|
||||||
|
|
||||||
<SettingsModal
|
<SettingsModal
|
||||||
visible={showSettings}
|
visible={showSettings}
|
||||||
onClose={() => showSettings = false}
|
onClose={() => showSettings = false}
|
||||||
/>
|
/>
|
||||||
|
{/if}
|
||||||
|
|
||||||
<style>
|
<style>
|
||||||
.app-header {
|
.app-header {
|
||||||
@@ -289,10 +524,6 @@
|
|||||||
background: #0f3460;
|
background: #0f3460;
|
||||||
color: #e0e0e0;
|
color: #e0e0e0;
|
||||||
}
|
}
|
||||||
h1 {
|
|
||||||
font-size: 1.25rem;
|
|
||||||
margin: 0;
|
|
||||||
}
|
|
||||||
.import-btn {
|
.import-btn {
|
||||||
background: #e94560;
|
background: #e94560;
|
||||||
border: none;
|
border: none;
|
||||||
@@ -303,9 +534,18 @@
|
|||||||
font-size: 0.875rem;
|
font-size: 0.875rem;
|
||||||
font-weight: 500;
|
font-weight: 500;
|
||||||
}
|
}
|
||||||
.import-btn:hover {
|
.import-btn:hover:not(:disabled) {
|
||||||
background: #d63851;
|
background: #d63851;
|
||||||
}
|
}
|
||||||
|
.import-btn:disabled {
|
||||||
|
opacity: 0.7;
|
||||||
|
cursor: not-allowed;
|
||||||
|
animation: pulse 1.5s ease-in-out infinite;
|
||||||
|
}
|
||||||
|
@keyframes pulse {
|
||||||
|
0%, 100% { opacity: 0.7; }
|
||||||
|
50% { opacity: 1; }
|
||||||
|
}
|
||||||
.header-actions {
|
.header-actions {
|
||||||
display: flex;
|
display: flex;
|
||||||
gap: 0.5rem;
|
gap: 0.5rem;
|
||||||
@@ -320,10 +560,14 @@
|
|||||||
cursor: pointer;
|
cursor: pointer;
|
||||||
font-size: 0.875rem;
|
font-size: 0.875rem;
|
||||||
}
|
}
|
||||||
.settings-btn:hover {
|
.settings-btn:hover:not(:disabled) {
|
||||||
background: rgba(255,255,255,0.05);
|
background: rgba(255,255,255,0.05);
|
||||||
border-color: #e94560;
|
border-color: #e94560;
|
||||||
}
|
}
|
||||||
|
.settings-btn:disabled {
|
||||||
|
opacity: 0.5;
|
||||||
|
cursor: not-allowed;
|
||||||
|
}
|
||||||
.export-dropdown {
|
.export-dropdown {
|
||||||
position: relative;
|
position: relative;
|
||||||
}
|
}
|
||||||
@@ -366,11 +610,19 @@
|
|||||||
.export-option:hover {
|
.export-option:hover {
|
||||||
background: rgba(233, 69, 96, 0.2);
|
background: rgba(233, 69, 96, 0.2);
|
||||||
}
|
}
|
||||||
|
.app-shell {
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
height: 100vh;
|
||||||
|
overflow: hidden;
|
||||||
|
}
|
||||||
.workspace {
|
.workspace {
|
||||||
display: flex;
|
display: flex;
|
||||||
gap: 1rem;
|
gap: 1rem;
|
||||||
padding: 1rem;
|
padding: 1rem;
|
||||||
height: calc(100vh - 3.5rem);
|
flex: 1;
|
||||||
|
min-height: 0;
|
||||||
|
overflow: hidden;
|
||||||
background: #0a0a23;
|
background: #0a0a23;
|
||||||
}
|
}
|
||||||
.main-content {
|
.main-content {
|
||||||
@@ -379,6 +631,8 @@
|
|||||||
flex-direction: column;
|
flex-direction: column;
|
||||||
gap: 1rem;
|
gap: 1rem;
|
||||||
min-width: 0;
|
min-width: 0;
|
||||||
|
min-height: 0;
|
||||||
|
overflow-y: auto;
|
||||||
}
|
}
|
||||||
.sidebar-right {
|
.sidebar-right {
|
||||||
width: 300px;
|
width: 300px;
|
||||||
@@ -386,5 +640,38 @@
|
|||||||
flex-direction: column;
|
flex-direction: column;
|
||||||
gap: 1rem;
|
gap: 1rem;
|
||||||
flex-shrink: 0;
|
flex-shrink: 0;
|
||||||
|
min-height: 0;
|
||||||
|
overflow-y: auto;
|
||||||
|
}
|
||||||
|
.splash-screen {
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
align-items: center;
|
||||||
|
justify-content: center;
|
||||||
|
height: 100vh;
|
||||||
|
background: #0a0a23;
|
||||||
|
color: #e0e0e0;
|
||||||
|
gap: 1rem;
|
||||||
|
}
|
||||||
|
.splash-title {
|
||||||
|
font-size: 2rem;
|
||||||
|
margin: 0;
|
||||||
|
color: #e94560;
|
||||||
|
}
|
||||||
|
.splash-subtitle {
|
||||||
|
font-size: 1rem;
|
||||||
|
color: #888;
|
||||||
|
margin: 0;
|
||||||
|
}
|
||||||
|
.splash-spinner {
|
||||||
|
width: 32px;
|
||||||
|
height: 32px;
|
||||||
|
border: 3px solid #2a3a5e;
|
||||||
|
border-top-color: #e94560;
|
||||||
|
border-radius: 50%;
|
||||||
|
animation: spin 0.8s linear infinite;
|
||||||
|
}
|
||||||
|
@keyframes spin {
|
||||||
|
to { transform: rotate(360deg); }
|
||||||
}
|
}
|
||||||
</style>
|
</style>
|
||||||
|
|||||||
Reference in New Issue
Block a user