Cross-platform distribution, UI improvements, and performance optimizations
- PyInstaller frozen sidecar: spec file, build script, and ffmpeg path resolver for self-contained distribution without Python prerequisites
- Dual-mode sidecar launcher: frozen binary (production) with dev mode fallback
- Parallel transcription + diarization pipeline (~30-40% faster)
- GPU auto-detection for diarization (CUDA when available)
- Async run_pipeline command for real-time progress event delivery
- Web Audio API backend for instant playback and seeking
- OpenAI-compatible provider replacing LiteLLM client-side routing
- Cross-platform RAM detection (Linux/macOS/Windows)
- Settings: speaker count hint, token reveal toggles, dark dropdown styling
- Loading splash screen, flexbox layout fix for viewport overflow
- Gitea Actions CI/CD pipeline (Linux, Windows, macOS ARM)
- Updated README and CLAUDE.md documentation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
136
.gitea/workflows/build.yml
Normal file
136
.gitea/workflows/build.yml
Normal file
@@ -0,0 +1,136 @@
|
|||||||
|
# Two-stage pipeline: freeze the Python sidecar with PyInstaller for each
# target, then bundle that sidecar into the Tauri desktop app.
name: Build & Release

on:
  push:
    branches: [main]
    tags: ["v*"]
  pull_request:
    branches: [main]

# Tool versions shared by both jobs. Quoted so they stay strings.
env:
  PYTHON_VERSION: "3.11"
  NODE_VERSION: "20"

jobs:
  # Stage 1: produce python/dist/voice-to-notes-sidecar/ for each target and
  # publish it as the artifact "sidecar-<target triple>".
  build-sidecar:
    name: Build sidecar (${{ matrix.target }})
    runs-on: ${{ matrix.runner }}
    strategy:
      # Let the other platforms finish even if one of them fails.
      fail-fast: false
      matrix:
        include:
          - runner: ubuntu-latest
            target: x86_64-unknown-linux-gnu
            platform: linux
          - runner: windows-latest
            target: x86_64-pc-windows-msvc
            platform: windows
          - runner: macos-latest
            target: aarch64-apple-darwin
            platform: macos

    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Build sidecar
        working-directory: python
        run: python build_sidecar.py --cpu-only

      # NOTE(review): the v4 artifact actions may refuse to run against
      # non-GitHub servers; confirm this Gitea instance/runner supports them.
      - name: Upload sidecar artifact
        uses: actions/upload-artifact@v4
        with:
          name: sidecar-${{ matrix.target }}
          path: python/dist/voice-to-notes-sidecar/
          retention-days: 7

  # Stage 2: download the matching sidecar artifact and build the Tauri app.
  # The matrix mirrors build-sidecar so artifact names line up by target.
  build-tauri:
    name: Build app (${{ matrix.target }})
    needs: build-sidecar
    runs-on: ${{ matrix.runner }}
    strategy:
      fail-fast: false
      matrix:
        include:
          - runner: ubuntu-latest
            target: x86_64-unknown-linux-gnu
            platform: linux
          - runner: windows-latest
            target: x86_64-pc-windows-msvc
            platform: windows
          - runner: macos-latest
            target: aarch64-apple-darwin
            platform: macos

    steps:
      - uses: actions/checkout@v4

      - name: Set up Node.js
        uses: actions/setup-node@v4
        with:
          node-version: ${{ env.NODE_VERSION }}
          # Note: 'cache: npm' requires the Gitea instance to have
          # Actions cache configured. Remove this if caching is unavailable.
          cache: npm

      - name: Install Rust stable
        uses: dtolnay/rust-toolchain@stable

      - name: Install system dependencies (Linux)
        if: matrix.platform == 'linux'
        run: |
          sudo apt-get update
          sudo apt-get install -y libgtk-3-dev libwebkit2gtk-4.1-dev libappindicator3-dev librsvg2-dev patchelf

      - name: Download sidecar artifact
        uses: actions/download-artifact@v4
        with:
          name: sidecar-${{ matrix.target }}
          path: src-tauri/binaries/

      # Restore the executable bit on the downloaded sidecar binary.
      - name: Make sidecar executable (Unix)
        if: matrix.platform != 'windows'
        run: chmod +x src-tauri/binaries/voice-to-notes-sidecar-${{ matrix.target }}

      - name: Install npm dependencies
        run: npm ci

      - name: Build Tauri app
        run: npm run tauri build
        env:
          TAURI_SIGNING_PRIVATE_KEY: ${{ secrets.TAURI_SIGNING_PRIVATE_KEY }}
          TAURI_CONFIG: '{"bundle":{"externalBin":["binaries/voice-to-notes-sidecar"]}}'

      # One upload step per platform because the bundle directories differ.
      - name: Upload app artifacts (Linux)
        if: matrix.platform == 'linux'
        uses: actions/upload-artifact@v4
        with:
          name: app-${{ matrix.target }}
          path: |
            src-tauri/target/release/bundle/deb/*.deb
            src-tauri/target/release/bundle/appimage/*.AppImage
          retention-days: 30

      - name: Upload app artifacts (Windows)
        if: matrix.platform == 'windows'
        uses: actions/upload-artifact@v4
        with:
          name: app-${{ matrix.target }}
          path: |
            src-tauri/target/release/bundle/msi/*.msi
            src-tauri/target/release/bundle/nsis/*.exe
          retention-days: 30

      - name: Upload app artifacts (macOS)
        if: matrix.platform == 'macos'
        uses: actions/upload-artifact@v4
        with:
          name: app-${{ matrix.target }}
          path: |
            src-tauri/target/release/bundle/dmg/*.dmg
            src-tauri/target/release/bundle/macos/*.app
          retention-days: 30
|
||||||
141
.github/workflows/build.yml
vendored
Normal file
141
.github/workflows/build.yml
vendored
Normal file
@@ -0,0 +1,141 @@
|
|||||||
|
---
# Two-stage pipeline: freeze the Python sidecar with PyInstaller for each
# target, then bundle that sidecar into the Tauri desktop app.
name: Build & Release

on:
  push:
    branches: [main]
    tags: ["v*"]
  pull_request:
    branches: [main]
  workflow_dispatch:

# Tool versions shared by both jobs. Quoted so they stay strings.
env:
  PYTHON_VERSION: "3.11"
  NODE_VERSION: "20"

jobs:
  # Stage 1: produce python/dist/voice-to-notes-sidecar/ for each target and
  # publish it as the artifact "sidecar-<target triple>".
  build-sidecar:
    name: Build sidecar (${{ matrix.target }})
    runs-on: ${{ matrix.runner }}
    strategy:
      # Let the other platforms finish even if one of them fails.
      fail-fast: false
      matrix:
        include:
          # Same image as build-tauri so the frozen sidecar is built against
          # the glibc of the system the app bundle is produced on.
          - runner: ubuntu-22.04
            target: x86_64-unknown-linux-gnu
            platform: linux
          - runner: windows-latest
            target: x86_64-pc-windows-msvc
            platform: windows
          - runner: macos-13
            target: x86_64-apple-darwin
            platform: macos-intel
          - runner: macos-14
            target: aarch64-apple-darwin
            platform: macos-arm

    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Build sidecar
        working-directory: python
        run: python build_sidecar.py --cpu-only

      - name: Upload sidecar artifact
        uses: actions/upload-artifact@v4
        with:
          name: sidecar-${{ matrix.target }}
          path: python/dist/voice-to-notes-sidecar/
          retention-days: 7

  # Stage 2: download the matching sidecar artifact and build the Tauri app.
  # The matrix mirrors build-sidecar so artifact names line up by target.
  build-tauri:
    name: Build app (${{ matrix.target }})
    needs: build-sidecar
    runs-on: ${{ matrix.runner }}
    strategy:
      fail-fast: false
      matrix:
        include:
          # Ubuntu 22.04 is the oldest release whose repositories provide
          # libwebkit2gtk-4.1-dev, which the Linux dependency step installs.
          - runner: ubuntu-22.04
            target: x86_64-unknown-linux-gnu
            platform: linux
          - runner: windows-latest
            target: x86_64-pc-windows-msvc
            platform: windows
          - runner: macos-13
            target: x86_64-apple-darwin
            platform: macos-intel
          - runner: macos-14
            target: aarch64-apple-darwin
            platform: macos-arm

    steps:
      - uses: actions/checkout@v4

      - name: Set up Node.js
        uses: actions/setup-node@v4
        with:
          node-version: ${{ env.NODE_VERSION }}
          cache: npm

      - name: Install Rust stable
        uses: dtolnay/rust-toolchain@stable

      - name: Install system dependencies (Linux)
        if: matrix.platform == 'linux'
        run: |
          sudo apt-get update
          sudo apt-get install -y libgtk-3-dev libwebkit2gtk-4.1-dev libappindicator3-dev librsvg2-dev patchelf

      - name: Download sidecar artifact
        uses: actions/download-artifact@v4
        with:
          name: sidecar-${{ matrix.target }}
          path: src-tauri/binaries/

      # Artifact upload/download does not keep file permissions, so the
      # executable bit has to be restored on the sidecar binary.
      - name: Make sidecar executable (Unix)
        if: matrix.platform != 'windows'
        run: chmod +x src-tauri/binaries/voice-to-notes-sidecar-${{ matrix.target }}

      - name: Install npm dependencies
        run: npm ci

      - name: Build Tauri app
        run: npm run tauri build
        env:
          TAURI_SIGNING_PRIVATE_KEY: ${{ secrets.TAURI_SIGNING_PRIVATE_KEY }}
          TAURI_CONFIG: '{"bundle":{"externalBin":["binaries/voice-to-notes-sidecar"]}}'

      # One upload step per platform because the bundle directories differ.
      - name: Upload app artifacts (Linux)
        if: matrix.platform == 'linux'
        uses: actions/upload-artifact@v4
        with:
          name: app-${{ matrix.target }}
          path: |
            src-tauri/target/release/bundle/deb/*.deb
            src-tauri/target/release/bundle/appimage/*.AppImage
          retention-days: 30

      - name: Upload app artifacts (Windows)
        if: matrix.platform == 'windows'
        uses: actions/upload-artifact@v4
        with:
          name: app-${{ matrix.target }}
          path: |
            src-tauri/target/release/bundle/msi/*.msi
            src-tauri/target/release/bundle/nsis/*.exe
          retention-days: 30

      # Matches both macos-intel and macos-arm matrix rows.
      - name: Upload app artifacts (macOS)
        if: startsWith(matrix.platform, 'macos')
        uses: actions/upload-artifact@v4
        with:
          name: app-${{ matrix.target }}
          path: |
            src-tauri/target/release/bundle/dmg/*.dmg
            src-tauri/target/release/bundle/macos/*.app
          retention-days: 30
|
||||||
6
.gitignore
vendored
6
.gitignore
vendored
@@ -46,3 +46,9 @@ Thumbs.db
|
|||||||
*.ogg
|
*.ogg
|
||||||
*.flac
|
*.flac
|
||||||
!test/fixtures/*
|
!test/fixtures/*
|
||||||
|
|
||||||
|
# Sidecar build artifacts
|
||||||
|
src-tauri/binaries/*
|
||||||
|
!src-tauri/binaries/.gitkeep
|
||||||
|
python/dist/
|
||||||
|
python/build/
|
||||||
|
|||||||
14
CLAUDE.md
14
CLAUDE.md
@@ -8,7 +8,7 @@ Desktop app for transcribing audio/video with speaker identification. Runs local
|
|||||||
- **ML pipeline:** Python sidecar process (faster-whisper, pyannote.audio, wav2vec2)
|
- **ML pipeline:** Python sidecar process (faster-whisper, pyannote.audio, wav2vec2)
|
||||||
- **Database:** SQLite (via rusqlite in Rust)
|
- **Database:** SQLite (via rusqlite in Rust)
|
||||||
- **Local AI:** Bundled llama-server (llama.cpp) — default, no install needed
|
- **Local AI:** Bundled llama-server (llama.cpp) — default, no install needed
|
||||||
- **Cloud AI providers:** LiteLLM, OpenAI, Anthropic (optional, user-configured)
|
- **Cloud AI providers:** OpenAI, Anthropic, OpenAI-compatible endpoints (optional, user-configured)
|
||||||
- **Caption export:** pysubs2 (Python)
|
- **Caption export:** pysubs2 (Python)
|
||||||
- **Audio UI:** wavesurfer.js
|
- **Audio UI:** wavesurfer.js
|
||||||
- **Transcript editor:** TipTap (ProseMirror)
|
- **Transcript editor:** TipTap (ProseMirror)
|
||||||
@@ -40,7 +40,13 @@ docs/ # Architecture and design documents
|
|||||||
- Database: UUIDs as primary keys (TEXT type in SQLite)
|
- Database: UUIDs as primary keys (TEXT type in SQLite)
|
||||||
- All timestamps in milliseconds (integer) relative to media file start
|
- All timestamps in milliseconds (integer) relative to media file start
|
||||||
|
|
||||||
|
## Distribution
|
||||||
|
- Python sidecar is frozen via PyInstaller into a standalone binary for distribution
|
||||||
|
- Tauri bundles the sidecar via `externalBin` — no Python required for end users
|
||||||
|
- CI/CD builds on Gitea Actions (Linux, Windows, macOS ARM)
|
||||||
|
- Dev mode uses system Python (`VOICE_TO_NOTES_DEV=1` or debug builds)
|
||||||
|
|
||||||
## Platform Targets
|
## Platform Targets
|
||||||
- Linux (primary development target)
|
- Linux x86_64 (primary development target)
|
||||||
- Windows (must work, tested before release)
|
- Windows x86_64
|
||||||
- macOS (future, not yet targeted)
|
- macOS aarch64 (Apple Silicon)
|
||||||
|
|||||||
94
README.md
94
README.md
@@ -2,28 +2,90 @@
|
|||||||
|
|
||||||
A desktop application that transcribes audio/video recordings with speaker identification, producing editable transcriptions with synchronized audio playback.
|
A desktop application that transcribes audio/video recordings with speaker identification, producing editable transcriptions with synchronized audio playback.
|
||||||
|
|
||||||
## Goals
|
## Features
|
||||||
|
|
||||||
- **Speech-to-Text Transcription** — Accurately convert spoken audio from recordings into text
|
- **Speech-to-Text Transcription** — Accurate transcription via faster-whisper (Whisper models) with word-level timestamps
|
||||||
- **Speaker Identification (Diarization)** — Detect and distinguish between different speakers in a conversation
|
- **Speaker Identification (Diarization)** — Detect and distinguish between speakers using pyannote.audio
|
||||||
- **Speaker Naming** — Assign and persist speaker names/IDs across the transcription
|
- **Synchronized Playback** — Click any word to seek to that point in the audio (Web Audio API for instant playback)
|
||||||
- **Synchronized Playback** — Click any transcribed text segment to play back the corresponding audio for review and correction
|
- **AI Integration** — Ask questions about your transcript via OpenAI, Anthropic, or any OpenAI-compatible API (LiteLLM proxies, Ollama, vLLM)
|
||||||
- **Export Formats**
|
- **Export Formats** — SRT, WebVTT, ASS captions, plain text, and Markdown with speaker labels
|
||||||
- Closed captioning files (SRT, VTT) for video
|
- **Cross-Platform** — Builds for Linux, Windows, and macOS (Apple Silicon)
|
||||||
- Plain text documents with speaker labels
|
|
||||||
- **AI Integration** — Connect to AI providers to ask questions about the conversation and generate condensed notes/summaries
|
|
||||||
|
|
||||||
## Platform Support
|
## Platform Support
|
||||||
|
|
||||||
| Platform | Status |
|
| Platform | Architecture | Status |
|
||||||
|----------|--------|
|
|----------|-------------|--------|
|
||||||
| Linux | Planned (initial target) |
|
| Linux | x86_64 | Supported |
|
||||||
| Windows | Planned (initial target) |
|
| Windows | x86_64 | Supported |
|
||||||
| macOS | Future (pending hardware) |
|
| macOS | ARM (Apple Silicon) | Supported |
|
||||||
|
|
||||||
## Project Status
|
## Tech Stack
|
||||||
|
|
||||||
**Early planning phase** — Architecture and technology decisions in progress.
|
- **Desktop shell:** Tauri v2 (Rust backend + Svelte 5 / TypeScript frontend)
|
||||||
|
- **ML pipeline:** Python sidecar (faster-whisper, pyannote.audio) — frozen via PyInstaller for distribution
|
||||||
|
- **Audio playback:** wavesurfer.js with Web Audio API backend
|
||||||
|
- **AI providers:** OpenAI, Anthropic, OpenAI-compatible endpoints (local or remote)
|
||||||
|
- **Local AI:** Bundled llama-server (llama.cpp)
|
||||||
|
- **Caption export:** pysubs2
|
||||||
|
|
||||||
|
## Development
|
||||||
|
|
||||||
|
### Prerequisites
|
||||||
|
|
||||||
|
- Node.js 20+
|
||||||
|
- Rust (stable)
|
||||||
|
- Python 3.11+ with ML dependencies
|
||||||
|
- System: `libgtk-3-dev`, `libwebkit2gtk-4.1-dev` (Linux)
|
||||||
|
|
||||||
|
### Getting Started
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Install frontend dependencies
|
||||||
|
npm install
|
||||||
|
|
||||||
|
# Install Python sidecar dependencies
|
||||||
|
cd python && pip install -e . && cd ..
|
||||||
|
|
||||||
|
# Run in dev mode (uses system Python for the sidecar)
|
||||||
|
npm run tauri:dev
|
||||||
|
```
|
||||||
|
|
||||||
|
### Building for Distribution
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Build the frozen Python sidecar
|
||||||
|
npm run sidecar:build
|
||||||
|
|
||||||
|
# Build the Tauri app (first copy the contents of python/dist/voice-to-notes-sidecar/ into src-tauri/binaries/)
|
||||||
|
npm run tauri build
|
||||||
|
```
|
||||||
|
|
||||||
|
### CI/CD
|
||||||
|
|
||||||
|
Gitea Actions workflows are in `.gitea/workflows/` (an equivalent GitHub Actions workflow lives in `.github/workflows/`). The build pipeline:
|
||||||
|
|
||||||
|
1. **Build sidecar** — PyInstaller-frozen Python binary per platform (CPU-only PyTorch)
|
||||||
|
2. **Build Tauri app** — Bundles the sidecar via `externalBin`, produces .deb/.AppImage (Linux), .msi (Windows), .dmg (macOS)
|
||||||
|
|
||||||
|
#### Required Secrets
|
||||||
|
|
||||||
|
| Secret | Purpose | Required? |
|
||||||
|
|--------|---------|-----------|
|
||||||
|
| `TAURI_SIGNING_PRIVATE_KEY` | Signs Tauri update bundles | Optional (for auto-updates) |
|
||||||
|
|
||||||
|
No other secrets are needed for building. AI provider API keys and HuggingFace tokens are configured by end users in the app's Settings.
|
||||||
|
|
||||||
|
### Project Structure
|
||||||
|
|
||||||
|
```
|
||||||
|
src/ # Svelte 5 frontend
|
||||||
|
src-tauri/ # Rust backend (Tauri commands, sidecar manager, SQLite)
|
||||||
|
python/ # Python sidecar (transcription, diarization, AI)
|
||||||
|
voice_to_notes/ # Python package
|
||||||
|
build_sidecar.py # PyInstaller build script
|
||||||
|
voice_to_notes.spec # PyInstaller spec
|
||||||
|
.gitea/workflows/ # Gitea Actions CI/CD
|
||||||
|
```
|
||||||
|
|
||||||
## License
|
## License
|
||||||
|
|
||||||
|
|||||||
@@ -11,7 +11,9 @@
|
|||||||
"check:watch": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json --watch",
|
"check:watch": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json --watch",
|
||||||
"lint": "eslint .",
|
"lint": "eslint .",
|
||||||
"test": "vitest",
|
"test": "vitest",
|
||||||
"tauri": "tauri"
|
"tauri": "tauri",
|
||||||
|
"tauri:dev": "VOICE_TO_NOTES_DEV=1 tauri dev",
|
||||||
|
"sidecar:build": "cd python && python3 build_sidecar.py"
|
||||||
},
|
},
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
|
|||||||
215
python/build_sidecar.py
Normal file
215
python/build_sidecar.py
Normal file
@@ -0,0 +1,215 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Build the Voice to Notes sidecar as a standalone binary using PyInstaller.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python build_sidecar.py [--cpu-only]
|
||||||
|
|
||||||
|
Produces a directory `dist/voice-to-notes-sidecar/` containing the frozen
|
||||||
|
sidecar binary and all dependencies. The main binary is renamed to include
|
||||||
|
the Tauri target triple for externalBin resolution.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import os
|
||||||
|
import platform
|
||||||
|
import shutil
|
||||||
|
import stat
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
import urllib.request
|
||||||
|
import zipfile
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# All build paths are resolved relative to the directory containing this script.
SCRIPT_DIR = Path(__file__).resolve().parent
DIST_DIR = SCRIPT_DIR / "dist"
BUILD_DIR = SCRIPT_DIR / "build"
SPEC_FILE = SCRIPT_DIR / "voice_to_notes.spec"

# Static ffmpeg download URLs (GPL-licensed builds)
# Keys have the form "<sys.platform>-<arch>", with arch normalized to
# "x86_64" or "arm64" (see download_ffmpeg).
# NOTE(review): the darwin URLs end in "/zip" rather than a file extension, so
# the archive type cannot be inferred from the URL suffix for those entries.
FFMPEG_URLS: dict[str, str] = {
    "linux-x86_64": "https://johnvansickle.com/ffmpeg/releases/ffmpeg-release-amd64-static.tar.xz",
    "darwin-x86_64": "https://evermeet.cx/ffmpeg/getrelease/zip",
    "darwin-arm64": "https://evermeet.cx/ffmpeg/getrelease/zip",
    "win32-x86_64": "https://www.gyan.dev/ffmpeg/builds/ffmpeg-release-essentials.zip",
}
|
||||||
|
|
||||||
|
|
||||||
|
def get_target_triple() -> str:
    """Return the Tauri-style target triple describing the current host.

    The CPU name reported by ``platform.machine()`` is lower-cased and its
    aliases are normalized (``amd64`` -> ``x86_64``, ``arm64`` -> ``aarch64``);
    any other name is passed through unchanged. Operating systems other than
    Linux, macOS and Windows produce ``<arch>-unknown-<system>``.
    """
    cpu = platform.machine().lower()
    os_name = platform.system().lower()

    # Only the two aliases need translating; canonical names map to themselves.
    cpu = {"amd64": "x86_64", "arm64": "aarch64"}.get(cpu, cpu)

    vendor_and_abi = {
        "linux": "unknown-linux-gnu",
        "darwin": "apple-darwin",
        "windows": "pc-windows-msvc",
    }
    suffix = vendor_and_abi.get(os_name)
    if suffix is None:
        return f"{cpu}-unknown-{os_name}"
    return f"{cpu}-{suffix}"
|
||||||
|
|
||||||
|
|
||||||
|
def create_venv_and_install(cpu_only: bool) -> Path:
    """Create a fresh build venv and install everything PyInstaller needs.

    Any existing venv at ``BUILD_DIR / "sidecar-venv"`` is deleted first so
    each build starts from a clean environment.

    Args:
        cpu_only: When true, install torch/torchaudio from the PyTorch CPU
            wheel index; otherwise install the default wheels.

    Returns:
        Path to the Python interpreter inside the new venv.

    Raises:
        subprocess.CalledProcessError: If venv creation or any pip step fails.
    """
    venv_dir = BUILD_DIR / "sidecar-venv"
    if venv_dir.exists():
        shutil.rmtree(venv_dir)

    print(f"[build] Creating venv at {venv_dir}")
    subprocess.run([sys.executable, "-m", "venv", str(venv_dir)], check=True)

    # Windows venvs place executables in "Scripts", everything else in "bin".
    bin_dir = venv_dir / ("Scripts" if sys.platform == "win32" else "bin")
    python = str(bin_dir / "python")

    # Always invoke pip as "python -m pip": on Windows the pip launcher cannot
    # replace itself while it is running, so "pip install --upgrade pip" fails.
    pip = [python, "-m", "pip"]

    # Upgrade pip
    subprocess.run([*pip, "install", "--upgrade", "pip"], check=True)

    # Install torch (CPU-only to avoid bundling ~2GB of CUDA libs)
    if cpu_only:
        print("[build] Installing PyTorch (CPU-only)")
        subprocess.run(
            [*pip, "install", "torch", "torchaudio",
             "--index-url", "https://download.pytorch.org/whl/cpu"],
            check=True,
        )
    else:
        print("[build] Installing PyTorch (default, may include CUDA)")
        subprocess.run([*pip, "install", "torch", "torchaudio"], check=True)

    # Install project and dev deps (includes pyinstaller)
    print("[build] Installing project dependencies")
    subprocess.run([*pip, "install", "-e", f"{SCRIPT_DIR}[dev]"], check=True)

    return Path(python)
|
||||||
|
|
||||||
|
|
||||||
|
def run_pyinstaller(python: Path) -> Path:
    """Freeze the sidecar by running PyInstaller against the spec file.

    Args:
        python: Interpreter (normally the build venv's) that has PyInstaller
            installed.

    Returns:
        The ``dist/voice-to-notes-sidecar`` directory PyInstaller produced.

    Raises:
        subprocess.CalledProcessError: If PyInstaller exits with an error.
        RuntimeError: If the expected output directory is missing afterwards.
    """
    print("[build] Running PyInstaller")
    command = [
        str(python),
        "-m",
        "PyInstaller",
        "--clean",
        "--noconfirm",
        str(SPEC_FILE),
    ]
    # Run from the script directory so relative paths in the spec resolve.
    subprocess.run(command, cwd=str(SCRIPT_DIR), check=True)

    frozen_dir = DIST_DIR / "voice-to-notes-sidecar"
    if frozen_dir.exists():
        return frozen_dir
    raise RuntimeError(f"PyInstaller output not found at {frozen_dir}")
|
||||||
|
|
||||||
|
|
||||||
|
def download_ffmpeg(output_dir: Path) -> None:
    """Download static ffmpeg/ffprobe binaries for this platform into output_dir.

    This is best-effort: an unsupported platform, a failed download, or an
    archive without the expected binaries only prints a warning.

    The archive format is detected from the downloaded file's content rather
    than from the URL, because some entries in ``FFMPEG_URLS`` (the macOS
    ones) do not end in a file extension.

    Args:
        output_dir: Directory that receives ``ffmpeg``/``ffprobe`` (with an
            ``.exe`` suffix when the archive contains Windows binaries).
    """
    import tarfile

    machine = platform.machine().lower()
    if machine in ("amd64", "x86_64"):
        machine = "x86_64"
    elif machine in ("aarch64", "arm64"):
        machine = "arm64"

    key = f"{sys.platform}-{machine}"
    url = FFMPEG_URLS.get(key)
    if not url:
        print(f"[build] Warning: No ffmpeg download URL for platform {key}, skipping")
        return

    wanted = ("ffmpeg", "ffprobe", "ffmpeg.exe", "ffprobe.exe")
    installed: list[str] = []

    def _install(basename: str, data: bytes) -> None:
        """Write one binary into output_dir and mark it executable on Unix."""
        dest = output_dir / basename
        dest.write_bytes(data)
        if sys.platform != "win32":
            dest.chmod(dest.stat().st_mode | stat.S_IEXEC)
        installed.append(basename)

    print(f"[build] Downloading ffmpeg for {key}")
    tmp_path = output_dir / "ffmpeg_download"
    try:
        urllib.request.urlretrieve(url, str(tmp_path))

        if tarfile.is_tarfile(str(tmp_path)):
            # "r:*" lets tarfile pick the compression (the Linux build is .tar.xz).
            with tarfile.open(str(tmp_path), "r:*") as tar:
                for member in tar.getmembers():
                    basename = os.path.basename(member.name)
                    if not member.isfile() or basename not in wanted:
                        continue
                    payload = tar.extractfile(member)
                    if payload is None:
                        continue
                    with payload:
                        _install(basename, payload.read())
        elif zipfile.is_zipfile(str(tmp_path)):
            with zipfile.ZipFile(str(tmp_path), "r") as zf:
                for name in zf.namelist():
                    basename = os.path.basename(name)
                    if basename in wanted:
                        _install(basename, zf.read(name))
        else:
            print(f"[build] Warning: Unrecognized ffmpeg archive format from {url}")

        if installed:
            print("[build] ffmpeg downloaded successfully")
        else:
            # Do not claim success when nothing was written.
            print("[build] Warning: No ffmpeg/ffprobe binary found in downloaded archive")
    except Exception as e:
        print(f"[build] Warning: Failed to download ffmpeg: {e}")
    finally:
        if tmp_path.exists():
            tmp_path.unlink()
|
||||||
|
|
||||||
|
|
||||||
|
def rename_binary(output_dir: Path, target_triple: str) -> None:
    """Append the target triple to the sidecar binary's file name.

    Renames ``voice-to-notes-sidecar`` to
    ``voice-to-notes-sidecar-<target_triple>`` inside ``output_dir``, keeping
    the ``.exe`` suffix on Windows. If the binary is missing, a warning is
    printed and nothing is renamed.
    """
    exe_suffix = ".exe" if sys.platform == "win32" else ""
    original = output_dir / f"voice-to-notes-sidecar{exe_suffix}"
    renamed = output_dir / f"voice-to-notes-sidecar-{target_triple}{exe_suffix}"

    if not original.exists():
        print(f"[build] Warning: Expected binary not found at {original}")
        return

    print(f"[build] Renaming {original.name} -> {renamed.name}")
    original.rename(renamed)
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Command-line entry point: build the venv, freeze, fetch ffmpeg, rename.

    ``--cpu-only`` is accepted but is already the default; only ``--with-cuda``
    changes which PyTorch wheels are installed.
    """
    cli = argparse.ArgumentParser(description="Build the Voice to Notes sidecar binary")
    cli.add_argument(
        "--cpu-only",
        action="store_true",
        default=True,
        help="Install CPU-only PyTorch (default: True, avoids bundling CUDA)",
    )
    cli.add_argument(
        "--with-cuda",
        action="store_true",
        help="Install PyTorch with CUDA support",
    )
    options = cli.parse_args()
    # --with-cuda is the only switch that affects the outcome.
    cpu_only = not options.with_cuda

    triple = get_target_triple()
    print(f"[build] Target triple: {triple}")
    print(f"[build] CPU-only: {cpu_only}")

    venv_python = create_venv_and_install(cpu_only)
    frozen_dir = run_pyinstaller(venv_python)
    download_ffmpeg(frozen_dir)
    rename_binary(frozen_dir, triple)

    print(f"\n[build] Done! Sidecar built at: {frozen_dir}")
    print("[build] Copy contents to src-tauri/binaries/ for Tauri bundling")


if __name__ == "__main__":
    main()
|
||||||
@@ -13,6 +13,8 @@ dependencies = [
|
|||||||
"faster-whisper>=1.1.0",
|
"faster-whisper>=1.1.0",
|
||||||
"pyannote.audio>=3.1.0",
|
"pyannote.audio>=3.1.0",
|
||||||
"pysubs2>=1.7.0",
|
"pysubs2>=1.7.0",
|
||||||
|
"openai>=1.0.0",
|
||||||
|
"anthropic>=0.20.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
[project.optional-dependencies]
|
[project.optional-dependencies]
|
||||||
@@ -20,6 +22,7 @@ dev = [
|
|||||||
"ruff>=0.8.0",
|
"ruff>=0.8.0",
|
||||||
"pytest>=8.0.0",
|
"pytest>=8.0.0",
|
||||||
"pytest-asyncio>=0.24.0",
|
"pytest-asyncio>=0.24.0",
|
||||||
|
"pyinstaller>=6.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
[tool.ruff]
|
[tool.ruff]
|
||||||
|
|||||||
67
python/voice_to_notes.spec
Normal file
67
python/voice_to_notes.spec
Normal file
@@ -0,0 +1,67 @@
|
|||||||
|
# -*- mode: python ; coding: utf-8 -*-
"""PyInstaller spec for the Voice to Notes sidecar binary."""

from PyInstaller.utils.hooks import collect_all

block_cipher = None

# Collect all files for packages that have shared libraries / data files
# PyInstaller often misses these for ML packages
ctranslate2_datas, ctranslate2_binaries, ctranslate2_hiddenimports = collect_all("ctranslate2")
faster_whisper_datas, faster_whisper_binaries, faster_whisper_hiddenimports = collect_all(
    "faster_whisper"
)
pyannote_datas, pyannote_binaries, pyannote_hiddenimports = collect_all("pyannote")

# Analysis starts from the sidecar entry point and is given the collected
# binaries, data files and hidden imports from the packages above.
a = Analysis(
    ["voice_to_notes/main.py"],
    pathex=[],
    binaries=ctranslate2_binaries + faster_whisper_binaries + pyannote_binaries,
    datas=ctranslate2_datas + faster_whisper_datas + pyannote_datas,
    # Modules listed explicitly in addition to those found by collect_all.
    # NOTE(review): "litellm" is listed here; confirm it is still a project
    # dependency, otherwise this hidden import is stale.
    hiddenimports=[
        "torch",
        "torchaudio",
        "huggingface_hub",
        "pysubs2",
        "openai",
        "anthropic",
        "litellm",
    ]
    + ctranslate2_hiddenimports
    + faster_whisper_hiddenimports
    + pyannote_hiddenimports,
    hookspath=[],
    hooksconfig={},
    runtime_hooks=[],
    # Modules excluded from the frozen bundle.
    excludes=["tkinter", "test", "unittest", "pip", "setuptools"],
    win_no_prefer_redirects=False,
    win_private_assemblies=False,
    cipher=block_cipher,
    noarchive=False,
)

pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)

# exclude_binaries=True keeps binaries out of the executable itself; they are
# gathered next to it by COLLECT below (one-directory layout).
exe = EXE(
    pyz,
    a.scripts,
    [],
    exclude_binaries=True,
    name="voice-to-notes-sidecar",
    debug=False,
    bootloader_ignore_signals=False,
    strip=False,
    upx=True,
    console=True,
)

# Gathers the executable plus its binaries and data into a directory named
# "voice-to-notes-sidecar".
coll = COLLECT(
    exe,
    a.binaries,
    a.zipfiles,
    a.datas,
    strip=False,
    upx=True,
    upx_exclude=[],
    name="voice-to-notes-sidecar",
)
|
||||||
@@ -2,7 +2,10 @@
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import ctypes
|
||||||
import os
|
import os
|
||||||
|
import platform
|
||||||
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
|
||||||
@@ -21,6 +24,77 @@ class HardwareInfo:
|
|||||||
recommended_compute_type: str = "int8"
|
recommended_compute_type: str = "int8"
|
||||||
|
|
||||||
|
|
||||||
|
def _detect_ram_mb() -> int:
|
||||||
|
"""Detect total system RAM in MB (cross-platform).
|
||||||
|
|
||||||
|
Tries platform-specific methods in order:
|
||||||
|
1. Linux: read /proc/meminfo
|
||||||
|
2. macOS: sysctl hw.memsize
|
||||||
|
3. Windows: GlobalMemoryStatusEx via ctypes
|
||||||
|
4. Fallback: os.sysconf (most Unix systems)
|
||||||
|
|
||||||
|
Returns 0 if all methods fail.
|
||||||
|
"""
|
||||||
|
# Linux: read /proc/meminfo
|
||||||
|
if sys.platform == "linux":
|
||||||
|
try:
|
||||||
|
with open("/proc/meminfo") as f:
|
||||||
|
for line in f:
|
||||||
|
if line.startswith("MemTotal:"):
|
||||||
|
# Value is in kB
|
||||||
|
return int(line.split()[1]) // 1024
|
||||||
|
except (FileNotFoundError, ValueError, OSError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
# macOS: sysctl hw.memsize (returns bytes)
|
||||||
|
if sys.platform == "darwin":
|
||||||
|
try:
|
||||||
|
result = subprocess.run(
|
||||||
|
["sysctl", "-n", "hw.memsize"],
|
||||||
|
capture_output=True,
|
||||||
|
text=True,
|
||||||
|
check=True,
|
||||||
|
)
|
||||||
|
return int(result.stdout.strip()) // (1024 * 1024)
|
||||||
|
except (subprocess.SubprocessError, ValueError, OSError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Windows: GlobalMemoryStatusEx via ctypes
|
||||||
|
if sys.platform == "win32":
|
||||||
|
try:
|
||||||
|
|
||||||
|
class MEMORYSTATUSEX(ctypes.Structure):
|
||||||
|
_fields_ = [
|
||||||
|
("dwLength", ctypes.c_ulong),
|
||||||
|
("dwMemoryLoad", ctypes.c_ulong),
|
||||||
|
("ullTotalPhys", ctypes.c_ulonglong),
|
||||||
|
("ullAvailPhys", ctypes.c_ulonglong),
|
||||||
|
("ullTotalPageFile", ctypes.c_ulonglong),
|
||||||
|
("ullAvailPageFile", ctypes.c_ulonglong),
|
||||||
|
("ullTotalVirtual", ctypes.c_ulonglong),
|
||||||
|
("ullAvailVirtual", ctypes.c_ulonglong),
|
||||||
|
("ullAvailExtendedVirtual", ctypes.c_ulonglong),
|
||||||
|
]
|
||||||
|
|
||||||
|
mem_status = MEMORYSTATUSEX()
|
||||||
|
mem_status.dwLength = ctypes.sizeof(MEMORYSTATUSEX)
|
||||||
|
if ctypes.windll.kernel32.GlobalMemoryStatusEx(ctypes.byref(mem_status)):
|
||||||
|
return int(mem_status.ullTotalPhys) // (1024 * 1024)
|
||||||
|
except (AttributeError, OSError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Fallback: os.sysconf (works on most Unix systems)
|
||||||
|
try:
|
||||||
|
page_size = os.sysconf("SC_PAGE_SIZE")
|
||||||
|
phys_pages = os.sysconf("SC_PHYS_PAGES")
|
||||||
|
if page_size > 0 and phys_pages > 0:
|
||||||
|
return (page_size * phys_pages) // (1024 * 1024)
|
||||||
|
except (ValueError, OSError, AttributeError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
return 0
|
||||||
|
|
||||||
|
|
||||||
def detect_hardware() -> HardwareInfo:
|
def detect_hardware() -> HardwareInfo:
|
||||||
"""Detect available hardware and recommend model configuration."""
|
"""Detect available hardware and recommend model configuration."""
|
||||||
info = HardwareInfo()
|
info = HardwareInfo()
|
||||||
@@ -28,16 +102,8 @@ def detect_hardware() -> HardwareInfo:
|
|||||||
# CPU info
|
# CPU info
|
||||||
info.cpu_cores = os.cpu_count() or 1
|
info.cpu_cores = os.cpu_count() or 1
|
||||||
|
|
||||||
# RAM info
|
# RAM info (cross-platform)
|
||||||
try:
|
info.ram_mb = _detect_ram_mb()
|
||||||
with open("/proc/meminfo") as f:
|
|
||||||
for line in f:
|
|
||||||
if line.startswith("MemTotal:"):
|
|
||||||
# Value is in kB
|
|
||||||
info.ram_mb = int(line.split()[1]) // 1024
|
|
||||||
break
|
|
||||||
except (FileNotFoundError, ValueError):
|
|
||||||
pass
|
|
||||||
|
|
||||||
# CUDA detection
|
# CUDA detection
|
||||||
try:
|
try:
|
||||||
|
|||||||
@@ -260,10 +260,12 @@ def make_ai_chat_handler() -> HandlerFunc:
|
|||||||
model=config.get("model", "claude-sonnet-4-6"),
|
model=config.get("model", "claude-sonnet-4-6"),
|
||||||
))
|
))
|
||||||
elif provider_name == "litellm":
|
elif provider_name == "litellm":
|
||||||
from voice_to_notes.providers.litellm_provider import LiteLLMProvider
|
from voice_to_notes.providers.litellm_provider import OpenAICompatibleProvider
|
||||||
|
|
||||||
service.register_provider("litellm", LiteLLMProvider(
|
service.register_provider("litellm", OpenAICompatibleProvider(
|
||||||
model=config.get("model", "gpt-4o-mini"),
|
model=config.get("model", "gpt-4o-mini"),
|
||||||
|
api_key=config.get("api_key"),
|
||||||
|
api_base=config.get("api_base"),
|
||||||
))
|
))
|
||||||
return IPCMessage(
|
return IPCMessage(
|
||||||
id=msg.id,
|
id=msg.id,
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
"""LiteLLM provider — multi-provider gateway."""
|
"""OpenAI-compatible provider — works with any OpenAI-compatible API endpoint."""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
@@ -7,36 +7,44 @@ from typing import Any
|
|||||||
from voice_to_notes.providers.base import AIProvider
|
from voice_to_notes.providers.base import AIProvider
|
||||||
|
|
||||||
|
|
||||||
class OpenAICompatibleProvider(AIProvider):
    """Connects to any OpenAI-compatible API (LiteLLM proxy, Ollama, vLLM, etc.)."""

    def __init__(
        self,
        api_key: str | None = None,
        api_base: str | None = None,
        model: str = "gpt-4o-mini",
        **kwargs: Any,
    ) -> None:
        """Store connection settings; no network access happens here.

        Args:
            api_key: API key sent to the endpoint. A placeholder is used when
                none is given.
            api_base: Base URL of the endpoint. When unset, the client is
                created without an explicit ``base_url``.
            model: Default model name for ``chat``.
            **kwargs: Default chat options (``model``, ``temperature``,
                ``max_tokens``) applied when a ``chat`` call does not
                override them.
        """
        self._api_key = api_key or "sk-no-key"
        self._api_base = api_base
        self._model = model
        self._extra_kwargs = kwargs

    def chat(self, messages: list[dict[str, str]], **kwargs: Any) -> str:
        """Send ``messages`` to the chat-completions endpoint and return the reply text.

        Per-call ``kwargs`` take precedence over the defaults passed to the
        constructor. Returns an empty string when the response has no content.
        """
        from openai import OpenAI

        # Constructor-level defaults first, per-call overrides last.
        options = {**self._extra_kwargs, **kwargs}

        client_kwargs: dict[str, Any] = {"api_key": self._api_key}
        if self._api_base:
            client_kwargs["base_url"] = self._api_base

        client = OpenAI(**client_kwargs)
        response = client.chat.completions.create(
            model=options.get("model", self._model),
            messages=messages,
            temperature=options.get("temperature", 0.7),
            max_tokens=options.get("max_tokens", 2048),
        )
        return response.choices[0].message.content or ""

    def is_available(self) -> bool:
        """Return True when ``openai`` is importable and an API base URL is configured."""
        try:
            import openai  # noqa: F401
            return bool(self._api_key and self._api_base)
        except ImportError:
            return False

    @property
    def name(self) -> str:
        """Human-readable provider name."""
        return "OpenAI Compatible"
|
||||||
|
|||||||
@@ -92,7 +92,7 @@ class AIProviderService:
|
|||||||
def create_default_service() -> AIProviderService:
|
def create_default_service() -> AIProviderService:
|
||||||
"""Create an AIProviderService with all supported providers registered."""
|
"""Create an AIProviderService with all supported providers registered."""
|
||||||
from voice_to_notes.providers.anthropic_provider import AnthropicProvider
|
from voice_to_notes.providers.anthropic_provider import AnthropicProvider
|
||||||
from voice_to_notes.providers.litellm_provider import LiteLLMProvider
|
from voice_to_notes.providers.litellm_provider import OpenAICompatibleProvider
|
||||||
from voice_to_notes.providers.local_provider import LocalProvider
|
from voice_to_notes.providers.local_provider import LocalProvider
|
||||||
from voice_to_notes.providers.openai_provider import OpenAIProvider
|
from voice_to_notes.providers.openai_provider import OpenAIProvider
|
||||||
|
|
||||||
@@ -100,5 +100,5 @@ def create_default_service() -> AIProviderService:
|
|||||||
service.register_provider("local", LocalProvider())
|
service.register_provider("local", LocalProvider())
|
||||||
service.register_provider("openai", OpenAIProvider())
|
service.register_provider("openai", OpenAIProvider())
|
||||||
service.register_provider("anthropic", AnthropicProvider())
|
service.register_provider("anthropic", AnthropicProvider())
|
||||||
service.register_provider("litellm", LiteLLMProvider())
|
service.register_provider("litellm", OpenAICompatibleProvider())
|
||||||
return service
|
return service
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ from typing import Any
|
|||||||
# np.isfinite(None) crashes when max_speakers is not set.
|
# np.isfinite(None) crashes when max_speakers is not set.
|
||||||
os.environ.setdefault("PYANNOTE_METRICS_ENABLED", "false")
|
os.environ.setdefault("PYANNOTE_METRICS_ENABLED", "false")
|
||||||
|
|
||||||
|
from voice_to_notes.utils.ffmpeg import get_ffmpeg_path
|
||||||
from voice_to_notes.ipc.messages import progress_message
|
from voice_to_notes.ipc.messages import progress_message
|
||||||
from voice_to_notes.ipc.protocol import write_message
|
from voice_to_notes.ipc.protocol import write_message
|
||||||
|
|
||||||
@@ -40,7 +41,7 @@ def _ensure_wav(file_path: str) -> tuple[str, str | None]:
|
|||||||
try:
|
try:
|
||||||
subprocess.run(
|
subprocess.run(
|
||||||
[
|
[
|
||||||
"ffmpeg", "-y", "-i", file_path,
|
get_ffmpeg_path(), "-y", "-i", file_path,
|
||||||
"-ar", "16000", "-ac", "1", "-c:a", "pcm_s16le",
|
"-ar", "16000", "-ac", "1", "-c:a", "pcm_s16le",
|
||||||
tmp.name,
|
tmp.name,
|
||||||
],
|
],
|
||||||
@@ -118,6 +119,14 @@ class DiarizeService:
|
|||||||
|
|
||||||
self._pipeline = Pipeline.from_pretrained(model_name, token=hf_token)
|
self._pipeline = Pipeline.from_pretrained(model_name, token=hf_token)
|
||||||
print(f"[sidecar] Loaded diarization model: {model_name}", file=sys.stderr, flush=True)
|
print(f"[sidecar] Loaded diarization model: {model_name}", file=sys.stderr, flush=True)
|
||||||
|
# Move pipeline to GPU if available
|
||||||
|
try:
|
||||||
|
import torch
|
||||||
|
if torch.cuda.is_available():
|
||||||
|
self._pipeline = self._pipeline.to(torch.device("cuda"))
|
||||||
|
print(f"[sidecar] Diarization pipeline moved to GPU", file=sys.stderr, flush=True)
|
||||||
|
except Exception as e:
|
||||||
|
print(f"[sidecar] GPU not available for diarization: {e}", file=sys.stderr, flush=True)
|
||||||
return self._pipeline
|
return self._pipeline
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
last_error = e
|
last_error = e
|
||||||
|
|||||||
@@ -2,6 +2,7 @@
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import concurrent.futures
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
@@ -13,6 +14,7 @@ from voice_to_notes.ipc.messages import (
|
|||||||
speaker_update_message,
|
speaker_update_message,
|
||||||
)
|
)
|
||||||
from voice_to_notes.ipc.protocol import write_message
|
from voice_to_notes.ipc.protocol import write_message
|
||||||
|
from voice_to_notes.utils.ffmpeg import get_ffprobe_path
|
||||||
from voice_to_notes.services.diarize import DiarizeService, SpeakerSegment
|
from voice_to_notes.services.diarize import DiarizeService, SpeakerSegment
|
||||||
from voice_to_notes.services.transcribe import (
|
from voice_to_notes.services.transcribe import (
|
||||||
SegmentResult,
|
SegmentResult,
|
||||||
@@ -82,7 +84,7 @@ class PipelineService:
|
|||||||
"""
|
"""
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
|
|
||||||
# Step 1: Transcribe
|
# Step 0: Probe audio duration for conditional chunked transcription
|
||||||
write_message(
|
write_message(
|
||||||
progress_message(request_id, 0, "pipeline", "Starting transcription pipeline...")
|
progress_message(request_id, 0, "pipeline", "Starting transcription pipeline...")
|
||||||
)
|
)
|
||||||
@@ -96,12 +98,11 @@ class PipelineService:
|
|||||||
"words": [{"word": w.word, "start_ms": w.start_ms, "end_ms": w.end_ms, "confidence": w.confidence} for w in seg.words],
|
"words": [{"word": w.word, "start_ms": w.start_ms, "end_ms": w.end_ms, "confidence": w.confidence} for w in seg.words],
|
||||||
}))
|
}))
|
||||||
|
|
||||||
# Probe audio duration for conditional chunked transcription
|
|
||||||
audio_duration_sec = None
|
audio_duration_sec = None
|
||||||
try:
|
try:
|
||||||
import subprocess
|
import subprocess
|
||||||
probe_result = subprocess.run(
|
probe_result = subprocess.run(
|
||||||
["ffprobe", "-v", "quiet", "-show_entries", "format=duration",
|
[get_ffprobe_path(), "-v", "quiet", "-show_entries", "format=duration",
|
||||||
"-of", "default=noprint_wrappers=1:nokey=1", file_path],
|
"-of", "default=noprint_wrappers=1:nokey=1", file_path],
|
||||||
capture_output=True, text=True, check=True,
|
capture_output=True, text=True, check=True,
|
||||||
)
|
)
|
||||||
@@ -109,9 +110,11 @@ class PipelineService:
|
|||||||
except (subprocess.CalledProcessError, FileNotFoundError, ValueError):
|
except (subprocess.CalledProcessError, FileNotFoundError, ValueError):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
def _run_transcription() -> TranscriptionResult:
|
||||||
|
"""Run transcription (chunked or standard based on duration)."""
|
||||||
from voice_to_notes.services.transcribe import LARGE_FILE_THRESHOLD_SEC
|
from voice_to_notes.services.transcribe import LARGE_FILE_THRESHOLD_SEC
|
||||||
if audio_duration_sec and audio_duration_sec > LARGE_FILE_THRESHOLD_SEC:
|
if audio_duration_sec and audio_duration_sec > LARGE_FILE_THRESHOLD_SEC:
|
||||||
transcription = self._transcribe_service.transcribe_chunked(
|
return self._transcribe_service.transcribe_chunked(
|
||||||
request_id=request_id,
|
request_id=request_id,
|
||||||
file_path=file_path,
|
file_path=file_path,
|
||||||
model_name=model_name,
|
model_name=model_name,
|
||||||
@@ -121,7 +124,7 @@ class PipelineService:
|
|||||||
on_segment=_emit_segment,
|
on_segment=_emit_segment,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
transcription = self._transcribe_service.transcribe(
|
return self._transcribe_service.transcribe(
|
||||||
request_id=request_id,
|
request_id=request_id,
|
||||||
file_path=file_path,
|
file_path=file_path,
|
||||||
model_name=model_name,
|
model_name=model_name,
|
||||||
@@ -132,7 +135,8 @@ class PipelineService:
|
|||||||
)
|
)
|
||||||
|
|
||||||
if skip_diarization:
|
if skip_diarization:
|
||||||
# Convert transcription directly without speaker labels
|
# Sequential: transcribe only, no diarization needed
|
||||||
|
transcription = _run_transcription()
|
||||||
result = PipelineResult(
|
result = PipelineResult(
|
||||||
language=transcription.language,
|
language=transcription.language,
|
||||||
language_probability=transcription.language_probability,
|
language_probability=transcription.language_probability,
|
||||||
@@ -150,24 +154,46 @@ class PipelineService:
|
|||||||
)
|
)
|
||||||
return result
|
return result
|
||||||
|
|
||||||
# Step 2: Diarize (with graceful fallback)
|
# Parallel execution: run transcription (0-45%) and diarization (45-90%)
|
||||||
|
# concurrently, then merge (90-100%).
|
||||||
write_message(
|
write_message(
|
||||||
progress_message(request_id, 50, "pipeline", "Starting speaker diarization...")
|
progress_message(
|
||||||
|
request_id, 0, "pipeline",
|
||||||
|
"Starting transcription and diarization in parallel..."
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
diarization = None
|
diarization = None
|
||||||
try:
|
diarization_error = None
|
||||||
diarization = self._diarize_service.diarize(
|
|
||||||
|
with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
|
||||||
|
transcription_future = executor.submit(_run_transcription)
|
||||||
|
|
||||||
|
# Use probed audio_duration_sec for diarization progress estimation
|
||||||
|
# (transcription hasn't finished yet, so we can't use transcription.duration_ms)
|
||||||
|
diarization_future = executor.submit(
|
||||||
|
self._diarize_service.diarize,
|
||||||
request_id=request_id,
|
request_id=request_id,
|
||||||
file_path=file_path,
|
file_path=file_path,
|
||||||
num_speakers=num_speakers,
|
num_speakers=num_speakers,
|
||||||
min_speakers=min_speakers,
|
min_speakers=min_speakers,
|
||||||
max_speakers=max_speakers,
|
max_speakers=max_speakers,
|
||||||
hf_token=hf_token,
|
hf_token=hf_token,
|
||||||
audio_duration_sec=transcription.duration_ms / 1000.0,
|
audio_duration_sec=audio_duration_sec,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Wait for both futures. We need the transcription result regardless,
|
||||||
|
# but diarization may fail gracefully.
|
||||||
|
transcription = transcription_future.result()
|
||||||
|
write_message(
|
||||||
|
progress_message(request_id, 45, "pipeline", "Transcription complete")
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
diarization = diarization_future.result()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
import traceback
|
import traceback
|
||||||
|
diarization_error = e
|
||||||
print(
|
print(
|
||||||
f"[sidecar] Diarization failed, falling back to transcription-only: {e}",
|
f"[sidecar] Diarization failed, falling back to transcription-only: {e}",
|
||||||
file=sys.stderr,
|
file=sys.stderr,
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ from faster_whisper import WhisperModel
|
|||||||
|
|
||||||
from voice_to_notes.ipc.messages import progress_message
|
from voice_to_notes.ipc.messages import progress_message
|
||||||
from voice_to_notes.ipc.protocol import write_message
|
from voice_to_notes.ipc.protocol import write_message
|
||||||
|
from voice_to_notes.utils.ffmpeg import get_ffmpeg_path, get_ffprobe_path
|
||||||
|
|
||||||
CHUNK_REPORT_SIZE = 10
|
CHUNK_REPORT_SIZE = 10
|
||||||
LARGE_FILE_THRESHOLD_SEC = 3600 # 1 hour
|
LARGE_FILE_THRESHOLD_SEC = 3600 # 1 hour
|
||||||
@@ -202,7 +203,7 @@ class TranscribeService:
|
|||||||
# Get total duration via ffprobe
|
# Get total duration via ffprobe
|
||||||
try:
|
try:
|
||||||
probe_result = subprocess.run(
|
probe_result = subprocess.run(
|
||||||
["ffprobe", "-v", "quiet", "-show_entries", "format=duration",
|
[get_ffprobe_path(), "-v", "quiet", "-show_entries", "format=duration",
|
||||||
"-of", "default=noprint_wrappers=1:nokey=1", file_path],
|
"-of", "default=noprint_wrappers=1:nokey=1", file_path],
|
||||||
capture_output=True, text=True, check=True,
|
capture_output=True, text=True, check=True,
|
||||||
)
|
)
|
||||||
@@ -235,7 +236,7 @@ class TranscribeService:
|
|||||||
tmp.close()
|
tmp.close()
|
||||||
try:
|
try:
|
||||||
subprocess.run(
|
subprocess.run(
|
||||||
["ffmpeg", "-y", "-ss", str(chunk_start),
|
[get_ffmpeg_path(), "-y", "-ss", str(chunk_start),
|
||||||
"-t", str(chunk_duration_sec),
|
"-t", str(chunk_duration_sec),
|
||||||
"-i", file_path,
|
"-i", file_path,
|
||||||
"-ar", "16000", "-ac", "1", "-c:a", "pcm_s16le",
|
"-ar", "16000", "-ac", "1", "-c:a", "pcm_s16le",
|
||||||
|
|||||||
43
python/voice_to_notes/utils/ffmpeg.py
Normal file
43
python/voice_to_notes/utils/ffmpeg.py
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
"""Resolve ffmpeg/ffprobe paths for both frozen and development builds."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
|
||||||
|
def get_ffmpeg_path() -> str:
    """Return the path to the ffmpeg binary.

    When running as a frozen PyInstaller bundle, looks next to sys.executable
    and then in the bundle's extraction/data directory (``sys._MEIPASS``).
    Otherwise, or when no bundled copy is found, returns the bare command
    name so it is resolved through the system PATH.
    """
    return _resolve_bundled_binary("ffmpeg")


def get_ffprobe_path() -> str:
    """Return the path to the ffprobe binary.

    Uses the same lookup order as ``get_ffmpeg_path``: bundled copy first when
    frozen, bare command name (system PATH) otherwise.
    """
    return _resolve_bundled_binary("ffprobe")


def _resolve_bundled_binary(tool: str) -> str:
    """Locate ``tool`` inside a frozen bundle, falling back to the bare name.

    Args:
        tool: Executable name without extension, e.g. ``"ffmpeg"``.

    Returns:
        Full path of the first bundled file found, else ``tool`` unchanged.
    """
    if not getattr(sys, "frozen", False):
        return tool

    # Frozen PyInstaller bundle: the tool sits next to the sidecar binary, or
    # inside the directory PyInstaller exposes as sys._MEIPASS.
    search_dirs = [os.path.dirname(sys.executable)]
    bundle_data_dir = getattr(sys, "_MEIPASS", None)
    if bundle_data_dir and bundle_data_dir not in search_dirs:
        search_dirs.append(bundle_data_dir)

    preferred = f"{tool}.exe" if sys.platform == "win32" else tool
    # dict.fromkeys drops the duplicate name on non-Windows, keeping order.
    filenames = tuple(dict.fromkeys((preferred, tool)))

    for directory in search_dirs:
        for filename in filenames:
            candidate = os.path.join(directory, filename)
            if os.path.isfile(candidate):
                return candidate
    return tool
|
||||||
0
src-tauri/binaries/.gitkeep
Normal file
0
src-tauri/binaries/.gitkeep
Normal file
@@ -73,7 +73,7 @@ pub fn download_diarize_model(hf_token: String) -> Result<Value, String> {
|
|||||||
|
|
||||||
/// Run the full transcription + diarization pipeline via the Python sidecar.
|
/// Run the full transcription + diarization pipeline via the Python sidecar.
|
||||||
#[tauri::command]
|
#[tauri::command]
|
||||||
pub fn run_pipeline(
|
pub async fn run_pipeline(
|
||||||
app: AppHandle,
|
app: AppHandle,
|
||||||
file_path: String,
|
file_path: String,
|
||||||
model: Option<String>,
|
model: Option<String>,
|
||||||
@@ -106,13 +106,19 @@ pub fn run_pipeline(
|
|||||||
}),
|
}),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// Run the blocking sidecar I/O on a separate thread so the async runtime
|
||||||
|
// can deliver emitted events to the webview while processing is ongoing.
|
||||||
|
let app_handle = app.clone();
|
||||||
|
tauri::async_runtime::spawn_blocking(move || {
|
||||||
let response = manager.send_and_receive_with_progress(&msg, |msg| {
|
let response = manager.send_and_receive_with_progress(&msg, |msg| {
|
||||||
let event_name = match msg.msg_type.as_str() {
|
let event_name = match msg.msg_type.as_str() {
|
||||||
"pipeline.segment" => "pipeline-segment",
|
"pipeline.segment" => "pipeline-segment",
|
||||||
"pipeline.speaker_update" => "pipeline-speaker-update",
|
"pipeline.speaker_update" => "pipeline-speaker-update",
|
||||||
_ => "pipeline-progress",
|
_ => "pipeline-progress",
|
||||||
};
|
};
|
||||||
let _ = app.emit(event_name, &msg.payload);
|
if let Err(e) = app_handle.emit(event_name, &msg.payload) {
|
||||||
|
eprintln!("[sidecar-rs] Failed to emit {event_name}: {e}");
|
||||||
|
}
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
if response.msg_type == "error" {
|
if response.msg_type == "error" {
|
||||||
@@ -127,4 +133,7 @@ pub fn run_pipeline(
|
|||||||
}
|
}
|
||||||
|
|
||||||
Ok(response.payload)
|
Ok(response.payload)
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.map_err(|e| format!("Pipeline task failed: {e}"))?
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -13,8 +13,13 @@ pub fn sidecar() -> &'static SidecarManager {
|
|||||||
INSTANCE.get_or_init(SidecarManager::new)
|
INSTANCE.get_or_init(SidecarManager::new)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Manages the Python sidecar process lifecycle.
|
/// Manages the sidecar process lifecycle.
|
||||||
/// Uses separated stdin/stdout ownership to avoid BufReader conflicts.
|
///
|
||||||
|
/// Supports two modes:
|
||||||
|
/// - **Production**: spawns a frozen PyInstaller binary (no Python required)
|
||||||
|
/// - **Dev mode**: spawns system Python with `-m voice_to_notes.main`
|
||||||
|
///
|
||||||
|
/// Dev mode is active when compiled in debug mode or when `VOICE_TO_NOTES_DEV=1`.
|
||||||
pub struct SidecarManager {
|
pub struct SidecarManager {
|
||||||
process: Mutex<Option<Child>>,
|
process: Mutex<Option<Child>>,
|
||||||
stdin: Mutex<Option<ChildStdin>>,
|
stdin: Mutex<Option<ChildStdin>>,
|
||||||
@@ -30,38 +35,141 @@ impl SidecarManager {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Check if we should use dev mode (system Python).
///
/// Dev mode is always on in debug builds. In release builds it is on only
/// when `VOICE_TO_NOTES_DEV` is set to an enabling value such as `1`; an
/// empty value, `0`, or `false` (any case) leaves it off, so exporting
/// `VOICE_TO_NOTES_DEV=0` no longer switches dev mode on by accident.
fn is_dev_mode() -> bool {
    if cfg!(debug_assertions) {
        return true;
    }

    match std::env::var("VOICE_TO_NOTES_DEV") {
        Ok(raw) => {
            let flag = raw.trim();
            !(flag.is_empty() || flag == "0" || flag.eq_ignore_ascii_case("false"))
        }
        // Unset, or not valid Unicode: treat as disabled.
        Err(_) => false,
    }
}
|
||||||
|
|
||||||
|
/// Resolve the frozen sidecar binary path (production mode).
///
/// Looks for the sidecar executable in two places relative to the running
/// application binary:
/// 1. directly next to it (where Tauri places `externalBin`), and
/// 2. inside a `voice-to-notes-sidecar/` subdirectory (onedir PyInstaller output).
///
/// Returns an error listing both locations when neither holds a regular file.
fn resolve_sidecar_path() -> Result<std::path::PathBuf, String> {
    let exe = std::env::current_exe().map_err(|e| format!("Cannot get current exe: {e}"))?;
    let exe_dir = exe
        .parent()
        .ok_or_else(|| "Cannot get exe parent directory".to_string())?;

    let binary_name = if cfg!(target_os = "windows") {
        "voice-to-notes-sidecar.exe"
    } else {
        "voice-to-notes-sidecar"
    };

    // Tauri places externalBin next to the app binary.
    // `is_file()` rather than `exists()`: on Unix the onedir output directory
    // has the same name as the binary, and a directory must not be returned
    // as the executable to spawn.
    let path = exe_dir.join(binary_name);
    if path.is_file() {
        return Ok(path);
    }

    // Also check inside a subdirectory (onedir PyInstaller output)
    let subdir_path = exe_dir.join("voice-to-notes-sidecar").join(binary_name);
    if subdir_path.is_file() {
        return Ok(subdir_path);
    }

    Err(format!(
        "Sidecar binary not found. Looked for:\n  {}\n  {}",
        path.display(),
        subdir_path.display(),
    ))
}
|
||||||
|
|
||||||
|
/// Name of the Python interpreter command to launch on this platform:
/// `python` on Windows, `python3` everywhere else.
fn find_python_command() -> &'static str {
    match cfg!(target_os = "windows") {
        true => "python",
        false => "python3",
    }
}
|
||||||
|
|
||||||
|
/// Resolve the Python sidecar directory for dev mode.
|
||||||
|
fn resolve_python_dir() -> Result<std::path::PathBuf, String> {
|
||||||
|
let manifest_dir = env!("CARGO_MANIFEST_DIR");
|
||||||
|
let python_dir = std::path::Path::new(manifest_dir)
|
||||||
|
.join("../python")
|
||||||
|
.canonicalize()
|
||||||
|
.map_err(|e| format!("Cannot find python directory: {e}"))?;
|
||||||
|
|
||||||
|
if python_dir.exists() {
|
||||||
|
return Ok(python_dir);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fallback: relative to current exe
|
||||||
|
let exe = std::env::current_exe().map_err(|e| e.to_string())?;
|
||||||
|
let alt = exe
|
||||||
|
.parent()
|
||||||
|
.ok_or_else(|| "No parent dir".to_string())?
|
||||||
|
.join("../python")
|
||||||
|
.canonicalize()
|
||||||
|
.map_err(|e| format!("Cannot find python directory: {e}"))?;
|
||||||
|
|
||||||
|
Ok(alt)
|
||||||
|
}
|
||||||
|
|
||||||
/// Ensure the sidecar is running, starting it if needed.
|
/// Ensure the sidecar is running, starting it if needed.
|
||||||
pub fn ensure_running(&self) -> Result<(), String> {
|
pub fn ensure_running(&self) -> Result<(), String> {
|
||||||
if self.is_running() {
|
if self.is_running() {
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
|
|
||||||
let python_path = std::env::current_dir()
|
if Self::is_dev_mode() {
|
||||||
.map_err(|e| e.to_string())?
|
self.start_python_dev()
|
||||||
.join("../python")
|
} else {
|
||||||
.canonicalize()
|
match Self::resolve_sidecar_path() {
|
||||||
.map_err(|e| format!("Cannot find python directory: {e}"))?;
|
Ok(path) => self.start_binary(&path),
|
||||||
|
Err(e) => {
|
||||||
self.start(&python_path.to_string_lossy())
|
eprintln!(
|
||||||
|
"[sidecar-rs] Frozen binary not found ({e}), falling back to dev mode"
|
||||||
|
);
|
||||||
|
self.start_python_dev()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Spawn the Python sidecar process.
|
/// Spawn the frozen sidecar binary (production mode).
|
||||||
pub fn start(&self, python_path: &str) -> Result<(), String> {
|
fn start_binary(&self, path: &std::path::Path) -> Result<(), String> {
|
||||||
// Stop existing process if any
|
|
||||||
self.stop().ok();
|
self.stop().ok();
|
||||||
|
eprintln!("[sidecar-rs] Starting frozen sidecar: {}", path.display());
|
||||||
|
|
||||||
let mut child = Command::new("python3")
|
let child = Command::new(path)
|
||||||
.arg("-m")
|
|
||||||
.arg("voice_to_notes.main")
|
|
||||||
.current_dir(python_path)
|
|
||||||
.env("PYTHONPATH", python_path)
|
|
||||||
.stdin(Stdio::piped())
|
.stdin(Stdio::piped())
|
||||||
.stdout(Stdio::piped())
|
.stdout(Stdio::piped())
|
||||||
.stderr(Stdio::inherit())
|
.stderr(Stdio::inherit())
|
||||||
.spawn()
|
.spawn()
|
||||||
.map_err(|e| format!("Failed to start sidecar: {e}"))?;
|
.map_err(|e| format!("Failed to start sidecar binary: {e}"))?;
|
||||||
|
|
||||||
// Take ownership of stdin and stdout separately
|
self.attach(child)?;
|
||||||
|
self.wait_for_ready()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Spawn the Python sidecar in dev mode (system Python).
|
||||||
|
fn start_python_dev(&self) -> Result<(), String> {
|
||||||
|
self.stop().ok();
|
||||||
|
let python_dir = Self::resolve_python_dir()?;
|
||||||
|
let python_cmd = Self::find_python_command();
|
||||||
|
eprintln!(
|
||||||
|
"[sidecar-rs] Starting dev sidecar: {} -m voice_to_notes.main ({})",
|
||||||
|
python_cmd,
|
||||||
|
python_dir.display()
|
||||||
|
);
|
||||||
|
|
||||||
|
let child = Command::new(python_cmd)
|
||||||
|
.arg("-m")
|
||||||
|
.arg("voice_to_notes.main")
|
||||||
|
.current_dir(&python_dir)
|
||||||
|
.env("PYTHONPATH", &python_dir)
|
||||||
|
.stdin(Stdio::piped())
|
||||||
|
.stdout(Stdio::piped())
|
||||||
|
.stderr(Stdio::inherit())
|
||||||
|
.spawn()
|
||||||
|
.map_err(|e| format!("Failed to start Python sidecar: {e}"))?;
|
||||||
|
|
||||||
|
self.attach(child)?;
|
||||||
|
self.wait_for_ready()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Take ownership of a spawned child's stdin/stdout and store the process handle.
|
||||||
|
fn attach(&self, mut child: Child) -> Result<(), String> {
|
||||||
let stdin = child.stdin.take().ok_or("Failed to get sidecar stdin")?;
|
let stdin = child.stdin.take().ok_or("Failed to get sidecar stdin")?;
|
||||||
let stdout = child.stdout.take().ok_or("Failed to get sidecar stdout")?;
|
let stdout = child.stdout.take().ok_or("Failed to get sidecar stdout")?;
|
||||||
let buf_reader = BufReader::new(stdout);
|
let buf_reader = BufReader::new(stdout);
|
||||||
@@ -78,10 +186,6 @@ impl SidecarManager {
|
|||||||
let mut r = self.reader.lock().map_err(|e| e.to_string())?;
|
let mut r = self.reader.lock().map_err(|e| e.to_string())?;
|
||||||
*r = Some(buf_reader);
|
*r = Some(buf_reader);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Wait for the "ready" message
|
|
||||||
self.wait_for_ready()?;
|
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -124,70 +228,6 @@ impl SidecarManager {
|
|||||||
self.send_and_receive_with_progress(msg, |_| {})
|
self.send_and_receive_with_progress(msg, |_| {})
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Send a message and read the response, calling on_progress for each progress message.
|
|
||||||
pub fn send_and_receive_with_progress(
|
|
||||||
&self,
|
|
||||||
msg: &IPCMessage,
|
|
||||||
on_progress: impl Fn(&IPCMessage),
|
|
||||||
) -> Result<IPCMessage, String> {
|
|
||||||
// Write to stdin
|
|
||||||
{
|
|
||||||
let mut stdin_guard = self.stdin.lock().map_err(|e| e.to_string())?;
|
|
||||||
if let Some(ref mut stdin) = *stdin_guard {
|
|
||||||
let json = serde_json::to_string(msg).map_err(|e| e.to_string())?;
|
|
||||||
stdin
|
|
||||||
.write_all(json.as_bytes())
|
|
||||||
.map_err(|e| format!("Write error: {e}"))?;
|
|
||||||
stdin
|
|
||||||
.write_all(b"\n")
|
|
||||||
.map_err(|e| format!("Write error: {e}"))?;
|
|
||||||
stdin.flush().map_err(|e| format!("Flush error: {e}"))?;
|
|
||||||
} else {
|
|
||||||
return Err("Sidecar stdin not available".to_string());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Read from stdout
|
|
||||||
{
|
|
||||||
let mut reader_guard = self.reader.lock().map_err(|e| e.to_string())?;
|
|
||||||
if let Some(ref mut reader) = *reader_guard {
|
|
||||||
let mut line = String::new();
|
|
||||||
loop {
|
|
||||||
line.clear();
|
|
||||||
let bytes_read = reader
|
|
||||||
.read_line(&mut line)
|
|
||||||
.map_err(|e| format!("Read error: {e}"))?;
|
|
||||||
if bytes_read == 0 {
|
|
||||||
return Err("Sidecar closed stdout".to_string());
|
|
||||||
}
|
|
||||||
let trimmed = line.trim();
|
|
||||||
if trimmed.is_empty() {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
// Skip non-JSON lines (library output that leaked to stdout)
|
|
||||||
let response: IPCMessage = match serde_json::from_str(trimmed) {
|
|
||||||
Ok(msg) => msg,
|
|
||||||
Err(_) => {
|
|
||||||
eprintln!(
|
|
||||||
"[sidecar-rs] Skipping non-JSON line: {}",
|
|
||||||
&trimmed[..trimmed.len().min(200)]
|
|
||||||
);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
if response.msg_type == "progress" {
|
|
||||||
on_progress(&response);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
return Ok(response);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
Err("Sidecar stdout not available".to_string())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Send a message and receive the response, calling a callback for intermediate messages.
|
/// Send a message and receive the response, calling a callback for intermediate messages.
|
||||||
/// Intermediate messages include progress, pipeline.segment, and pipeline.speaker_update.
|
/// Intermediate messages include progress, pipeline.segment, and pipeline.speaker_update.
|
||||||
pub fn send_and_receive_with_progress<F>(
|
pub fn send_and_receive_with_progress<F>(
|
||||||
|
|||||||
@@ -46,7 +46,7 @@
|
|||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"linux": {
|
"linux": {
|
||||||
"deb": {
|
"deb": {
|
||||||
"depends": ["python3", "python3-pip"]
|
"depends": []
|
||||||
},
|
},
|
||||||
"appimage": {
|
"appimage": {
|
||||||
"bundleMediaFramework": true
|
"bundleMediaFramework": true
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
<script lang="ts">
|
<script lang="ts">
|
||||||
import { invoke } from '@tauri-apps/api/core';
|
import { invoke } from '@tauri-apps/api/core';
|
||||||
import { segments, speakers } from '$lib/stores/transcript';
|
import { segments, speakers } from '$lib/stores/transcript';
|
||||||
|
import { settings } from '$lib/stores/settings';
|
||||||
|
|
||||||
interface ChatMessage {
|
interface ChatMessage {
|
||||||
role: 'user' | 'assistant';
|
role: 'user' | 'assistant';
|
||||||
@@ -43,9 +44,23 @@
|
|||||||
content: m.content,
|
content: m.content,
|
||||||
}));
|
}));
|
||||||
|
|
||||||
|
// Ensure the provider is configured with current credentials before chatting
|
||||||
|
const s = $settings;
|
||||||
|
const configMap: Record<string, Record<string, string>> = {
|
||||||
|
openai: { api_key: s.openai_api_key, model: s.openai_model },
|
||||||
|
anthropic: { api_key: s.anthropic_api_key, model: s.anthropic_model },
|
||||||
|
litellm: { api_key: s.litellm_api_key, api_base: s.litellm_api_base, model: s.litellm_model },
|
||||||
|
local: { model: s.local_model_path, base_url: 'http://localhost:8080' },
|
||||||
|
};
|
||||||
|
const config = configMap[s.ai_provider];
|
||||||
|
if (config) {
|
||||||
|
await invoke('ai_configure', { provider: s.ai_provider, config });
|
||||||
|
}
|
||||||
|
|
||||||
const result = await invoke<{ response: string }>('ai_chat', {
|
const result = await invoke<{ response: string }>('ai_chat', {
|
||||||
messages: chatMessages,
|
messages: chatMessages,
|
||||||
transcriptContext: getTranscriptContext(),
|
transcriptContext: getTranscriptContext(),
|
||||||
|
provider: s.ai_provider,
|
||||||
});
|
});
|
||||||
|
|
||||||
messages = [...messages, { role: 'assistant', content: result.response }];
|
messages = [...messages, { role: 'assistant', content: result.response }];
|
||||||
|
|||||||
@@ -8,17 +8,7 @@
|
|||||||
|
|
||||||
let { visible = false, percent = 0, stage = '', message = '' }: Props = $props();
|
let { visible = false, percent = 0, stage = '', message = '' }: Props = $props();
|
||||||
|
|
||||||
// Map internal stage names to user-friendly labels
|
// Pipeline steps in order
|
||||||
const stageLabels: Record<string, string> = {
|
|
||||||
'pipeline': 'Pipeline',
|
|
||||||
'loading_model': 'Loading Model',
|
|
||||||
'transcribing': 'Transcribing',
|
|
||||||
'loading_diarization': 'Loading Diarization',
|
|
||||||
'diarizing': 'Speaker Detection',
|
|
||||||
'done': 'Complete',
|
|
||||||
};
|
|
||||||
|
|
||||||
// Pipeline steps for the task list
|
|
||||||
const pipelineSteps = [
|
const pipelineSteps = [
|
||||||
{ key: 'loading_model', label: 'Load transcription model' },
|
{ key: 'loading_model', label: 'Load transcription model' },
|
||||||
{ key: 'transcribing', label: 'Transcribe audio' },
|
{ key: 'transcribing', label: 'Transcribe audio' },
|
||||||
@@ -27,17 +17,47 @@
|
|||||||
{ key: 'merging', label: 'Merge results' },
|
{ key: 'merging', label: 'Merge results' },
|
||||||
];
|
];
|
||||||
|
|
||||||
function getStepStatus(stepKey: string, currentStage: string): 'pending' | 'active' | 'done' {
|
|
||||||
const stepOrder = pipelineSteps.map(s => s.key);
|
const stepOrder = pipelineSteps.map(s => s.key);
|
||||||
const currentIdx = stepOrder.indexOf(currentStage);
|
|
||||||
const stepIdx = stepOrder.indexOf(stepKey);
|
|
||||||
|
|
||||||
if (currentStage === 'done') return 'done';
|
// Track the highest step index we've reached (never goes backward)
|
||||||
if (stepIdx < currentIdx) return 'done';
|
let highestStepIdx = $state(-1);
|
||||||
if (stepIdx === currentIdx) return 'active';
|
|
||||||
|
// Map non-step stages to step indices for progress tracking
|
||||||
|
function stageToStepIdx(s: string): number {
|
||||||
|
const direct = stepOrder.indexOf(s);
|
||||||
|
if (direct >= 0) return direct;
|
||||||
|
// 'pipeline' stage appears before known steps — don't change highwater mark
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
$effect(() => {
|
||||||
|
if (!visible) {
|
||||||
|
highestStepIdx = -1;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const idx = stageToStepIdx(stage);
|
||||||
|
if (idx > highestStepIdx) {
|
||||||
|
highestStepIdx = idx;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
function getStepStatus(stepIdx: number): 'pending' | 'active' | 'done' {
|
||||||
|
if (stepIdx < highestStepIdx) return 'done';
|
||||||
|
if (stepIdx === highestStepIdx) return 'active';
|
||||||
return 'pending';
|
return 'pending';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// User-friendly display of current stage
|
||||||
|
const stageLabels: Record<string, string> = {
|
||||||
|
'pipeline': 'Initializing...',
|
||||||
|
'loading_model': 'Loading Model',
|
||||||
|
'transcribing': 'Transcribing',
|
||||||
|
'loading_diarization': 'Loading Diarization',
|
||||||
|
'diarizing': 'Speaker Detection',
|
||||||
|
'merging': 'Merging Results',
|
||||||
|
'done': 'Complete',
|
||||||
|
};
|
||||||
|
|
||||||
let displayStage = $derived(stageLabels[stage] || stage || 'Processing...');
|
let displayStage = $derived(stageLabels[stage] || stage || 'Processing...');
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
@@ -50,8 +70,8 @@
|
|||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="steps">
|
<div class="steps">
|
||||||
{#each pipelineSteps as step}
|
{#each pipelineSteps as step, idx}
|
||||||
{@const status = getStepStatus(step.key, stage)}
|
{@const status = getStepStatus(idx)}
|
||||||
<div class="step" class:step-done={status === 'done'} class:step-active={status === 'active'}>
|
<div class="step" class:step-done={status === 'done'} class:step-active={status === 'active'}>
|
||||||
<span class="step-icon">
|
<span class="step-icon">
|
||||||
{#if status === 'done'}
|
{#if status === 'done'}
|
||||||
|
|||||||
@@ -14,6 +14,7 @@
|
|||||||
let activeTab = $state<'transcription' | 'speakers' | 'ai' | 'local'>('transcription');
|
let activeTab = $state<'transcription' | 'speakers' | 'ai' | 'local'>('transcription');
|
||||||
let modelStatus = $state<'idle' | 'downloading' | 'success' | 'error'>('idle');
|
let modelStatus = $state<'idle' | 'downloading' | 'success' | 'error'>('idle');
|
||||||
let modelError = $state('');
|
let modelError = $state('');
|
||||||
|
let revealedFields = $state<Set<string>>(new Set());
|
||||||
|
|
||||||
async function testAndDownloadModel() {
|
async function testAndDownloadModel() {
|
||||||
if (!localSettings.hf_token) {
|
if (!localSettings.hf_token) {
|
||||||
@@ -111,7 +112,10 @@
|
|||||||
{:else if activeTab === 'speakers'}
|
{:else if activeTab === 'speakers'}
|
||||||
<div class="field">
|
<div class="field">
|
||||||
<label for="hf-token">HuggingFace Token</label>
|
<label for="hf-token">HuggingFace Token</label>
|
||||||
<input id="hf-token" type="password" bind:value={localSettings.hf_token} placeholder="hf_..." />
|
<div class="input-reveal">
|
||||||
|
<input id="hf-token" type={revealedFields.has('hf-token') ? 'text' : 'password'} bind:value={localSettings.hf_token} placeholder="hf_..." />
|
||||||
|
<button type="button" class="reveal-btn" onclick={() => { const s = new Set(revealedFields); s.has('hf-token') ? s.delete('hf-token') : s.add('hf-token'); revealedFields = s; }}>{revealedFields.has('hf-token') ? 'Hide' : 'Show'}</button>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div class="info-box">
|
<div class="info-box">
|
||||||
<p class="info-title">Setup (one-time)</p>
|
<p class="info-title">Setup (one-time)</p>
|
||||||
@@ -150,6 +154,23 @@
|
|||||||
{#if modelStatus === 'error'}
|
{#if modelStatus === 'error'}
|
||||||
<p class="status-error">{modelError}</p>
|
<p class="status-error">{modelError}</p>
|
||||||
{/if}
|
{/if}
|
||||||
|
<div class="field" style="margin-top: 1rem;">
|
||||||
|
<label for="num-speakers">Number of speakers</label>
|
||||||
|
<select
|
||||||
|
id="num-speakers"
|
||||||
|
value={localSettings.num_speakers === null || localSettings.num_speakers === 0 ? '0' : String(localSettings.num_speakers)}
|
||||||
|
onchange={(e) => {
|
||||||
|
const v = parseInt((e.target as HTMLSelectElement).value, 10);
|
||||||
|
localSettings.num_speakers = v === 0 ? null : v;
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
<option value="0">Auto-detect</option>
|
||||||
|
{#each Array.from({ length: 20 }, (_, i) => i + 1) as n}
|
||||||
|
<option value={String(n)}>{n}</option>
|
||||||
|
{/each}
|
||||||
|
</select>
|
||||||
|
<p class="hint">Hint the expected number of speakers to speed up diarization clustering.</p>
|
||||||
|
</div>
|
||||||
<div class="field checkbox" style="margin-top: 1rem;">
|
<div class="field checkbox" style="margin-top: 1rem;">
|
||||||
<label>
|
<label>
|
||||||
<input type="checkbox" bind:checked={localSettings.skip_diarization} />
|
<input type="checkbox" bind:checked={localSettings.skip_diarization} />
|
||||||
@@ -163,14 +184,17 @@
|
|||||||
<option value="local">Local (llama-server)</option>
|
<option value="local">Local (llama-server)</option>
|
||||||
<option value="openai">OpenAI</option>
|
<option value="openai">OpenAI</option>
|
||||||
<option value="anthropic">Anthropic</option>
|
<option value="anthropic">Anthropic</option>
|
||||||
<option value="litellm">LiteLLM</option>
|
<option value="litellm">OpenAI Compatible</option>
|
||||||
</select>
|
</select>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{#if localSettings.ai_provider === 'openai'}
|
{#if localSettings.ai_provider === 'openai'}
|
||||||
<div class="field">
|
<div class="field">
|
||||||
<label for="openai-key">OpenAI API Key</label>
|
<label for="openai-key">OpenAI API Key</label>
|
||||||
<input id="openai-key" type="password" bind:value={localSettings.openai_api_key} placeholder="sk-..." />
|
<div class="input-reveal">
|
||||||
|
<input id="openai-key" type={revealedFields.has('openai-key') ? 'text' : 'password'} bind:value={localSettings.openai_api_key} placeholder="sk-..." />
|
||||||
|
<button type="button" class="reveal-btn" onclick={() => { const s = new Set(revealedFields); s.has('openai-key') ? s.delete('openai-key') : s.add('openai-key'); revealedFields = s; }}>{revealedFields.has('openai-key') ? 'Hide' : 'Show'}</button>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div class="field">
|
<div class="field">
|
||||||
<label for="openai-model">Model</label>
|
<label for="openai-model">Model</label>
|
||||||
@@ -179,13 +203,27 @@
|
|||||||
{:else if localSettings.ai_provider === 'anthropic'}
|
{:else if localSettings.ai_provider === 'anthropic'}
|
||||||
<div class="field">
|
<div class="field">
|
||||||
<label for="anthropic-key">Anthropic API Key</label>
|
<label for="anthropic-key">Anthropic API Key</label>
|
||||||
<input id="anthropic-key" type="password" bind:value={localSettings.anthropic_api_key} placeholder="sk-ant-..." />
|
<div class="input-reveal">
|
||||||
|
<input id="anthropic-key" type={revealedFields.has('anthropic-key') ? 'text' : 'password'} bind:value={localSettings.anthropic_api_key} placeholder="sk-ant-..." />
|
||||||
|
<button type="button" class="reveal-btn" onclick={() => { const s = new Set(revealedFields); s.has('anthropic-key') ? s.delete('anthropic-key') : s.add('anthropic-key'); revealedFields = s; }}>{revealedFields.has('anthropic-key') ? 'Hide' : 'Show'}</button>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div class="field">
|
<div class="field">
|
||||||
<label for="anthropic-model">Model</label>
|
<label for="anthropic-model">Model</label>
|
||||||
<input id="anthropic-model" type="text" bind:value={localSettings.anthropic_model} />
|
<input id="anthropic-model" type="text" bind:value={localSettings.anthropic_model} />
|
||||||
</div>
|
</div>
|
||||||
{:else if localSettings.ai_provider === 'litellm'}
|
{:else if localSettings.ai_provider === 'litellm'}
|
||||||
|
<div class="field">
|
||||||
|
<label for="litellm-base">API Base URL</label>
|
||||||
|
<input id="litellm-base" type="text" bind:value={localSettings.litellm_api_base} placeholder="https://your-litellm-proxy.example.com" />
|
||||||
|
</div>
|
||||||
|
<div class="field">
|
||||||
|
<label for="litellm-key">API Key</label>
|
||||||
|
<div class="input-reveal">
|
||||||
|
<input id="litellm-key" type={revealedFields.has('litellm-key') ? 'text' : 'password'} bind:value={localSettings.litellm_api_key} placeholder="sk-..." />
|
||||||
|
<button type="button" class="reveal-btn" onclick={() => { const s = new Set(revealedFields); s.has('litellm-key') ? s.delete('litellm-key') : s.add('litellm-key'); revealedFields = s; }}>{revealedFields.has('litellm-key') ? 'Hide' : 'Show'}</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
<div class="field">
|
<div class="field">
|
||||||
<label for="litellm-model">Model</label>
|
<label for="litellm-model">Model</label>
|
||||||
<input id="litellm-model" type="text" bind:value={localSettings.litellm_model} placeholder="provider/model-name" />
|
<input id="litellm-model" type="text" bind:value={localSettings.litellm_model} placeholder="provider/model-name" />
|
||||||
@@ -293,11 +331,36 @@
|
|||||||
color: #aaa;
|
color: #aaa;
|
||||||
margin-bottom: 0.3rem;
|
margin-bottom: 0.3rem;
|
||||||
}
|
}
|
||||||
|
.input-reveal {
|
||||||
|
display: flex;
|
||||||
|
gap: 0;
|
||||||
|
}
|
||||||
|
.input-reveal input {
|
||||||
|
flex: 1;
|
||||||
|
border-top-right-radius: 0;
|
||||||
|
border-bottom-right-radius: 0;
|
||||||
|
}
|
||||||
|
.reveal-btn {
|
||||||
|
background: #0f3460;
|
||||||
|
border: 1px solid #4a5568;
|
||||||
|
border-left: none;
|
||||||
|
color: #aaa;
|
||||||
|
padding: 0.5rem 0.6rem;
|
||||||
|
border-radius: 0 4px 4px 0;
|
||||||
|
cursor: pointer;
|
||||||
|
font-size: 0.75rem;
|
||||||
|
white-space: nowrap;
|
||||||
|
}
|
||||||
|
.reveal-btn:hover {
|
||||||
|
color: #e0e0e0;
|
||||||
|
background: #1a4a7a;
|
||||||
|
}
|
||||||
.field input,
|
.field input,
|
||||||
.field select {
|
.field select {
|
||||||
width: 100%;
|
width: 100%;
|
||||||
background: #1a1a2e;
|
background: #1a1a2e;
|
||||||
color: #e0e0e0;
|
color: #e0e0e0;
|
||||||
|
color-scheme: dark;
|
||||||
border: 1px solid #4a5568;
|
border: 1px solid #4a5568;
|
||||||
border-radius: 4px;
|
border-radius: 4px;
|
||||||
padding: 0.5rem;
|
padding: 0.5rem;
|
||||||
|
|||||||
@@ -13,6 +13,7 @@
|
|||||||
let container: HTMLDivElement;
|
let container: HTMLDivElement;
|
||||||
let wavesurfer: WaveSurfer | null = $state(null);
|
let wavesurfer: WaveSurfer | null = $state(null);
|
||||||
let isReady = $state(false);
|
let isReady = $state(false);
|
||||||
|
let isLoading = $state(false);
|
||||||
let currentTime = $state('0:00');
|
let currentTime = $state('0:00');
|
||||||
let totalTime = $state('0:00');
|
let totalTime = $state('0:00');
|
||||||
|
|
||||||
@@ -32,6 +33,7 @@
|
|||||||
barWidth: 2,
|
barWidth: 2,
|
||||||
barGap: 1,
|
barGap: 1,
|
||||||
barRadius: 2,
|
barRadius: 2,
|
||||||
|
backend: 'WebAudio',
|
||||||
});
|
});
|
||||||
|
|
||||||
wavesurfer.on('timeupdate', (time: number) => {
|
wavesurfer.on('timeupdate', (time: number) => {
|
||||||
@@ -41,6 +43,7 @@
|
|||||||
|
|
||||||
wavesurfer.on('ready', () => {
|
wavesurfer.on('ready', () => {
|
||||||
isReady = true;
|
isReady = true;
|
||||||
|
isLoading = false;
|
||||||
const dur = wavesurfer!.getDuration();
|
const dur = wavesurfer!.getDuration();
|
||||||
durationMs.set(Math.round(dur * 1000));
|
durationMs.set(Math.round(dur * 1000));
|
||||||
totalTime = formatTime(dur);
|
totalTime = formatTime(dur);
|
||||||
@@ -55,7 +58,7 @@
|
|||||||
});
|
});
|
||||||
|
|
||||||
if (audioUrl) {
|
if (audioUrl) {
|
||||||
wavesurfer.load(audioUrl);
|
loadAudio(audioUrl);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -89,16 +92,13 @@
|
|||||||
console.warn('[voice-to-notes] seekTo ignored — audio not ready yet');
|
console.warn('[voice-to-notes] seekTo ignored — audio not ready yet');
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
const timeSec = timeMs / 1000;
|
wavesurfer.setTime(timeMs / 1000);
|
||||||
wavesurfer.setTime(timeSec);
|
|
||||||
if (!wavesurfer.isPlaying()) {
|
|
||||||
wavesurfer.play();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Load a new audio file. */
|
/** Load a new audio file. */
|
||||||
export function loadAudio(url: string) {
|
export function loadAudio(url: string) {
|
||||||
isReady = false;
|
isReady = false;
|
||||||
|
isLoading = true;
|
||||||
wavesurfer?.load(url);
|
wavesurfer?.load(url);
|
||||||
}
|
}
|
||||||
</script>
|
</script>
|
||||||
|
|||||||
@@ -8,6 +8,8 @@ export interface AppSettings {
|
|||||||
openai_model: string;
|
openai_model: string;
|
||||||
anthropic_model: string;
|
anthropic_model: string;
|
||||||
litellm_model: string;
|
litellm_model: string;
|
||||||
|
litellm_api_key: string;
|
||||||
|
litellm_api_base: string;
|
||||||
local_model_path: string;
|
local_model_path: string;
|
||||||
local_binary_path: string;
|
local_binary_path: string;
|
||||||
transcription_model: string;
|
transcription_model: string;
|
||||||
@@ -15,6 +17,7 @@ export interface AppSettings {
|
|||||||
transcription_language: string;
|
transcription_language: string;
|
||||||
skip_diarization: boolean;
|
skip_diarization: boolean;
|
||||||
hf_token: string;
|
hf_token: string;
|
||||||
|
num_speakers: number | null;
|
||||||
}
|
}
|
||||||
|
|
||||||
const defaults: AppSettings = {
|
const defaults: AppSettings = {
|
||||||
@@ -24,6 +27,8 @@ const defaults: AppSettings = {
|
|||||||
openai_model: 'gpt-4o-mini',
|
openai_model: 'gpt-4o-mini',
|
||||||
anthropic_model: 'claude-sonnet-4-6',
|
anthropic_model: 'claude-sonnet-4-6',
|
||||||
litellm_model: 'gpt-4o-mini',
|
litellm_model: 'gpt-4o-mini',
|
||||||
|
litellm_api_key: '',
|
||||||
|
litellm_api_base: '',
|
||||||
local_model_path: '',
|
local_model_path: '',
|
||||||
local_binary_path: 'llama-server',
|
local_binary_path: 'llama-server',
|
||||||
transcription_model: 'base',
|
transcription_model: 'base',
|
||||||
@@ -31,6 +36,7 @@ const defaults: AppSettings = {
|
|||||||
transcription_language: '',
|
transcription_language: '',
|
||||||
skip_diarization: false,
|
skip_diarization: false,
|
||||||
hf_token: '',
|
hf_token: '',
|
||||||
|
num_speakers: null,
|
||||||
};
|
};
|
||||||
|
|
||||||
export const settings = writable<AppSettings>({ ...defaults });
|
export const settings = writable<AppSettings>({ ...defaults });
|
||||||
@@ -47,4 +53,20 @@ export async function loadSettings(): Promise<void> {
|
|||||||
export async function saveSettings(s: AppSettings): Promise<void> {
|
export async function saveSettings(s: AppSettings): Promise<void> {
|
||||||
settings.set(s);
|
settings.set(s);
|
||||||
await invoke('save_settings', { settings: s });
|
await invoke('save_settings', { settings: s });
|
||||||
|
|
||||||
|
// Configure the AI provider in the Python sidecar
|
||||||
|
const configMap: Record<string, Record<string, string>> = {
|
||||||
|
openai: { api_key: s.openai_api_key, model: s.openai_model },
|
||||||
|
anthropic: { api_key: s.anthropic_api_key, model: s.anthropic_model },
|
||||||
|
litellm: { api_key: s.litellm_api_key, api_base: s.litellm_api_base, model: s.litellm_model },
|
||||||
|
local: { model: s.local_model_path, base_url: 'http://localhost:8080' },
|
||||||
|
};
|
||||||
|
const config = configMap[s.ai_provider];
|
||||||
|
if (config) {
|
||||||
|
try {
|
||||||
|
await invoke('ai_configure', { provider: s.ai_provider, config });
|
||||||
|
} catch {
|
||||||
|
// Sidecar may not be running yet — provider will be configured on first use
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -13,6 +13,7 @@
|
|||||||
import type { Segment, Speaker } from '$lib/types/transcript';
|
import type { Segment, Speaker } from '$lib/types/transcript';
|
||||||
import { onMount, tick } from 'svelte';
|
import { onMount, tick } from 'svelte';
|
||||||
|
|
||||||
|
let appReady = $state(false);
|
||||||
let waveformPlayer: WaveformPlayer;
|
let waveformPlayer: WaveformPlayer;
|
||||||
let audioUrl = $state('');
|
let audioUrl = $state('');
|
||||||
let showSettings = $state(false);
|
let showSettings = $state(false);
|
||||||
@@ -54,6 +55,8 @@
|
|||||||
document.addEventListener('keydown', handleKeyDown);
|
document.addEventListener('keydown', handleKeyDown);
|
||||||
document.addEventListener('click', handleClickOutside);
|
document.addEventListener('click', handleClickOutside);
|
||||||
|
|
||||||
|
appReady = true;
|
||||||
|
|
||||||
return () => {
|
return () => {
|
||||||
document.removeEventListener('keydown', handleKeyDown);
|
document.removeEventListener('keydown', handleKeyDown);
|
||||||
document.removeEventListener('click', handleClickOutside);
|
document.removeEventListener('click', handleClickOutside);
|
||||||
@@ -200,6 +203,7 @@
|
|||||||
language: $settings.transcription_language || undefined,
|
language: $settings.transcription_language || undefined,
|
||||||
skipDiarization: $settings.skip_diarization || undefined,
|
skipDiarization: $settings.skip_diarization || undefined,
|
||||||
hfToken: $settings.hf_token || undefined,
|
hfToken: $settings.hf_token || undefined,
|
||||||
|
numSpeakers: $settings.num_speakers && $settings.num_speakers > 0 ? $settings.num_speakers : undefined,
|
||||||
});
|
});
|
||||||
|
|
||||||
// Create speaker entries from pipeline result
|
// Create speaker entries from pipeline result
|
||||||
@@ -303,7 +307,15 @@
|
|||||||
}
|
}
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
<div class="app-header">
|
{#if !appReady}
|
||||||
|
<div class="splash-screen">
|
||||||
|
<h1 class="splash-title">Voice to Notes</h1>
|
||||||
|
<p class="splash-subtitle">Loading...</p>
|
||||||
|
<div class="splash-spinner"></div>
|
||||||
|
</div>
|
||||||
|
{:else}
|
||||||
|
<div class="app-shell">
|
||||||
|
<div class="app-header">
|
||||||
<h1>Voice to Notes</h1>
|
<h1>Voice to Notes</h1>
|
||||||
<div class="header-actions">
|
<div class="header-actions">
|
||||||
<button class="import-btn" onclick={handleFileImport} disabled={isTranscribing}>
|
<button class="import-btn" onclick={handleFileImport} disabled={isTranscribing}>
|
||||||
@@ -333,9 +345,9 @@
|
|||||||
</div>
|
</div>
|
||||||
{/if}
|
{/if}
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="workspace">
|
<div class="workspace">
|
||||||
<div class="main-content">
|
<div class="main-content">
|
||||||
<WaveformPlayer bind:this={waveformPlayer} {audioUrl} />
|
<WaveformPlayer bind:this={waveformPlayer} {audioUrl} />
|
||||||
<TranscriptEditor onWordClick={handleWordClick} />
|
<TranscriptEditor onWordClick={handleWordClick} />
|
||||||
@@ -344,19 +356,21 @@
|
|||||||
<SpeakerManager />
|
<SpeakerManager />
|
||||||
<AIChatPanel />
|
<AIChatPanel />
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
<ProgressOverlay
|
<ProgressOverlay
|
||||||
visible={isTranscribing}
|
visible={isTranscribing}
|
||||||
percent={transcriptionProgress}
|
percent={transcriptionProgress}
|
||||||
stage={transcriptionStage}
|
stage={transcriptionStage}
|
||||||
message={transcriptionMessage}
|
message={transcriptionMessage}
|
||||||
/>
|
/>
|
||||||
|
|
||||||
<SettingsModal
|
<SettingsModal
|
||||||
visible={showSettings}
|
visible={showSettings}
|
||||||
onClose={() => showSettings = false}
|
onClose={() => showSettings = false}
|
||||||
/>
|
/>
|
||||||
|
{/if}
|
||||||
|
|
||||||
<style>
|
<style>
|
||||||
.app-header {
|
.app-header {
|
||||||
@@ -453,11 +467,18 @@
|
|||||||
.export-option:hover {
|
.export-option:hover {
|
||||||
background: rgba(233, 69, 96, 0.2);
|
background: rgba(233, 69, 96, 0.2);
|
||||||
}
|
}
|
||||||
|
.app-shell {
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
height: 100vh;
|
||||||
|
overflow: hidden;
|
||||||
|
}
|
||||||
.workspace {
|
.workspace {
|
||||||
display: flex;
|
display: flex;
|
||||||
gap: 1rem;
|
gap: 1rem;
|
||||||
padding: 1rem;
|
padding: 1rem;
|
||||||
height: calc(100vh - 3rem);
|
flex: 1;
|
||||||
|
min-height: 0;
|
||||||
overflow: hidden;
|
overflow: hidden;
|
||||||
background: #0a0a23;
|
background: #0a0a23;
|
||||||
}
|
}
|
||||||
@@ -467,6 +488,8 @@
|
|||||||
flex-direction: column;
|
flex-direction: column;
|
||||||
gap: 1rem;
|
gap: 1rem;
|
||||||
min-width: 0;
|
min-width: 0;
|
||||||
|
min-height: 0;
|
||||||
|
overflow-y: auto;
|
||||||
}
|
}
|
||||||
.sidebar-right {
|
.sidebar-right {
|
||||||
width: 300px;
|
width: 300px;
|
||||||
@@ -474,5 +497,38 @@
|
|||||||
flex-direction: column;
|
flex-direction: column;
|
||||||
gap: 1rem;
|
gap: 1rem;
|
||||||
flex-shrink: 0;
|
flex-shrink: 0;
|
||||||
|
min-height: 0;
|
||||||
|
overflow-y: auto;
|
||||||
|
}
|
||||||
|
.splash-screen {
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
align-items: center;
|
||||||
|
justify-content: center;
|
||||||
|
height: 100vh;
|
||||||
|
background: #0a0a23;
|
||||||
|
color: #e0e0e0;
|
||||||
|
gap: 1rem;
|
||||||
|
}
|
||||||
|
.splash-title {
|
||||||
|
font-size: 2rem;
|
||||||
|
margin: 0;
|
||||||
|
color: #e94560;
|
||||||
|
}
|
||||||
|
.splash-subtitle {
|
||||||
|
font-size: 1rem;
|
||||||
|
color: #888;
|
||||||
|
margin: 0;
|
||||||
|
}
|
||||||
|
.splash-spinner {
|
||||||
|
width: 32px;
|
||||||
|
height: 32px;
|
||||||
|
border: 3px solid #2a3a5e;
|
||||||
|
border-top-color: #e94560;
|
||||||
|
border-radius: 50%;
|
||||||
|
animation: spin 0.8s linear infinite;
|
||||||
|
}
|
||||||
|
@keyframes spin {
|
||||||
|
to { transform: rotate(360deg); }
|
||||||
}
|
}
|
||||||
</style>
|
</style>
|
||||||
|
|||||||
Reference in New Issue
Block a user