Rewrite frontend to Tauri v2 + Svelte 5 for cross-platform support #4
9
.claude/settings.local.json
Normal file
9
.claude/settings.local.json
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{
|
||||||
|
"permissions": {
|
||||||
|
"allow": [
|
||||||
|
"Bash(python3:*)",
|
||||||
|
"Bash(node --check:*)",
|
||||||
|
"Bash(ls:*)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
414
.gitea/workflows/build-sidecar.yml
Normal file
414
.gitea/workflows/build-sidecar.yml
Normal file
@@ -0,0 +1,414 @@
|
|||||||
|
name: Build Sidecars
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches: [main]
|
||||||
|
paths:
|
||||||
|
- 'client/**'
|
||||||
|
- 'server/**'
|
||||||
|
- 'backend/**'
|
||||||
|
- 'pyproject.toml'
|
||||||
|
- 'local-transcription-headless.spec'
|
||||||
|
workflow_dispatch:
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
bump-sidecar-version:
|
||||||
|
name: Bump sidecar version and tag
|
||||||
|
if: "!contains(github.event.head_commit.message, '[skip ci]')"
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
outputs:
|
||||||
|
version: ${{ steps.bump.outputs.version }}
|
||||||
|
tag: ${{ steps.bump.outputs.tag }}
|
||||||
|
has_changes: ${{ steps.check_changes.outputs.has_changes }}
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
fetch-depth: 2
|
||||||
|
|
||||||
|
- name: Check for backend changes
|
||||||
|
id: check_changes
|
||||||
|
run: |
|
||||||
|
# If triggered by workflow_dispatch, always build
|
||||||
|
if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
|
||||||
|
echo "has_changes=true" >> $GITHUB_OUTPUT
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
# Check if relevant files changed in this commit
|
||||||
|
CHANGED=$(git diff --name-only HEAD~1 HEAD -- client/ server/ backend/ pyproject.toml local-transcription-headless.spec 2>/dev/null || echo "")
|
||||||
|
if [ -n "$CHANGED" ]; then
|
||||||
|
echo "has_changes=true" >> $GITHUB_OUTPUT
|
||||||
|
echo "Backend changes detected: $CHANGED"
|
||||||
|
else
|
||||||
|
echo "has_changes=false" >> $GITHUB_OUTPUT
|
||||||
|
echo "No backend changes detected, skipping sidecar build"
|
||||||
|
fi
|
||||||
|
|
||||||
|
- name: Configure git
|
||||||
|
if: steps.check_changes.outputs.has_changes == 'true'
|
||||||
|
run: |
|
||||||
|
git config user.name "Gitea Actions"
|
||||||
|
git config user.email "actions@gitea.local"
|
||||||
|
|
||||||
|
- name: Bump sidecar patch version
|
||||||
|
if: steps.check_changes.outputs.has_changes == 'true'
|
||||||
|
id: bump
|
||||||
|
run: |
|
||||||
|
# Read current version from pyproject.toml
|
||||||
|
CURRENT=$(grep '^version = ' pyproject.toml | head -1 | sed 's/version = "\(.*\)"/\1/')
|
||||||
|
echo "Current sidecar version: ${CURRENT}"
|
||||||
|
|
||||||
|
# Increment patch number
|
||||||
|
MAJOR=$(echo "${CURRENT}" | cut -d. -f1)
|
||||||
|
MINOR=$(echo "${CURRENT}" | cut -d. -f2)
|
||||||
|
PATCH=$(echo "${CURRENT}" | cut -d. -f3)
|
||||||
|
NEW_PATCH=$((PATCH + 1))
|
||||||
|
NEW_VERSION="${MAJOR}.${MINOR}.${NEW_PATCH}"
|
||||||
|
echo "New sidecar version: ${NEW_VERSION}"
|
||||||
|
|
||||||
|
# Update pyproject.toml
|
||||||
|
sed -i "s/^version = \"${CURRENT}\"/version = \"${NEW_VERSION}\"/" pyproject.toml
|
||||||
|
|
||||||
|
# Update version.py
|
||||||
|
sed -i "s/__version__ = \"${CURRENT}\"/__version__ = \"${NEW_VERSION}\"/" version.py
|
||||||
|
sed -i "s/__version_info__ = .*/__version_info__ = (${MAJOR}, ${MINOR}, ${NEW_PATCH})/" version.py
|
||||||
|
|
||||||
|
echo "version=${NEW_VERSION}" >> $GITHUB_OUTPUT
|
||||||
|
echo "tag=sidecar-v${NEW_VERSION}" >> $GITHUB_OUTPUT
|
||||||
|
|
||||||
|
- name: Commit and tag
  if: steps.check_changes.outputs.has_changes == 'true'
  env:
    BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
  run: |
    # Commit the sidecar version bump, rebase it onto the latest main, and
    # only then create the tag, so the tag always names the commit that
    # actually lands on main (a rebase rewrites the bump commit's hash).
    NEW_VERSION="${{ steps.bump.outputs.version }}"
    TAG="${{ steps.bump.outputs.tag }}"
    git add pyproject.toml version.py
    git commit -m "chore: bump sidecar version to ${NEW_VERSION} [skip ci]"

    # Inject the build token into the origin URL so pull/push are authenticated.
    REMOTE_URL=$(git remote get-url origin | sed "s|://|://gitea-actions:${BUILD_TOKEN}@|")

    # A failed rebase must stop the job: continuing would push a tag for a
    # commit that is not on main.
    if ! git pull --rebase "${REMOTE_URL}" main; then
      echo "ERROR: Could not rebase the version bump onto main."
      exit 1
    fi

    git tag "${TAG}"
    git push "${REMOTE_URL}" HEAD:main
    git push "${REMOTE_URL}" "${TAG}"
|
||||||
|
|
||||||
|
- name: Create Gitea release
  if: steps.check_changes.outputs.has_changes == 'true'
  env:
    BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
  run: |
    # Create the release that the per-platform build jobs upload their
    # archives to. The release is looked up later by its tag name.
    REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
    TAG="${{ steps.bump.outputs.tag }}"
    VERSION="${{ steps.bump.outputs.version }}"
    RELEASE_NAME="Sidecar v${VERSION}"

    # --fail turns an HTTP error status into a non-zero exit code, so a
    # rejected request fails this step instead of being reported as created.
    if ! curl --fail --silent --show-error -X POST \
      -H "Authorization: token ${BUILD_TOKEN}" \
      -H "Content-Type: application/json" \
      -d "{\"tag_name\": \"${TAG}\", \"name\": \"${RELEASE_NAME}\", \"body\": \"Automated sidecar build.\", \"draft\": false, \"prerelease\": false}" \
      "${REPO_API}/releases"; then
      echo "ERROR: Failed to create release ${RELEASE_NAME} for tag ${TAG}."
      exit 1
    fi
    echo "Created release: ${RELEASE_NAME}"
|
||||||
|
|
||||||
|
# ── Linux sidecar (CUDA + CPU) ──
|
||||||
|
|
||||||
|
build-sidecar-linux:
|
||||||
|
name: Build Sidecar (Linux)
|
||||||
|
needs: bump-sidecar-version
|
||||||
|
if: needs.bump-sidecar-version.outputs.has_changes == 'true'
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
env:
|
||||||
|
PYTHON_VERSION: "3.11"
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
ref: ${{ needs.bump-sidecar-version.outputs.tag }}
|
||||||
|
|
||||||
|
- name: Install uv
|
||||||
|
run: |
|
||||||
|
if command -v uv &> /dev/null; then
|
||||||
|
echo "uv already installed: $(uv --version)"
|
||||||
|
else
|
||||||
|
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||||
|
echo "$HOME/.local/bin" >> $GITHUB_PATH
|
||||||
|
fi
|
||||||
|
|
||||||
|
- name: Set up Python
|
||||||
|
run: uv python install ${{ env.PYTHON_VERSION }}
|
||||||
|
|
||||||
|
- name: Install system dependencies
|
||||||
|
run: |
|
||||||
|
sudo apt-get update
|
||||||
|
sudo apt-get install -y portaudio19-dev
|
||||||
|
|
||||||
|
- name: Build sidecar (CUDA)
|
||||||
|
run: |
|
||||||
|
uv sync
|
||||||
|
uv run pyinstaller local-transcription-headless.spec
|
||||||
|
|
||||||
|
- name: Package sidecar (CUDA)
|
||||||
|
run: |
|
||||||
|
cd dist/local-transcription-backend && zip -r ../../sidecar-linux-x86_64-cuda.zip .
|
||||||
|
|
||||||
|
- name: Build sidecar (CPU)
|
||||||
|
run: |
|
||||||
|
rm -rf dist/local-transcription-backend build/
|
||||||
|
# Install CPU-only PyTorch
|
||||||
|
uv pip install torch torchaudio --index-url https://download.pytorch.org/whl/cpu --force-reinstall
|
||||||
|
uv run pyinstaller local-transcription-headless.spec
|
||||||
|
|
||||||
|
- name: Package sidecar (CPU)
|
||||||
|
run: |
|
||||||
|
cd dist/local-transcription-backend && zip -r ../../sidecar-linux-x86_64-cpu.zip .
|
||||||
|
|
||||||
|
- name: Upload to sidecar release
  env:
    BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
  run: |
    # Upload every sidecar-*.zip in the workspace to the release for this
    # run's tag, replacing any asset of the same name. The step fails if the
    # release cannot be found, no archive exists, or any upload is rejected.
    sudo apt-get install -y jq
    REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
    TAG="${{ needs.bump-sidecar-version.outputs.tag }}"

    echo "Waiting for sidecar release ${TAG} to be available..."
    RELEASE_ID=""
    for i in $(seq 1 30); do
      RELEASE_JSON=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
        "${REPO_API}/releases/tags/${TAG}")
      # A non-JSON response counts as "not ready yet" rather than aborting.
      RELEASE_ID=$(echo "$RELEASE_JSON" | jq -r '.id // empty' 2>/dev/null || true)

      if [ -n "${RELEASE_ID}" ] && [ "${RELEASE_ID}" != "null" ]; then
        echo "Found sidecar release: ${TAG} (ID: ${RELEASE_ID})"
        break
      fi

      echo "Attempt ${i}/30: Release not ready yet, retrying in 10s..."
      sleep 10
    done

    if [ -z "${RELEASE_ID}" ] || [ "${RELEASE_ID}" = "null" ]; then
      echo "ERROR: Failed to find sidecar release for tag ${TAG} after 30 attempts."
      exit 1
    fi

    FAILED=0
    for file in sidecar-*.zip; do
      # An unmatched glob is left as the literal pattern; treat that as an error.
      if [ ! -e "$file" ]; then
        echo "ERROR: No sidecar-*.zip archives found to upload."
        exit 1
      fi

      filename=$(basename "$file")
      # Percent-encode the whole name for the query string, not just spaces.
      encoded_name=$(jq -rn --arg name "$filename" '$name | @uri')
      echo "Uploading ${filename} ($(du -h "$file" | cut -f1))..."

      # Delete an existing asset with the same name so a re-run replaces it.
      ASSET_ID=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
        "${REPO_API}/releases/${RELEASE_ID}/assets" | jq -r --arg name "$filename" '.[] | select(.name == $name) | .id // empty')
      if [ -n "${ASSET_ID}" ]; then
        curl -s -X DELETE -H "Authorization: token ${BUILD_TOKEN}" \
          "${REPO_API}/releases/${RELEASE_ID}/assets/${ASSET_ID}"
      fi

      HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X POST \
        -H "Authorization: token ${BUILD_TOKEN}" \
        -H "Content-Type: application/octet-stream" \
        -T "$file" \
        "${REPO_API}/releases/${RELEASE_ID}/assets?name=${encoded_name}")
      echo "Upload response: HTTP ${HTTP_CODE}"

      # Anything other than a 2xx status means the asset was not stored.
      case "${HTTP_CODE}" in
        2??) ;;
        *)
          echo "ERROR: Upload failed for ${filename} (HTTP ${HTTP_CODE})."
          FAILED=1
          ;;
      esac
    done

    if [ "${FAILED}" -ne 0 ]; then
      exit 1
    fi
|
||||||
|
|
||||||
|
# ── Windows sidecar (CUDA + CPU) ──
|
||||||
|
|
||||||
|
build-sidecar-windows:
|
||||||
|
name: Build Sidecar (Windows)
|
||||||
|
needs: bump-sidecar-version
|
||||||
|
if: needs.bump-sidecar-version.outputs.has_changes == 'true'
|
||||||
|
runs-on: windows-latest
|
||||||
|
env:
|
||||||
|
PYTHON_VERSION: "3.11"
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
ref: ${{ needs.bump-sidecar-version.outputs.tag }}
|
||||||
|
|
||||||
|
- name: Install uv
|
||||||
|
shell: powershell
|
||||||
|
run: |
|
||||||
|
if (Get-Command uv -ErrorAction SilentlyContinue) {
|
||||||
|
Write-Host "uv already installed: $(uv --version)"
|
||||||
|
} else {
|
||||||
|
irm https://astral.sh/uv/install.ps1 | iex
|
||||||
|
echo "$env:USERPROFILE\.local\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
|
||||||
|
}
|
||||||
|
|
||||||
|
- name: Set up Python
|
||||||
|
shell: powershell
|
||||||
|
run: uv python install ${{ env.PYTHON_VERSION }}
|
||||||
|
|
||||||
|
- name: Install 7-Zip
|
||||||
|
shell: powershell
|
||||||
|
run: |
|
||||||
|
if (-not (Get-Command 7z -ErrorAction SilentlyContinue)) {
|
||||||
|
choco install 7zip -y
|
||||||
|
}
|
||||||
|
|
||||||
|
- name: Build sidecar (CUDA)
|
||||||
|
shell: powershell
|
||||||
|
run: |
|
||||||
|
uv sync
|
||||||
|
uv run pyinstaller local-transcription-headless.spec
|
||||||
|
|
||||||
|
- name: Package sidecar (CUDA)
|
||||||
|
shell: powershell
|
||||||
|
run: |
|
||||||
|
7z a -tzip -mx=5 sidecar-windows-x86_64-cuda.zip .\dist\local-transcription-backend\*
|
||||||
|
|
||||||
|
- name: Build sidecar (CPU)
|
||||||
|
shell: powershell
|
||||||
|
run: |
|
||||||
|
Remove-Item -Recurse -Force dist\local-transcription-backend, build -ErrorAction SilentlyContinue
|
||||||
|
uv pip install torch torchaudio --index-url https://download.pytorch.org/whl/cpu --force-reinstall
|
||||||
|
uv run pyinstaller local-transcription-headless.spec
|
||||||
|
|
||||||
|
- name: Package sidecar (CPU)
|
||||||
|
shell: powershell
|
||||||
|
run: |
|
||||||
|
7z a -tzip -mx=5 sidecar-windows-x86_64-cpu.zip .\dist\local-transcription-backend\*
|
||||||
|
|
||||||
|
- name: Upload to sidecar release
  shell: powershell
  env:
    BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
  run: |
    # Upload every sidecar-*.zip in the workspace to the release for this
    # run's tag, replacing any asset of the same name. The step fails if the
    # release cannot be found, no archive exists, or any upload fails.
    $REPO_API = "${{ github.server_url }}/api/v1/repos/${{ github.repository }}"
    $Headers = @{ "Authorization" = "token $env:BUILD_TOKEN" }
    $TAG = "${{ needs.bump-sidecar-version.outputs.tag }}"

    Write-Host "Waiting for sidecar release ${TAG} to be available..."
    $RELEASE_ID = $null

    for ($i = 1; $i -le 30; $i++) {
      try {
        $release = Invoke-RestMethod -Uri "${REPO_API}/releases/tags/${TAG}" -Headers $Headers -ErrorAction Stop
        $RELEASE_ID = $release.id

        if ($RELEASE_ID) {
          Write-Host "Found sidecar release: ${TAG} (ID: ${RELEASE_ID})"
          break
        }
      } catch {
        # A failed lookup is retried below; log the reason instead of hiding it.
        Write-Host "Release lookup failed: $($_.Exception.Message)"
      }

      Write-Host "Attempt ${i}/30: Release not ready yet, retrying in 10s..."
      Start-Sleep -Seconds 10
    }

    if (-not $RELEASE_ID) {
      Write-Host "ERROR: Failed to find sidecar release for tag ${TAG} after 30 attempts."
      exit 1
    }

    $archives = @(Get-ChildItem -Path . -Filter "sidecar-*.zip")
    if ($archives.Count -eq 0) {
      Write-Host "ERROR: No sidecar-*.zip archives found to upload."
      exit 1
    }

    # A foreach statement (not ForEach-Object) keeps $failed in this scope.
    $failed = $false
    foreach ($archive in $archives) {
      $filename = $archive.Name
      $encodedName = [System.Uri]::EscapeDataString($filename)
      $size = [math]::Round($archive.Length / 1MB, 1)
      Write-Host "Uploading ${filename} (${size} MB)..."

      # Delete an existing asset with the same name so a re-run replaces it.
      try {
        $assets = Invoke-RestMethod -Uri "${REPO_API}/releases/${RELEASE_ID}/assets" -Headers $Headers
        $existing = $assets | Where-Object { $_.name -eq $filename }
        if ($existing) {
          Invoke-RestMethod -Uri "${REPO_API}/releases/${RELEASE_ID}/assets/$($existing.id)" -Method Delete -Headers $Headers
        }
      } catch {
        Write-Host "WARNING: Could not check for an existing ${filename} asset: $($_.Exception.Message)"
      }

      $uploadUrl = "${REPO_API}/releases/${RELEASE_ID}/assets?name=${encodedName}"
      $result = curl.exe --fail --silent --show-error `
        -X POST `
        -H "Authorization: token $env:BUILD_TOKEN" `
        -H "Content-Type: application/octet-stream" `
        -T "$($archive.FullName)" `
        "$uploadUrl" 2>&1
      if ($LASTEXITCODE -eq 0) {
        Write-Host "Upload successful: ${filename}"
      } else {
        Write-Host "ERROR: Upload failed for ${filename}: ${result}"
        $failed = $true
      }
    }

    # Fail the step when any archive did not reach the release.
    if ($failed) {
      exit 1
    }
|
||||||
|
|
||||||
|
# ── macOS sidecar (CPU only — no CUDA on macOS) ──
|
||||||
|
|
||||||
|
build-sidecar-macos:
|
||||||
|
name: Build Sidecar (macOS)
|
||||||
|
needs: bump-sidecar-version
|
||||||
|
if: needs.bump-sidecar-version.outputs.has_changes == 'true'
|
||||||
|
runs-on: macos-latest
|
||||||
|
env:
|
||||||
|
PYTHON_VERSION: "3.11"
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
ref: ${{ needs.bump-sidecar-version.outputs.tag }}
|
||||||
|
|
||||||
|
- name: Install uv
|
||||||
|
run: |
|
||||||
|
if command -v uv &> /dev/null; then
|
||||||
|
echo "uv already installed: $(uv --version)"
|
||||||
|
else
|
||||||
|
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||||
|
echo "$HOME/.local/bin" >> $GITHUB_PATH
|
||||||
|
fi
|
||||||
|
|
||||||
|
- name: Set up Python
|
||||||
|
run: uv python install ${{ env.PYTHON_VERSION }}
|
||||||
|
|
||||||
|
- name: Install system dependencies
|
||||||
|
run: brew install portaudio
|
||||||
|
|
||||||
|
- name: Build sidecar (CPU)
|
||||||
|
run: |
|
||||||
|
# Install CPU-only PyTorch for macOS (MPS support included in default torch)
|
||||||
|
uv sync
|
||||||
|
uv pip install torch torchaudio --index-url https://download.pytorch.org/whl/cpu --force-reinstall
|
||||||
|
uv run pyinstaller local-transcription-headless.spec
|
||||||
|
|
||||||
|
- name: Package sidecar (CPU)
|
||||||
|
run: |
|
||||||
|
cd dist/local-transcription-backend && zip -r ../../sidecar-macos-aarch64-cpu.zip .
|
||||||
|
|
||||||
|
- name: Upload to sidecar release
  env:
    BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
  run: |
    # Upload every sidecar-*.zip in the workspace to the release for this
    # run's tag, replacing any asset of the same name. The step fails if the
    # release cannot be found, no archive exists, or any upload is rejected.
    which jq || brew install jq
    REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
    TAG="${{ needs.bump-sidecar-version.outputs.tag }}"

    echo "Waiting for sidecar release ${TAG} to be available..."
    RELEASE_ID=""
    for i in $(seq 1 30); do
      RELEASE_JSON=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
        "${REPO_API}/releases/tags/${TAG}")
      # A non-JSON response counts as "not ready yet" rather than aborting.
      RELEASE_ID=$(echo "$RELEASE_JSON" | jq -r '.id // empty' 2>/dev/null || true)

      if [ -n "${RELEASE_ID}" ] && [ "${RELEASE_ID}" != "null" ]; then
        echo "Found sidecar release: ${TAG} (ID: ${RELEASE_ID})"
        break
      fi

      echo "Attempt ${i}/30: Release not ready yet, retrying in 10s..."
      sleep 10
    done

    if [ -z "${RELEASE_ID}" ] || [ "${RELEASE_ID}" = "null" ]; then
      echo "ERROR: Failed to find sidecar release for tag ${TAG} after 30 attempts."
      exit 1
    fi

    FAILED=0
    for file in sidecar-*.zip; do
      # An unmatched glob is left as the literal pattern; treat that as an error.
      if [ ! -e "$file" ]; then
        echo "ERROR: No sidecar-*.zip archives found to upload."
        exit 1
      fi

      filename=$(basename "$file")
      # Percent-encode the whole name for the query string, not just spaces.
      encoded_name=$(jq -rn --arg name "$filename" '$name | @uri')
      echo "Uploading ${filename} ($(du -h "$file" | cut -f1))..."

      # Delete an existing asset with the same name so a re-run replaces it.
      ASSET_ID=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
        "${REPO_API}/releases/${RELEASE_ID}/assets" | jq -r --arg name "$filename" '.[] | select(.name == $name) | .id // empty')
      if [ -n "${ASSET_ID}" ]; then
        curl -s -X DELETE -H "Authorization: token ${BUILD_TOKEN}" \
          "${REPO_API}/releases/${RELEASE_ID}/assets/${ASSET_ID}"
      fi

      HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X POST \
        -H "Authorization: token ${BUILD_TOKEN}" \
        -H "Content-Type: application/octet-stream" \
        -T "$file" \
        "${REPO_API}/releases/${RELEASE_ID}/assets?name=${encoded_name}")
      echo "Upload response: HTTP ${HTTP_CODE}"

      # Anything other than a 2xx status means the asset was not stored.
      case "${HTTP_CODE}" in
        2??) ;;
        *)
          echo "ERROR: Upload failed for ${filename} (HTTP ${HTTP_CODE})."
          FAILED=1
          ;;
      esac
    done

    if [ "${FAILED}" -ne 0 ]; then
      exit 1
    fi
|
||||||
300
.gitea/workflows/release.yml
Normal file
300
.gitea/workflows/release.yml
Normal file
@@ -0,0 +1,300 @@
|
|||||||
|
name: Release
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches: [main]
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
bump-version:
|
||||||
|
name: Bump version and tag
|
||||||
|
if: "!contains(github.event.head_commit.message, '[skip ci]')"
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
outputs:
|
||||||
|
new_version: ${{ steps.bump.outputs.new_version }}
|
||||||
|
tag: ${{ steps.bump.outputs.tag }}
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
fetch-depth: 0
|
||||||
|
|
||||||
|
- name: Configure git
|
||||||
|
run: |
|
||||||
|
git config user.name "Gitea Actions"
|
||||||
|
git config user.email "actions@gitea.local"
|
||||||
|
|
||||||
|
- name: Bump patch version
|
||||||
|
id: bump
|
||||||
|
run: |
|
||||||
|
# Read current version from package.json
|
||||||
|
CURRENT=$(grep '"version"' package.json | head -1 | sed 's/.*"version": *"\([^"]*\)".*/\1/')
|
||||||
|
echo "Current version: ${CURRENT}"
|
||||||
|
|
||||||
|
# Increment patch number
|
||||||
|
MAJOR=$(echo "${CURRENT}" | cut -d. -f1)
|
||||||
|
MINOR=$(echo "${CURRENT}" | cut -d. -f2)
|
||||||
|
PATCH=$(echo "${CURRENT}" | cut -d. -f3)
|
||||||
|
NEW_PATCH=$((PATCH + 1))
|
||||||
|
NEW_VERSION="${MAJOR}.${MINOR}.${NEW_PATCH}"
|
||||||
|
echo "New version: ${NEW_VERSION}"
|
||||||
|
|
||||||
|
# Update package.json
|
||||||
|
sed -i "s/\"version\": \"${CURRENT}\"/\"version\": \"${NEW_VERSION}\"/" package.json
|
||||||
|
|
||||||
|
# Update src-tauri/tauri.conf.json
|
||||||
|
sed -i "s/\"version\": \"${CURRENT}\"/\"version\": \"${NEW_VERSION}\"/" src-tauri/tauri.conf.json
|
||||||
|
|
||||||
|
# Update src-tauri/Cargo.toml
|
||||||
|
sed -i "s/^version = \"${CURRENT}\"/version = \"${NEW_VERSION}\"/" src-tauri/Cargo.toml
|
||||||
|
|
||||||
|
# Update version.py
|
||||||
|
sed -i "s/__version__ = \"${CURRENT}\"/__version__ = \"${NEW_VERSION}\"/" version.py
|
||||||
|
sed -i "s/__version_info__ = .*/__version_info__ = (${MAJOR}, ${MINOR}, ${NEW_PATCH})/" version.py
|
||||||
|
|
||||||
|
echo "new_version=${NEW_VERSION}" >> $GITHUB_OUTPUT
|
||||||
|
echo "tag=v${NEW_VERSION}" >> $GITHUB_OUTPUT
|
||||||
|
|
||||||
|
- name: Commit and tag
  env:
    BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
  run: |
    # Commit the app version bump, rebase it onto the latest main, and only
    # then create the tag, so the tag always names the commit that actually
    # lands on main (a rebase rewrites the bump commit's hash).
    NEW_VERSION="${{ steps.bump.outputs.new_version }}"
    git add package.json src-tauri/tauri.conf.json src-tauri/Cargo.toml version.py
    git commit -m "chore: bump version to ${NEW_VERSION} [skip ci]"

    # Inject the build token into the origin URL so pull/push are authenticated.
    REMOTE_URL=$(git remote get-url origin | sed "s|://|://gitea-actions:${BUILD_TOKEN}@|")

    # A failed rebase must stop the job: continuing would push a tag for a
    # commit that is not on main.
    if ! git pull --rebase "${REMOTE_URL}" main; then
      echo "ERROR: Could not rebase the version bump onto main."
      exit 1
    fi

    git tag "v${NEW_VERSION}"
    git push "${REMOTE_URL}" HEAD:main
    git push "${REMOTE_URL}" "v${NEW_VERSION}"
|
||||||
|
|
||||||
|
- name: Create Gitea release
  env:
    BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
  run: |
    # Create the release that the per-platform build jobs upload their
    # installers to. The release is looked up later by its tag name.
    REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
    TAG="${{ steps.bump.outputs.tag }}"
    RELEASE_NAME="Local Transcription ${TAG}"

    # --fail turns an HTTP error status into a non-zero exit code, so a
    # rejected request fails this step instead of being reported as created.
    if ! curl --fail --silent --show-error -X POST \
      -H "Authorization: token ${BUILD_TOKEN}" \
      -H "Content-Type: application/json" \
      -d "{\"tag_name\": \"${TAG}\", \"name\": \"${RELEASE_NAME}\", \"body\": \"Automated build.\", \"draft\": false, \"prerelease\": false}" \
      "${REPO_API}/releases"; then
      echo "ERROR: Failed to create release ${RELEASE_NAME} for tag ${TAG}."
      exit 1
    fi
    echo "Created release: ${RELEASE_NAME}"
|
||||||
|
|
||||||
|
# ── Platform builds (run after version bump) ──
|
||||||
|
|
||||||
|
build-linux:
|
||||||
|
name: Build App (Linux)
|
||||||
|
needs: bump-version
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
env:
|
||||||
|
NODE_VERSION: "20"
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
ref: ${{ needs.bump-version.outputs.tag }}
|
||||||
|
|
||||||
|
- name: Set up Node.js
|
||||||
|
uses: actions/setup-node@v4
|
||||||
|
with:
|
||||||
|
node-version: ${{ env.NODE_VERSION }}
|
||||||
|
|
||||||
|
- name: Install Rust stable
|
||||||
|
run: |
|
||||||
|
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable
|
||||||
|
echo "$HOME/.cargo/bin" >> $GITHUB_PATH
|
||||||
|
|
||||||
|
- name: Install system dependencies
|
||||||
|
run: |
|
||||||
|
sudo apt-get update
|
||||||
|
sudo apt-get install -y libgtk-3-dev libwebkit2gtk-4.1-dev libappindicator3-dev librsvg2-dev patchelf xdg-utils rpm
|
||||||
|
|
||||||
|
- name: Install npm dependencies
|
||||||
|
run: npm ci
|
||||||
|
|
||||||
|
- name: Build Tauri app
|
||||||
|
run: npm run tauri build
|
||||||
|
|
||||||
|
- name: Upload to release
  env:
    BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
  run: |
    # Upload the Linux bundles (.deb, .rpm, .AppImage) produced by the Tauri
    # build to the release for this run's tag, replacing any asset of the
    # same name. The step fails if the release is missing, no bundle was
    # produced, or an upload is rejected.
    sudo apt-get install -y jq
    REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
    TAG="${{ needs.bump-version.outputs.tag }}"
    echo "Release tag: ${TAG}"

    RELEASE_ID=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
      "${REPO_API}/releases/tags/${TAG}" | jq -r '.id // empty')

    if [ -z "${RELEASE_ID}" ] || [ "${RELEASE_ID}" = "null" ]; then
      echo "ERROR: Failed to find release for tag ${TAG}."
      exit 1
    fi
    echo "Release ID: ${RELEASE_ID}"

    ARTIFACTS=$(find src-tauri/target/release/bundle -type f \( -name "*.deb" -o -name "*.rpm" -o -name "*.AppImage" \))
    if [ -z "${ARTIFACTS}" ]; then
      echo "ERROR: No .deb, .rpm or .AppImage bundles found to upload."
      exit 1
    fi

    # The loop runs in a pipeline subshell, so it reports failure through its
    # exit status; it is the last command, making that the step's result.
    printf '%s\n' "${ARTIFACTS}" | while IFS= read -r file; do
      filename=$(basename "$file")
      # Percent-encode the whole name for the query string, not just spaces.
      encoded_name=$(jq -rn --arg name "$filename" '$name | @uri')
      echo "Uploading ${filename} ($(du -h "$file" | cut -f1))..."

      # Delete an existing asset with the same name so a re-run replaces it.
      ASSET_ID=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
        "${REPO_API}/releases/${RELEASE_ID}/assets" | jq -r --arg name "$filename" '.[] | select(.name == $name) | .id // empty')
      if [ -n "${ASSET_ID}" ]; then
        curl -s -X DELETE -H "Authorization: token ${BUILD_TOKEN}" \
          "${REPO_API}/releases/${RELEASE_ID}/assets/${ASSET_ID}"
      fi

      HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X POST \
        -H "Authorization: token ${BUILD_TOKEN}" \
        -H "Content-Type: application/octet-stream" \
        -T "$file" \
        "${REPO_API}/releases/${RELEASE_ID}/assets?name=${encoded_name}")
      echo "Upload response: HTTP ${HTTP_CODE}"

      # Anything other than a 2xx status means the asset was not stored.
      case "${HTTP_CODE}" in
        2??) ;;
        *)
          echo "ERROR: Upload failed for ${filename} (HTTP ${HTTP_CODE})."
          exit 1
          ;;
      esac
    done
|
||||||
|
|
||||||
|
build-windows:
|
||||||
|
name: Build App (Windows)
|
||||||
|
needs: bump-version
|
||||||
|
runs-on: windows-latest
|
||||||
|
env:
|
||||||
|
NODE_VERSION: "20"
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
ref: ${{ needs.bump-version.outputs.tag }}
|
||||||
|
|
||||||
|
- name: Set up Node.js
|
||||||
|
uses: actions/setup-node@v4
|
||||||
|
with:
|
||||||
|
node-version: ${{ env.NODE_VERSION }}
|
||||||
|
|
||||||
|
- name: Install Rust stable
|
||||||
|
shell: powershell
|
||||||
|
run: |
|
||||||
|
if (Get-Command rustup -ErrorAction SilentlyContinue) {
|
||||||
|
rustup default stable
|
||||||
|
} else {
|
||||||
|
Invoke-WebRequest -Uri https://win.rustup.rs/x86_64 -OutFile rustup-init.exe
|
||||||
|
.\rustup-init.exe -y --default-toolchain stable
|
||||||
|
echo "$env:USERPROFILE\.cargo\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
|
||||||
|
}
|
||||||
|
|
||||||
|
- name: Install npm dependencies
|
||||||
|
shell: powershell
|
||||||
|
run: npm ci
|
||||||
|
|
||||||
|
- name: Build Tauri app
|
||||||
|
shell: powershell
|
||||||
|
run: npm run tauri build
|
||||||
|
|
||||||
|
- name: Upload to release
  shell: powershell
  env:
    BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
  run: |
    # Upload the Windows installers (*.msi, *-setup.exe) produced by the
    # Tauri build to the release for this run's tag, replacing any asset of
    # the same name. The step fails if no installer exists or an upload fails.
    $REPO_API = "${{ github.server_url }}/api/v1/repos/${{ github.repository }}"
    $Headers = @{ "Authorization" = "token $env:BUILD_TOKEN" }
    $TAG = "${{ needs.bump-version.outputs.tag }}"
    Write-Host "Release tag: ${TAG}"

    $release = Invoke-RestMethod -Uri "${REPO_API}/releases/tags/${TAG}" -Headers $Headers -ErrorAction Stop
    $RELEASE_ID = $release.id
    if (-not $RELEASE_ID) {
      Write-Host "ERROR: Failed to find release for tag ${TAG}."
      exit 1
    }
    Write-Host "Release ID: ${RELEASE_ID}"

    $installers = @(Get-ChildItem -Path src-tauri\target\release\bundle -Recurse -Include *.msi,*-setup.exe)
    if ($installers.Count -eq 0) {
      Write-Host "ERROR: No .msi or -setup.exe installers found to upload."
      exit 1
    }

    # A foreach statement (not ForEach-Object) keeps $failed in this scope.
    $failed = $false
    foreach ($installer in $installers) {
      $filename = $installer.Name
      $encodedName = [System.Uri]::EscapeDataString($filename)
      $size = [math]::Round($installer.Length / 1MB, 1)
      Write-Host "Uploading ${filename} (${size} MB)..."

      # Delete an existing asset with the same name so a re-run replaces it.
      try {
        $assets = Invoke-RestMethod -Uri "${REPO_API}/releases/${RELEASE_ID}/assets" -Headers $Headers
        $existing = $assets | Where-Object { $_.name -eq $filename }
        if ($existing) {
          Invoke-RestMethod -Uri "${REPO_API}/releases/${RELEASE_ID}/assets/$($existing.id)" -Method Delete -Headers $Headers
        }
      } catch {
        Write-Host "WARNING: Could not check for an existing ${filename} asset: $($_.Exception.Message)"
      }

      $uploadUrl = "${REPO_API}/releases/${RELEASE_ID}/assets?name=${encodedName}"
      $result = curl.exe --fail --silent --show-error `
        -X POST `
        -H "Authorization: token $env:BUILD_TOKEN" `
        -H "Content-Type: application/octet-stream" `
        -T "$($installer.FullName)" `
        "$uploadUrl" 2>&1
      if ($LASTEXITCODE -eq 0) {
        Write-Host "Upload successful: ${filename}"
      } else {
        Write-Host "ERROR: Upload failed for ${filename}: ${result}"
        $failed = $true
      }
    }

    # Fail the step when any installer did not reach the release.
    if ($failed) {
      exit 1
    }
|
||||||
|
|
||||||
|
build-macos:
|
||||||
|
name: Build App (macOS)
|
||||||
|
needs: bump-version
|
||||||
|
runs-on: macos-latest
|
||||||
|
env:
|
||||||
|
NODE_VERSION: "20"
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
ref: ${{ needs.bump-version.outputs.tag }}
|
||||||
|
|
||||||
|
- name: Set up Node.js
|
||||||
|
uses: actions/setup-node@v4
|
||||||
|
with:
|
||||||
|
node-version: ${{ env.NODE_VERSION }}
|
||||||
|
|
||||||
|
- name: Install Rust stable
|
||||||
|
run: |
|
||||||
|
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable
|
||||||
|
echo "$HOME/.cargo/bin" >> $GITHUB_PATH
|
||||||
|
|
||||||
|
- name: Install system dependencies
|
||||||
|
run: brew install --quiet create-dmg || true
|
||||||
|
|
||||||
|
- name: Install npm dependencies
|
||||||
|
run: npm ci
|
||||||
|
|
||||||
|
- name: Build Tauri app
|
||||||
|
run: npm run tauri build
|
||||||
|
|
||||||
|
- name: Upload to release
  env:
    BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
  run: |
    # Upload the macOS disk images (.dmg) produced by the Tauri build to the
    # release for this run's tag, replacing any asset of the same name. The
    # step fails if the release is missing, no image was produced, or an
    # upload is rejected.
    which jq || brew install jq
    REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
    TAG="${{ needs.bump-version.outputs.tag }}"
    echo "Release tag: ${TAG}"

    RELEASE_ID=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
      "${REPO_API}/releases/tags/${TAG}" | jq -r '.id // empty')

    if [ -z "${RELEASE_ID}" ] || [ "${RELEASE_ID}" = "null" ]; then
      echo "ERROR: Failed to find release for tag ${TAG}."
      exit 1
    fi
    echo "Release ID: ${RELEASE_ID}"

    ARTIFACTS=$(find src-tauri/target/release/bundle -type f -name "*.dmg")
    if [ -z "${ARTIFACTS}" ]; then
      echo "ERROR: No .dmg bundles found to upload."
      exit 1
    fi

    # The loop runs in a pipeline subshell, so it reports failure through its
    # exit status; it is the last command, making that the step's result.
    printf '%s\n' "${ARTIFACTS}" | while IFS= read -r file; do
      filename=$(basename "$file")
      # Percent-encode the whole name for the query string, not just spaces.
      encoded_name=$(jq -rn --arg name "$filename" '$name | @uri')
      echo "Uploading ${filename} ($(du -h "$file" | cut -f1))..."

      # Delete an existing asset with the same name so a re-run replaces it.
      ASSET_ID=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
        "${REPO_API}/releases/${RELEASE_ID}/assets" | jq -r --arg name "$filename" '.[] | select(.name == $name) | .id // empty')
      if [ -n "${ASSET_ID}" ]; then
        curl -s -X DELETE -H "Authorization: token ${BUILD_TOKEN}" \
          "${REPO_API}/releases/${RELEASE_ID}/assets/${ASSET_ID}"
      fi

      HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X POST \
        -H "Authorization: token ${BUILD_TOKEN}" \
        -H "Content-Type: application/octet-stream" \
        -T "$file" \
        "${REPO_API}/releases/${RELEASE_ID}/assets?name=${encoded_name}")
      echo "Upload response: HTTP ${HTTP_CODE}"

      # Anything other than a 2xx status means the asset was not stored.
      case "${HTTP_CODE}" in
        2??) ;;
        *)
          echo "ERROR: Upload failed for ${filename} (HTTP ${HTTP_CODE})."
          exit 1
          ;;
      esac
    done
|
||||||
13
.gitignore
vendored
13
.gitignore
vendored
@@ -10,8 +10,8 @@ dist/
|
|||||||
downloads/
|
downloads/
|
||||||
eggs/
|
eggs/
|
||||||
.eggs/
|
.eggs/
|
||||||
lib/
|
/lib/
|
||||||
lib64/
|
/lib64/
|
||||||
parts/
|
parts/
|
||||||
sdist/
|
sdist/
|
||||||
var/
|
var/
|
||||||
@@ -54,3 +54,12 @@ models/
|
|||||||
|
|
||||||
# PyInstaller
|
# PyInstaller
|
||||||
*.spec.lock
|
*.spec.lock
|
||||||
|
|
||||||
|
# Node.js
|
||||||
|
node_modules/
|
||||||
|
|
||||||
|
# Vite / Svelte build output
|
||||||
|
dist/
|
||||||
|
|
||||||
|
# Tauri
|
||||||
|
src-tauri/target/
|
||||||
|
|||||||
407
CLAUDE.md
407
CLAUDE.md
@@ -4,52 +4,108 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
|
|||||||
|
|
||||||
## Project Overview
|
## Project Overview
|
||||||
|
|
||||||
Local Transcription is a desktop application for real-time speech-to-text transcription designed for streamers. It uses Whisper models (via faster-whisper) to transcribe audio locally with optional multi-user server synchronization.
|
Local Transcription is a cross-platform desktop application for real-time speech-to-text transcription designed for streamers. It supports local Whisper models and cloud-based Deepgram transcription, with OBS browser source integration and optional multi-user sync.
|
||||||
|
|
||||||
|
**Architecture:** Two-process model — a Tauri v2 shell (Svelte 5 frontend) communicates with a headless Python backend (sidecar) via REST API and WebSocket.
|
||||||
|
|
||||||
**Key Features:**
|
**Key Features:**
|
||||||
- Standalone desktop GUI (PySide6/Qt)
|
- Cross-platform desktop app (Windows, macOS, Linux) via Tauri v2 + Svelte 5
|
||||||
- Local transcription with CPU/GPU support
|
- Headless Python backend with FastAPI control API
|
||||||
- Built-in web server for OBS browser source integration
|
- Dual transcription modes: local Whisper or cloud Deepgram (managed/BYOK)
|
||||||
- Optional Node.js-based multi-user server for syncing transcriptions across users
|
- Built-in web server for OBS browser source at `http://localhost:8080`
|
||||||
- Noise suppression and Voice Activity Detection (VAD)
|
- Optional multi-user sync via Node.js server
|
||||||
- Cross-platform builds (Linux/Windows) with PyInstaller
|
- CUDA, MPS (Apple Silicon), and CPU support
|
||||||
|
- Auto-updates, custom fonts, configurable colors
|
||||||
|
|
||||||
|
> **Legacy GUI:** The original PySide6/Qt GUI (`main.py`, `gui/`) still works during the transition. New features should target the Tauri frontend and headless backend.
|
||||||
|
|
||||||
## Project Structure
|
## Project Structure
|
||||||
|
|
||||||
```
|
```
|
||||||
local-transcription/
|
local-transcription/
|
||||||
├── client/ # Core transcription logic
|
├── src/ # Svelte 5 frontend (Tauri UI)
|
||||||
│ ├── audio_capture.py # Audio input and buffering
|
│ ├── App.svelte # Main app shell
|
||||||
│ ├── transcription_engine.py # Whisper model integration
|
│ ├── app.css # Global dark theme styles
|
||||||
|
│ ├── main.ts # Svelte mount point
|
||||||
|
│ ├── lib/components/ # UI components
|
||||||
|
│ │ ├── Header.svelte # Title bar + settings button
|
||||||
|
│ │ ├── StatusBar.svelte # State indicator, device, user info
|
||||||
|
│ │ ├── Controls.svelte # Start/Stop, Clear, Save buttons
|
||||||
|
│ │ ├── TranscriptionDisplay.svelte # Scrolling transcript view
|
||||||
|
│ │ └── Settings.svelte # Full settings modal (all sections)
|
||||||
|
│ └── lib/stores/ # Svelte 5 reactive stores ($state/$derived)
|
||||||
|
│ ├── backend.ts # WebSocket + REST API client
|
||||||
|
│ ├── config.ts # App configuration fetch/update
|
||||||
|
│ └── transcriptions.ts # Transcript data management
|
||||||
|
├── src-tauri/ # Tauri v2 Rust shell
|
||||||
|
│ ├── src/lib.rs # Plugin registration (shell, dialog, process)
|
||||||
|
│ ├── src/main.rs # Entry point
|
||||||
|
│ ├── tauri.conf.json # Window, bundle, plugin config
|
||||||
|
│ └── Cargo.toml # Rust dependencies
|
||||||
|
├── backend/ # Headless Python backend (the sidecar)
|
||||||
|
│ ├── app_controller.py # Core orchestration (engine, sync, config)
|
||||||
|
│ ├── api_server.py # FastAPI REST endpoints + /ws/control
|
||||||
|
│ └── main_headless.py # Headless entry point (prints JSON to stdout)
|
||||||
|
├── client/ # Core transcription modules (used by backend)
|
||||||
|
│ ├── audio_capture.py # Audio input handling
|
||||||
|
│ ├── transcription_engine_realtime.py # RealtimeSTT / Whisper engine
|
||||||
|
│ ├── deepgram_transcription.py # Deepgram WebSocket cloud transcription
|
||||||
│ ├── noise_suppression.py # VAD and noise reduction
|
│ ├── noise_suppression.py # VAD and noise reduction
|
||||||
│ ├── device_utils.py # CPU/GPU device management
|
│ ├── device_utils.py # CPU/GPU/MPS detection
|
||||||
│ ├── config.py # Configuration management
|
│ ├── config.py # YAML config management (~/.local-transcription/)
|
||||||
│ └── server_sync.py # Multi-user server sync client
|
│ ├── server_sync.py # Multi-user server sync client
|
||||||
├── gui/ # Desktop application UI
|
│ ├── instance_lock.py # Single-instance PID lock
|
||||||
│ ├── main_window_qt.py # Main application window (PySide6)
|
│ └── update_checker.py # Gitea release update checker
|
||||||
│ ├── settings_dialog_qt.py # Settings dialog (PySide6)
|
├── gui/ # Legacy PySide6/Qt GUI (still functional)
|
||||||
|
│ ├── main_window_qt.py # Main window (orchestration lives here in legacy)
|
||||||
|
│ ├── settings_dialog_qt.py # Settings dialog
|
||||||
│ └── transcription_display_qt.py # Display widget
|
│ └── transcription_display_qt.py # Display widget
|
||||||
├── server/ # Web display servers
|
├── server/
|
||||||
│ ├── web_display.py # FastAPI server for OBS browser source (local)
|
│ ├── web_display.py # FastAPI OBS display server (WebSocket + HTML)
|
||||||
│ └── nodejs/ # Optional multi-user Node.js server
|
│ └── nodejs/ # Optional multi-user sync server
|
||||||
│ ├── server.js # Multi-user sync server with WebSocket
|
├── .gitea/workflows/ # CI/CD
|
||||||
│ ├── package.json # Node.js dependencies
|
│ ├── release.yml # Tauri app builds (Linux/Windows/macOS)
|
||||||
│ └── README.md # Server deployment documentation
|
│ └── build-sidecar.yml # Python sidecar builds (CUDA + CPU)
|
||||||
├── config/ # Example configuration files
|
├── config/default_config.yaml # Default settings template
|
||||||
│ └── default_config.yaml # Default settings template
|
├── main.py # Legacy PySide6 GUI entry point
|
||||||
├── main.py # GUI application entry point
|
|
||||||
├── main_cli.py # CLI version for testing
|
├── main_cli.py # CLI version for testing
|
||||||
└── pyproject.toml # Dependencies and build config
|
├── version.py # Version string (__version__)
|
||||||
|
├── local-transcription.spec # PyInstaller config (legacy, includes PySide6)
|
||||||
|
├── local-transcription-headless.spec # PyInstaller config (headless sidecar, no Qt)
|
||||||
|
├── pyproject.toml # Python deps (uv, CUDA PyTorch index)
|
||||||
|
├── package.json # Node/Tauri deps
|
||||||
|
└── vite.config.ts # Vite build config ($lib alias)
|
||||||
```
|
```
|
||||||
|
|
||||||
## Development Commands
|
## Development Commands
|
||||||
|
|
||||||
### Installation and Setup
|
### Frontend (Tauri + Svelte)
|
||||||
```bash
|
```bash
|
||||||
# Install dependencies (creates .venv automatically)
|
# Install npm dependencies
|
||||||
|
npm install
|
||||||
|
|
||||||
|
# Run Tauri in development mode (hot-reload)
|
||||||
|
npm run tauri dev
|
||||||
|
|
||||||
|
# Build frontend only (for testing)
|
||||||
|
npx vite build
|
||||||
|
|
||||||
|
# Type-check Svelte
|
||||||
|
npx svelte-check
|
||||||
|
|
||||||
|
# Check Rust compiles
|
||||||
|
cd src-tauri && cargo check
|
||||||
|
```
|
||||||
|
|
||||||
|
### Backend (Python)
|
||||||
|
```bash
|
||||||
|
# Install Python dependencies
|
||||||
uv sync
|
uv sync
|
||||||
|
|
||||||
# Run the GUI application
|
# Run the headless backend standalone (for development)
|
||||||
|
uv run python -m backend.main_headless --port 8080
|
||||||
|
|
||||||
|
# Run the legacy PySide6 GUI
|
||||||
uv run python main.py
|
uv run python main.py
|
||||||
|
|
||||||
# Run CLI version (headless, for testing)
|
# Run CLI version (headless, for testing)
|
||||||
@@ -57,257 +113,154 @@ uv run python main_cli.py
|
|||||||
|
|
||||||
# List available audio devices
|
# List available audio devices
|
||||||
uv run python main_cli.py --list-devices
|
uv run python main_cli.py --list-devices
|
||||||
|
|
||||||
# Install with CUDA support (if needed)
|
|
||||||
uv pip install torch --index-url https://download.pytorch.org/whl/cu121
|
|
||||||
```
|
```
|
||||||
|
|
||||||
### Building Executables
|
### Building
|
||||||
```bash
|
```bash
|
||||||
# Linux (includes CUDA support - works on both GPU and CPU systems)
|
# Build Tauri app (produces platform installer)
|
||||||
./build.sh
|
npm run tauri build
|
||||||
|
|
||||||
# Windows (includes CUDA support - works on both GPU and CPU systems)
|
# Build headless Python sidecar (no PySide6)
|
||||||
build.bat
|
uv run pyinstaller local-transcription-headless.spec
|
||||||
|
# Output: dist/local-transcription-backend/
|
||||||
|
|
||||||
# Manual build with PyInstaller
|
# Build legacy PySide6 app
|
||||||
uv sync # Install dependencies (includes CUDA PyTorch)
|
|
||||||
uv pip uninstall -q enum34 # Remove incompatible enum34 package
|
|
||||||
uv run pyinstaller local-transcription.spec
|
uv run pyinstaller local-transcription.spec
|
||||||
|
# Or use: ./build.sh (Linux) / build.bat (Windows)
|
||||||
```
|
```
|
||||||
|
|
||||||
**Important:** All builds include CUDA support via `pyproject.toml` configuration. CUDA builds can be created on systems without NVIDIA GPUs. The PyTorch CUDA runtime is bundled, and the app automatically falls back to CPU if no GPU is available.
|
|
||||||
|
|
||||||
### Testing
|
### Testing
|
||||||
```bash
|
```bash
|
||||||
# Run component tests
|
|
||||||
uv run python test_components.py
|
uv run python test_components.py
|
||||||
|
|
||||||
# Check CUDA availability
|
|
||||||
uv run python check_cuda.py
|
uv run python check_cuda.py
|
||||||
|
|
||||||
# Test web server manually
|
|
||||||
uv run python -m uvicorn server.web_display:app --reload
|
|
||||||
```
|
```
|
||||||
|
|
||||||
## Architecture
|
## Architecture Details
|
||||||
|
|
||||||
### Audio Processing Pipeline
|
### Communication: Tauri <-> Python Backend
|
||||||
|
|
||||||
1. **Audio Capture** ([client/audio_capture.py](client/audio_capture.py))
|
The Svelte frontend connects to the Python backend via two channels:
|
||||||
- Captures audio from microphone/system using sounddevice
|
|
||||||
- Handles automatic sample rate detection and resampling
|
|
||||||
- Uses chunking with overlap for better transcription quality
|
|
||||||
- Default: 3-second chunks with 0.5s overlap
|
|
||||||
|
|
||||||
2. **Noise Suppression** ([client/noise_suppression.py](client/noise_suppression.py))
|
**REST API** (on port 8081 by default):
|
||||||
- Applies noisereduce for background noise reduction
|
- `GET /api/status` — app state, device info, version
|
||||||
- Voice Activity Detection (VAD) using webrtcvad
|
- `POST /api/start` / `POST /api/stop` — transcription control
|
||||||
- Skips silent segments to improve performance
|
- `GET /api/config` / `PUT /api/config` — read/write settings (dot-notation keys)
|
||||||
|
- `GET /api/audio-devices` / `GET /api/compute-devices` — device enumeration
|
||||||
|
- `POST /api/reload-engine` — reload with new model/device
|
||||||
|
- `GET /api/transcriptions` / `POST /api/clear` — transcript management
|
||||||
|
- `POST /api/save-file` — write text to a file path
|
||||||
|
- `GET /api/check-update` / `POST /api/skip-version` — update management
|
||||||
|
- `POST /api/login` / `POST /api/register` / `GET /api/balance` — managed mode proxy
|
||||||
|
|
||||||
3. **Transcription** ([client/transcription_engine.py](client/transcription_engine.py))
|
**WebSocket** `/ws/control`:
|
||||||
- Uses faster-whisper for efficient inference
|
- Pushes real-time events: `state_changed`, `transcription`, `preview`, `error`, `credits_low`
|
||||||
- Supports CPU, CUDA, and Apple MPS (Mac)
|
- Client sends keepalive pings
|
||||||
- Models: tiny, base, small, medium, large
|
|
||||||
- Thread-safe model loading with locks
|
|
||||||
|
|
||||||
4. **Display** ([gui/main_window_qt.py](gui/main_window_qt.py))
|
The OBS display server runs separately on port 8080 (`GET /` for HTML, `WebSocket /ws` for transcriptions).
|
||||||
- PySide6/Qt-based desktop GUI
|
|
||||||
- Real-time transcription display with scrolling
|
|
||||||
- Settings panel with live updates (no restart needed)
|
|
||||||
|
|
||||||
### Web Server Architecture
|
### Backend Process Lifecycle
|
||||||
|
|
||||||
**Local Web Server** ([server/web_display.py](server/web_display.py))
|
1. `main_headless.py` starts, acquires instance lock, creates `AppController`
|
||||||
- Always runs when GUI starts (port 8080 by default)
|
2. `AppController.initialize()` starts the OBS web server (port 8080) and engine init thread
|
||||||
- FastAPI with WebSocket for real-time updates
|
3. `APIServer` wraps the controller with FastAPI routes, runs on port 8081
|
||||||
- Used for OBS browser source integration
|
4. Backend prints `{"event": "ready", "port": 8080}` to stdout for Tauri to discover
|
||||||
- Single-user (displays only local transcriptions)
|
5. On shutdown: engine stopped, web server stopped, lock released
|
||||||
|
|
||||||
**Multi-User Server** (Optional - for syncing across multiple users)
|
### Headless Backend vs Legacy GUI
|
||||||
|
|
||||||
**Node.js WebSocket Server** ([server/nodejs/](server/nodejs/)) - **RECOMMENDED**
|
The `AppController` class (`backend/app_controller.py`) extracts all orchestration logic from `gui/main_window_qt.py` into a Qt-free class. The mapping:
|
||||||
- Real-time WebSocket support (< 100ms latency)
|
|
||||||
- Handles 100+ concurrent users
|
|
||||||
- Easy deployment to VPS/cloud hosting (Railway, Heroku, DigitalOcean, or any VPS)
|
|
||||||
- Configurable display options via URL parameters:
|
|
||||||
- `timestamps=true/false` - Show/hide timestamps
|
|
||||||
- `maxlines=50` - Maximum visible lines (prevents scroll bars in OBS)
|
|
||||||
- `fontsize=16` - Font size in pixels
|
|
||||||
- `fontfamily=Arial` - Font family
|
|
||||||
- `fade=10` - Seconds before text fades (0 = never)
|
|
||||||
|
|
||||||
See [server/nodejs/README.md](server/nodejs/README.md) for deployment instructions
|
| Legacy (MainWindow) | Headless (AppController) |
|
||||||
|
|---------------------|--------------------------|
|
||||||
|
| `_initialize_components()` | `_initialize_engine()` |
|
||||||
|
| `_start_transcription()` | `start_transcription()` |
|
||||||
|
| `_stop_transcription()` | `stop_transcription()` |
|
||||||
|
| `_on_settings_saved()` | `apply_settings()` |
|
||||||
|
| `_reload_engine()` | `reload_engine()` |
|
||||||
|
| `_start_web_server_if_enabled()` | `_start_web_server()` |
|
||||||
|
| `_start_server_sync()` | `_start_server_sync()` |
|
||||||
|
| Qt signals | Callbacks (`on_state_changed`, `on_transcription`, etc.) |
|
||||||
|
|
||||||
### Configuration System
|
### Threading Model (Headless)
|
||||||
|
|
||||||
- Config stored at `~/.local-transcription/config.yaml`
|
- Main thread: Uvicorn (FastAPI) event loop
|
||||||
- Managed by [client/config.py](client/config.py)
|
- Engine init thread: Downloads models, initializes VAD
|
||||||
- Settings apply immediately without restart (except model changes)
|
- Web server thread: Separate asyncio loop for OBS display
|
||||||
- YAML format with nested keys (e.g., `transcription.model`)
|
- Audio capture: Runs in engine callback threads
|
||||||
|
- All results flow through `AppController` callbacks -> `APIServer` WebSocket broadcast
|
||||||
|
|
||||||
### Device Management
|
### Svelte Frontend
|
||||||
|
|
||||||
- [client/device_utils.py](client/device_utils.py) handles CPU/GPU detection
|
Uses Svelte 5 runes throughout (`$state`, `$derived`, `$effect`, `$props`). No Svelte 4 patterns.
|
||||||
- Auto-detects CUDA, MPS (Mac), or falls back to CPU
|
|
||||||
- Compute types: float32 (best quality), float16 (GPU), int8 (fastest)
|
|
||||||
- Thread-safe device selection
|
|
||||||
|
|
||||||
## Key Implementation Details
|
**Stores** (`src/lib/stores/`):
|
||||||
|
- `backend.ts` — WebSocket connection + REST helpers (`apiGet`, `apiPost`, `apiPut`), auto-reconnect
|
||||||
|
- `config.ts` — fetches/updates config from backend API
|
||||||
|
- `transcriptions.ts` — manages transcript list, listens for `CustomEvent`s from backend store
|
||||||
|
|
||||||
### PyInstaller Build Configuration
|
**Key patterns:**
|
||||||
|
- Backend store dispatches `CustomEvent`s on `window` for cross-store communication
|
||||||
|
- Settings component collects all changed values into a `Record<string, any>` with dot-notation keys, sends via `PUT /api/config`
|
||||||
|
- Controls use the Tauri dialog plugin for native file saves, falling back to a blob download
|
||||||
|
|
||||||
- [local-transcription.spec](local-transcription.spec) controls build
|
## CI/CD
|
||||||
- UPX compression enabled for smaller executables
|
|
||||||
- Hidden imports required for PySide6, faster-whisper, torch
|
|
||||||
- Console mode enabled by default (set `console=False` to hide)
|
|
||||||
|
|
||||||
### Threading Model
|
Two Gitea Actions workflows in `.gitea/workflows/`:
|
||||||
|
|
||||||
- Main thread: Qt GUI event loop
|
- **`release.yml`**: Triggers on push to `main`. Auto-bumps version, builds Tauri app on Linux/Windows/macOS, uploads `.deb`, `.rpm`, `.msi`, `.dmg` to Gitea release.
|
||||||
- Audio thread: Captures and processes audio chunks
|
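- **`build-sidecar.yml`**: Triggers on pushes to `main` that change `client/`, `server/`, `backend/`, `pyproject.toml`, or `local-transcription-headless.spec`, and can also be run manually. Builds the headless Python sidecar via PyInstaller: CUDA + CPU variants for Linux/Windows, CPU-only for macOS.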
|
||||||
- Web server thread: Runs FastAPI server
|
|
||||||
- Transcription: Runs in callback thread from audio capture
|
|
||||||
- All transcription results communicated via Qt signals
|
|
||||||
|
|
||||||
### Server Sync (Optional Multi-User Feature)
|
Both require a `BUILD_TOKEN` secret (Gitea API token with release write access).
|
||||||
|
|
||||||
- [client/server_sync.py](client/server_sync.py) handles server communication
|
|
||||||
- Toggle in Settings: "Enable Server Sync"
|
|
||||||
- Sends transcriptions to Node.js server via HTTP POST
|
|
||||||
- Real-time updates via WebSocket to display page
|
|
||||||
- Per-speaker font support (Web-Safe, Google Fonts, Custom uploads)
|
|
||||||
- Falls back gracefully if server unavailable
|
|
||||||
|
|
||||||
## Common Patterns
|
## Common Patterns
|
||||||
|
|
||||||
### Adding a New Setting
|
### Adding a New Setting
|
||||||
|
|
||||||
1. Add to [config/default_config.yaml](config/default_config.yaml)
|
1. Add default to [config/default_config.yaml](config/default_config.yaml)
|
||||||
2. Update [client/config.py](client/config.py) if validation needed
|
2. Add UI control in [src/lib/components/Settings.svelte](src/lib/components/Settings.svelte)
|
||||||
3. Add UI control in [gui/settings_dialog_qt.py](gui/settings_dialog_qt.py)
|
3. Ensure the setting is included in the save handler's config update
|
||||||
4. Apply setting in relevant component (no restart if possible)
|
4. Apply in `AppController.apply_settings()` or the relevant component
|
||||||
5. Emit signal to update display if needed
|
5. For legacy GUI: also update [gui/settings_dialog_qt.py](gui/settings_dialog_qt.py)
|
||||||
|
|
||||||
|
### Adding a New API Endpoint
|
||||||
|
|
||||||
|
1. Add route in [backend/api_server.py](backend/api_server.py) `_setup_routes()`
|
||||||
|
2. Add supporting logic in [backend/app_controller.py](backend/app_controller.py) if needed
|
||||||
|
3. Call from Svelte via `backendStore.apiGet/apiPost/apiPut`
|
||||||
|
|
||||||
### Modifying Transcription Display
|
### Modifying Transcription Display
|
||||||
|
|
||||||
- Local GUI: [gui/transcription_display_qt.py](gui/transcription_display_qt.py)
|
- Tauri UI: [src/lib/components/TranscriptionDisplay.svelte](src/lib/components/TranscriptionDisplay.svelte)
|
||||||
- Local web display (OBS): [server/web_display.py](server/web_display.py) (HTML in `_get_html()`)
|
- OBS display: [server/web_display.py](server/web_display.py) (HTML in `_get_html()`)
|
||||||
- Multi-user display: [server/nodejs/server.js](server/nodejs/server.js) (display page in `/display` route)
|
- Multi-user display: [server/nodejs/server.js](server/nodejs/server.js) (display page in `/display` route)
|
||||||
|
|
||||||
### Adding a New Model Size
|
|
||||||
|
|
||||||
- Update [client/transcription_engine.py](client/transcription_engine.py)
|
|
||||||
- Add to model selector in [gui/settings_dialog_qt.py](gui/settings_dialog_qt.py)
|
|
||||||
- Update CLI argument choices in [main_cli.py](main_cli.py)
|
|
||||||
|
|
||||||
## Dependencies
|
## Dependencies
|
||||||
|
|
||||||
**Core:**
|
**Frontend:** Tauri v2, Svelte 5, Vite, TypeScript
|
||||||
- `faster-whisper`: Optimized Whisper inference
|
**Backend:** Python 3.9+, FastAPI, Uvicorn, RealtimeSTT, faster-whisper, PyTorch (CUDA), sounddevice
|
||||||
- `torch`: ML framework (CUDA-enabled via special index)
|
**Build:** PyInstaller (sidecar), Tauri CLI (app), uv (Python packages)
|
||||||
- `PySide6`: Qt6 bindings for GUI
|
**CI:** Gitea Actions with platform-specific runners
|
||||||
- `sounddevice`: Cross-platform audio I/O
|
|
||||||
- `noisereduce`, `webrtcvad`: Audio preprocessing
|
|
||||||
|
|
||||||
**Web Server:**
|
|
||||||
- `fastapi`, `uvicorn`: Web server and ASGI
|
|
||||||
- `websockets`: Real-time communication
|
|
||||||
|
|
||||||
**Build:**
|
|
||||||
- `pyinstaller`: Create standalone executables
|
|
||||||
- `uv`: Fast package manager
|
|
||||||
|
|
||||||
**PyTorch CUDA Index:**
|
|
||||||
- Configured in [pyproject.toml](pyproject.toml) under `[[tool.uv.index]]`
|
|
||||||
- Uses PyTorch's custom wheel repository for CUDA builds
|
|
||||||
- Automatically installed with `uv sync` when using CUDA build scripts
|
|
||||||
|
|
||||||
## Platform-Specific Notes
|
## Platform-Specific Notes
|
||||||
|
|
||||||
### Linux
|
### Linux
|
||||||
- Uses PulseAudio/ALSA for audio
|
- Tauri needs: `libgtk-3-dev`, `libwebkit2gtk-4.1-dev`, `libappindicator3-dev`, `librsvg2-dev`, `patchelf`
|
||||||
- Build scripts use bash (`.sh` files)
|
- Audio: PulseAudio/ALSA via sounddevice
|
||||||
- Executable: `dist/LocalTranscription/LocalTranscription`
|
|
||||||
|
|
||||||
### Windows
|
### Windows
|
||||||
- Uses Windows Audio/WASAPI
|
- Tauri needs: WebView2 (usually pre-installed on Windows 10+)
|
||||||
- Build scripts use batch (`.bat` files)
|
- Audio: WASAPI via sounddevice
|
||||||
- Executable: `dist\LocalTranscription\LocalTranscription.exe`
|
|
||||||
- Requires Visual C++ Redistributable on target systems
|
|
||||||
|
|
||||||
### Cross-Building
|
### macOS
|
||||||
- **Cannot cross-compile** - must build on target platform
|
- Tauri needs: Xcode Command Line Tools
|
||||||
- CI/CD should use platform-specific runners
|
- Audio: CoreAudio via sounddevice
|
||||||
|
- GPU: MPS (Apple Silicon) detected by `device_utils.py`
|
||||||
## Troubleshooting
|
- `Info.plist` must include `NSMicrophoneUsageDescription` for mic access
|
||||||
|
- No CUDA builds — CPU/MPS only
|
||||||
### Model Loading Issues
|
|
||||||
- Models download to `~/.cache/huggingface/`
|
|
||||||
- First run requires internet connection
|
|
||||||
- Check disk space (models: 75MB-3GB depending on size)
|
|
||||||
|
|
||||||
### Audio Device Issues
|
|
||||||
- Run `uv run python main_cli.py --list-devices`
|
|
||||||
- Check permissions (microphone access)
|
|
||||||
- Try different device indices in settings
|
|
||||||
|
|
||||||
### GPU Not Detected
|
|
||||||
- Run `uv run python check_cuda.py`
|
|
||||||
- Install CUDA drivers (not CUDA toolkit - bundled in build)
|
|
||||||
- Verify PyTorch sees GPU: `python -c "import torch; print(torch.cuda.is_available())"`
|
|
||||||
|
|
||||||
### Web Server Port Conflicts
|
|
||||||
- Default port: 8080
|
|
||||||
- Change in [gui/main_window_qt.py](gui/main_window_qt.py) or config
|
|
||||||
- Use `lsof -i :8080` (Linux) or `netstat -ano | findstr :8080` (Windows)
|
|
||||||
|
|
||||||
## OBS Integration
|
|
||||||
|
|
||||||
### Local Display (Single User)
|
|
||||||
1. Start Local Transcription app
|
|
||||||
2. In OBS: Add "Browser" source
|
|
||||||
3. URL: `http://localhost:8080`
|
|
||||||
4. Set dimensions (e.g., 1920x300)
|
|
||||||
|
|
||||||
### Multi-User Display (Node.js Server)
|
|
||||||
1. Deploy Node.js server (see [server/nodejs/README.md](server/nodejs/README.md))
|
|
||||||
2. Each user configures Server URL: `http://your-server:3000/api/send`
|
|
||||||
3. Enter same room name and passphrase
|
|
||||||
4. In OBS: Add "Browser" source
|
|
||||||
5. URL: `http://your-server:3000/display?room=ROOM&fade=10&timestamps=true&maxlines=50&fontsize=16`
|
|
||||||
6. Customize URL parameters as needed:
|
|
||||||
- `timestamps=false` - Hide timestamps
|
|
||||||
- `maxlines=30` - Show max 30 lines (prevents scroll bars)
|
|
||||||
- `fontsize=18` - Larger font
|
|
||||||
- `fontfamily=Courier` - Different font
|
|
||||||
|
|
||||||
## Performance Optimization
|
|
||||||
|
|
||||||
**For Real-Time Transcription:**
|
|
||||||
- Use `tiny` or `base` model (faster)
|
|
||||||
- Enable GPU if available (5-10x faster)
|
|
||||||
- Increase chunk_duration for better accuracy (higher latency)
|
|
||||||
- Decrease chunk_duration for lower latency (less context)
|
|
||||||
- Enable VAD to skip silent audio
|
|
||||||
|
|
||||||
**For Build Size Reduction:**
|
|
||||||
- Don't bundle models (download on demand)
|
|
||||||
- Use CPU-only build if no GPU users
|
|
||||||
- Enable UPX compression (already in spec)
|
|
||||||
|
|
||||||
## Phase Status
|
|
||||||
|
|
||||||
- ✅ **Phase 1**: Standalone desktop application (complete)
|
|
||||||
- ✅ **Web Server**: Local OBS integration (complete)
|
|
||||||
- ✅ **Builds**: PyInstaller executables (complete)
|
|
||||||
- ✅ **Phase 2**: Multi-user Node.js server (complete, optional)
|
|
||||||
- ⏸️ **Phase 3+**: Advanced features (see [NEXT_STEPS.md](NEXT_STEPS.md))
|
|
||||||
|
|
||||||
## Related Documentation
|
## Related Documentation
|
||||||
|
|
||||||
- [README.md](README.md) - User-facing documentation
|
- [README.md](README.md) — User-facing documentation
|
||||||
- [BUILD.md](BUILD.md) - Detailed build instructions
|
- [BUILD.md](BUILD.md) — Detailed build instructions
|
||||||
- [INSTALL.md](INSTALL.md) - Installation guide
|
- [INSTALL.md](INSTALL.md) — Installation guide
|
||||||
- [NEXT_STEPS.md](NEXT_STEPS.md) - Future enhancements
|
- [server/nodejs/README.md](server/nodejs/README.md) — Node.js server setup
|
||||||
- [server/nodejs/README.md](server/nodejs/README.md) - Node.js server setup and deployment
|
|
||||||
|
|||||||
574
DEEPGRAM_PROXY_PLAN.md
Normal file
574
DEEPGRAM_PROXY_PLAN.md
Normal file
@@ -0,0 +1,574 @@
|
|||||||
|
# Deepgram Proxy Service — Build Plan
|
||||||
|
|
||||||
|
## Project Overview
|
||||||
|
|
||||||
|
Build a standalone hosted service that acts as a Deepgram proxy for the Local Transcription
|
||||||
|
desktop app. Users can either provide their own Deepgram API key (BYOK) or use the managed
|
||||||
|
service with prepaid credits purchased via Stripe.
|
||||||
|
|
||||||
|
This is a **separate repository** from `local-transcription`. The desktop app will be updated
|
||||||
|
in a second phase to support both modes.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Repository Structure
|
||||||
|
|
||||||
|
```
|
||||||
|
transcription-proxy/
|
||||||
|
├── src/
|
||||||
|
│ ├── server.js # Express app entry point
|
||||||
|
│ ├── config.js # Environment config loader
|
||||||
|
│ ├── db/
|
||||||
|
│ │ ├── index.js # node-postgres pool setup
|
||||||
|
│ │ └── migrations/ # SQL migration files (numbered)
|
||||||
|
│ │ ├── 001_users.sql
|
||||||
|
│ │ ├── 002_credits.sql
|
||||||
|
│ │ ├── 003_sessions.sql
|
||||||
|
│ │ └── 004_usage_ledger.sql
|
||||||
|
│ ├── middleware/
|
||||||
|
│ │ ├── auth.js # JWT verification middleware
|
||||||
|
│ │ └── rateLimit.js # Per-user rate limiting
|
||||||
|
│ ├── routes/
|
||||||
|
│ │ ├── auth.js # POST /auth/register, /auth/login, /auth/refresh
|
||||||
|
│ │ ├── billing.js # POST /billing/checkout, GET /billing/balance
|
||||||
|
│ │ └── account.js # GET /account/me, GET /account/usage
|
||||||
|
│ ├── websocket/
|
||||||
|
│ │ └── proxy.js # WebSocket proxy handler (core feature)
|
||||||
|
│ └── webhooks/
|
||||||
|
│ └── stripe.js # POST /webhooks/stripe
|
||||||
|
├── web/ # Simple frontend dashboard
|
||||||
|
│ ├── index.html # Landing / login page
|
||||||
|
│ ├── dashboard.html # Balance, usage history, buy credits
|
||||||
|
│ └── assets/
|
||||||
|
│ ├── app.js
|
||||||
|
│ └── style.css
|
||||||
|
├── .env.example
|
||||||
|
├── package.json
|
||||||
|
├── docker-compose.yml # Postgres + app for local dev
|
||||||
|
└── CLAUDE.md # This file (after renaming)
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Technology Stack
|
||||||
|
|
||||||
|
- **Runtime**: Node.js 20+
|
||||||
|
- **Framework**: Express 4
|
||||||
|
- **WebSocket**: `ws` library (not socket.io — keep it lean)
|
||||||
|
- **Database**: PostgreSQL 15+ via `pg` (node-postgres)
|
||||||
|
- **Auth**: JWT via `jsonwebtoken`, passwords hashed with `bcrypt`
|
||||||
|
- **Payments**: Stripe Node SDK (`stripe`)
|
||||||
|
- **Environment**: `dotenv`
|
||||||
|
- **Dev tooling**: `nodemon` for dev, no TypeScript (keep it simple)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Database Schema
|
||||||
|
|
||||||
|
Run migrations in order. Use a simple `schema_migrations` table to track applied migrations.
|
||||||
|
|
||||||
|
### 001_users.sql
|
||||||
|
```sql
|
||||||
|
CREATE TABLE schema_migrations (
|
||||||
|
version INTEGER PRIMARY KEY,
|
||||||
|
applied_at TIMESTAMPTZ DEFAULT NOW()
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE users (
|
||||||
|
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||||
|
email TEXT UNIQUE NOT NULL,
|
||||||
|
password_hash TEXT NOT NULL,
|
||||||
|
stripe_customer_id TEXT UNIQUE,
|
||||||
|
created_at TIMESTAMPTZ DEFAULT NOW(),
|
||||||
|
updated_at TIMESTAMPTZ DEFAULT NOW()
|
||||||
|
);
|
||||||
|
```
|
||||||
|
|
||||||
|
### 002_credits.sql
|
||||||
|
```sql
|
||||||
|
CREATE TABLE credit_balance (
|
||||||
|
user_id UUID PRIMARY KEY REFERENCES users(id) ON DELETE CASCADE,
|
||||||
|
seconds_remaining INTEGER NOT NULL DEFAULT 0,
|
||||||
|
updated_at TIMESTAMPTZ DEFAULT NOW()
|
||||||
|
);
|
||||||
|
```
|
||||||
|
|
||||||
|
### 003_sessions.sql
|
||||||
|
```sql
|
||||||
|
CREATE TABLE transcription_sessions (
|
||||||
|
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||||
|
user_id UUID NOT NULL REFERENCES users(id),
|
||||||
|
mode TEXT NOT NULL CHECK (mode IN ('managed', 'byok')),
|
||||||
|
started_at TIMESTAMPTZ DEFAULT NOW(),
|
||||||
|
ended_at TIMESTAMPTZ,
|
||||||
|
seconds_used INTEGER NOT NULL DEFAULT 0,
|
||||||
|
deepgram_model TEXT,
|
||||||
|
status TEXT NOT NULL DEFAULT 'active' CHECK (status IN ('active', 'completed', 'terminated'))
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE INDEX idx_sessions_user_id ON transcription_sessions(user_id);
|
||||||
|
CREATE INDEX idx_sessions_started_at ON transcription_sessions(started_at);
|
||||||
|
```
|
||||||
|
|
||||||
|
### 004_usage_ledger.sql
|
||||||
|
```sql
|
||||||
|
CREATE TABLE usage_ledger (
|
||||||
|
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||||
|
user_id UUID NOT NULL REFERENCES users(id),
|
||||||
|
session_id UUID REFERENCES transcription_sessions(id),
|
||||||
|
recorded_at TIMESTAMPTZ DEFAULT NOW(),
|
||||||
|
seconds INTEGER NOT NULL,
|
||||||
|
description TEXT -- e.g. 'session_usage', 'credit_purchase', 'manual_adjustment'
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE INDEX idx_ledger_user_id ON usage_ledger(user_id);
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Environment Variables (.env.example)
|
||||||
|
|
||||||
|
```env
|
||||||
|
# Server
|
||||||
|
PORT=3000
|
||||||
|
NODE_ENV=development
|
||||||
|
|
||||||
|
# Database
|
||||||
|
DATABASE_URL=postgresql://user:password@localhost:5432/transcription_proxy
|
||||||
|
|
||||||
|
# Auth
|
||||||
|
JWT_SECRET=changeme_use_long_random_string
|
||||||
|
JWT_EXPIRY=7d
|
||||||
|
|
||||||
|
# Stripe
|
||||||
|
STRIPE_SECRET_KEY=sk_test_...
|
||||||
|
STRIPE_WEBHOOK_SECRET=whsec_...
|
||||||
|
|
||||||
|
# Deepgram
|
||||||
|
DEEPGRAM_API_KEY=your_deepgram_key_here
|
||||||
|
|
||||||
|
# Pricing (seconds per dollar — adjust for your margin)
|
||||||
|
# Default: 1000 seconds per $1 (≈ $0.06/min charged), which covers Deepgram's $0.006/min cost plus margin
|
||||||
|
CREDITS_PER_DOLLAR=1000
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 1 — Core Server & Auth
|
||||||
|
|
||||||
|
### Goals
|
||||||
|
- Working Express app with Postgres connection
|
||||||
|
- Migration runner
|
||||||
|
- User registration and login
|
||||||
|
- JWT middleware
|
||||||
|
|
||||||
|
### Tasks
|
||||||
|
|
||||||
|
1. **Scaffold project**
|
||||||
|
- `npm init`, install dependencies: `express ws pg jsonwebtoken bcrypt stripe dotenv`
|
||||||
|
- Dev dependencies: `nodemon`
|
||||||
|
- Add `start` and `dev` scripts to package.json
|
||||||
|
|
||||||
|
2. **Database connection** (`src/db/index.js`)
|
||||||
|
- Export a `pg.Pool` instance using `DATABASE_URL`
|
||||||
|
- Export a `migrate()` function that reads `src/db/migrations/*.sql` in order,
|
||||||
|
checks `schema_migrations` table, and applies unapplied ones
|
||||||
|
- Call `migrate()` on server startup before listening
|
||||||
|
|
||||||
|
3. **Auth routes** (`src/routes/auth.js`)
|
||||||
|
- `POST /auth/register` — validate email/password, hash password with bcrypt (cost 12),
|
||||||
|
insert user, insert empty credit_balance row, return JWT
|
||||||
|
- `POST /auth/login` — verify credentials, return JWT + refresh token
|
||||||
|
- `POST /auth/refresh` — validate refresh token, return new JWT
|
||||||
|
- Passwords: minimum 8 characters, validate email format
|
||||||
|
|
||||||
|
4. **JWT middleware** (`src/middleware/auth.js`)
|
||||||
|
- Verify `Authorization: Bearer <token>` header
|
||||||
|
- Attach `req.user = { id, email }` on success
|
||||||
|
- Return 401 on failure
|
||||||
|
- Export as `requireAuth` middleware
|
||||||
|
|
||||||
|
5. **Basic health check**
|
||||||
|
- `GET /health` returns `{ status: 'ok', db: 'connected' }`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 2 — Billing & Credits
|
||||||
|
|
||||||
|
### Goals
|
||||||
|
- Stripe Checkout session creation for credit purchases
|
||||||
|
- Webhook handler to fulfill purchases
|
||||||
|
- Balance endpoint
|
||||||
|
|
||||||
|
### Payment Methods
|
||||||
|
|
||||||
|
Use **Stripe Dynamic Payment Methods** — do NOT hardcode `payment_method_types` in the
|
||||||
|
Checkout Session. Instead, leave it unset and manage everything from the Stripe Dashboard.
|
||||||
|
|
||||||
|
Enable the following in the Stripe Dashboard under Settings → Payment Methods:
|
||||||
|
- **Cards** (Visa, Mastercard, Amex, Discover) — on by default
|
||||||
|
- **PayPal** — enable manually
|
||||||
|
- **Apple Pay** — on by default, shows automatically on Safari/iOS
|
||||||
|
- **Google Pay** — enable manually (one toggle)
|
||||||
|
- **Cash App Pay** — enable manually (popular with streaming audiences)
|
||||||
|
- **Link** — Stripe's saved payment network, on by default
|
||||||
|
|
||||||
|
Stripe will automatically show the most relevant methods to each user based on their
|
||||||
|
location and device. No code changes are needed to add or remove methods in future —
|
||||||
|
it's all dashboard config.
|
||||||
|
|
||||||
|
### Credit Packages
|
||||||
|
|
||||||
|
Define these as constants in `src/config.js`:
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
CREDIT_PACKAGES: [
|
||||||
|
{ id: 'pack_500', label: '500 minutes', seconds: 30000, price_cents: 300 },
|
||||||
|
{ id: 'pack_1200', label: '1200 minutes', seconds: 72000, price_cents: 600 },
|
||||||
|
{ id: 'pack_3000', label: '3000 minutes', seconds: 180000, price_cents: 1200 },
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
Adjust pricing to cover Deepgram costs ($0.006/min = $0.0001/sec) plus margin and
|
||||||
|
Stripe fees (~2.9% + $0.30).
|
||||||
|
|
||||||
|
### Tasks
|
||||||
|
|
||||||
|
1. **Stripe customer creation**
|
||||||
|
- On user registration, create a Stripe customer and store `stripe_customer_id`
|
||||||
|
- Do this asynchronously (don't block registration response)
|
||||||
|
|
||||||
|
2. **Billing routes** (`src/routes/billing.js`)
|
||||||
|
- `GET /billing/packages` — return credit package list (no auth required)
|
||||||
|
- `POST /billing/checkout` — requires auth, accepts `{ package_id }`,
|
||||||
|
creates Stripe Checkout Session using dynamic payment methods (do NOT pass
|
||||||
|
`payment_method_types` — omitting it enables dynamic methods automatically),
|
||||||
|
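include `metadata` on the Checkout Session itself (not only `payment_intent_data.metadata`, which is not part of the `checkout.session.completed` payload) containing `user_id` and `package_id`,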
|
||||||
|
returns `{ checkout_url }`
|
||||||
|
- `GET /billing/balance` — requires auth, returns `{ seconds_remaining, minutes_remaining }`
|
||||||
|
|
||||||
|
3. **Stripe webhook** (`src/webhooks/stripe.js`)
|
||||||
|
- Mount at `POST /webhooks/stripe` with raw body (use `express.raw()` for this route only)
|
||||||
|
- Verify signature with `stripe.webhooks.constructEvent()`
|
||||||
|
- Handle `checkout.session.completed`:
|
||||||
|
- Extract `user_id` and `package_id` from metadata
|
||||||
|
- Add seconds to `credit_balance`
|
||||||
|
- Insert row into `usage_ledger` with description `'credit_purchase'`
|
||||||
|
- Handle `payment_intent.payment_failed`: log it (no action needed for prepaid)
|
||||||
|
|
||||||
|
4. **Success/cancel pages**
|
||||||
|
- Stripe Checkout redirects to `GET /billing/success?session_id=...` and `/billing/cancel`
|
||||||
|
- These can be simple HTML responses or redirects to the web dashboard
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 3 — WebSocket Proxy (Core Feature)
|
||||||
|
|
||||||
|
This is the most critical component. The proxy sits between the desktop client and Deepgram,
|
||||||
|
forwarding audio while tracking usage in real time.
|
||||||
|
|
||||||
|
### Connection Flow
|
||||||
|
|
||||||
|
```
|
||||||
|
Client connects → validate JWT → check credit balance → open Deepgram upstream
|
||||||
|
↓
|
||||||
|
Audio chunks arrive → forward to Deepgram → record usage every 10 seconds
|
||||||
|
↓
|
||||||
|
Transcription arrives from Deepgram → forward to client
|
||||||
|
↓
|
||||||
|
Client disconnects (or credits exhausted) → close upstream → finalize session
|
||||||
|
```
|
||||||
|
|
||||||
|
### WebSocket Protocol
|
||||||
|
|
||||||
|
**Client connects to**: `wss://your-domain/ws/transcribe`
|
||||||
|
|
||||||
|
**Client sends as first message** (JSON):
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"type": "auth",
|
||||||
|
"token": "<JWT>",
|
||||||
|
"config": {
|
||||||
|
"model": "nova-2",
|
||||||
|
"language": "en-US",
|
||||||
|
"interim_results": true,
|
||||||
|
"endpointing": 300
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**After auth success, client sends**: raw audio binary frames (PCM 16kHz mono)
|
||||||
|
|
||||||
|
**Server sends to client**:
|
||||||
|
```json
|
||||||
|
{ "type": "ready" }
|
||||||
|
{ "type": "transcript", "text": "...", "is_final": true, "confidence": 0.98 }
|
||||||
|
{ "type": "error", "code": "insufficient_credits", "message": "..." }
|
||||||
|
{ "type": "credits_low", "seconds_remaining": 300 }
|
||||||
|
{ "type": "session_end", "seconds_used": 120 }
|
||||||
|
```
|
||||||
|
|
||||||
|
### Tasks (`src/websocket/proxy.js`)
|
||||||
|
|
||||||
|
1. **Upgrade handler**
|
||||||
|
- Attach to the HTTP server using `ws.Server({ noServer: true })`
|
||||||
|
- In `server.on('upgrade', ...)`, route `/ws/transcribe` to this handler
|
||||||
|
|
||||||
|
2. **Auth handshake**
|
||||||
|
- First message must be `{ type: 'auth', token: '...' }` — received within 5 seconds
|
||||||
|
or connection is terminated
|
||||||
|
- Verify JWT, load user's credit balance from DB
|
||||||
|
- If balance is 0 or negative, send `insufficient_credits` error and close
|
||||||
|
|
||||||
|
3. **Deepgram upstream connection**
|
||||||
|
- Open a WebSocket to Deepgram's streaming API:
|
||||||
|
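`wss://api.deepgram.com/v1/listen?model=nova-2&language=en-US&interim_results=true&encoding=linear16&sample_rate=16000&channels=1` (raw, headerless PCM requires explicit `encoding` and `sample_rate`)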
|
||||||
|
- Auth header: `Authorization: Token <DEEPGRAM_API_KEY>`
|
||||||
|
- Use query params from client's `config` object (whitelist allowed params)
|
||||||
|
|
||||||
|
4. **Audio forwarding**
|
||||||
|
- All binary messages from client → forward directly to Deepgram upstream
|
||||||
|
- All messages from Deepgram → parse JSON, reformat, forward to client
|
||||||
|
|
||||||
|
5. **Usage tracking**
|
||||||
|
- Create a `transcription_sessions` row on connection
|
||||||
|
- Maintain an in-memory `secondsUsed` counter per connection
|
||||||
|
- Deepgram sends `{ type: 'Results', duration: X }` in responses — use this for
|
||||||
|
accurate second counting
|
||||||
|
- Every 10 seconds (or on disconnect), write current `secondsUsed` to DB:
|
||||||
|
- Update `transcription_sessions.seconds_used`
|
||||||
|
- Decrement `credit_balance.seconds_remaining`
|
||||||
|
- Insert into `usage_ledger`
|
||||||
|
- If `seconds_remaining` hits 0: send `insufficient_credits`, close connection
|
||||||
|
|
||||||
|
6. **Cleanup on disconnect**
|
||||||
|
- Mark session as `completed`, set `ended_at`
|
||||||
|
- Do final usage flush to DB
|
||||||
|
- Close Deepgram upstream if still open
|
||||||
|
|
||||||
|
7. **Error handling**
|
||||||
|
- If Deepgram upstream closes unexpectedly, notify client and close
|
||||||
|
- If client sends malformed data, log and continue (don't crash)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 4 — Account Routes & Rate Limiting
|
||||||
|
|
||||||
|
### Tasks
|
||||||
|
|
||||||
|
1. **Account routes** (`src/routes/account.js`)
|
||||||
|
- `GET /account/me` — returns `{ email, credits: { seconds_remaining, minutes_remaining }, created_at }`
|
||||||
|
- `GET /account/usage` — returns last 30 days of `usage_ledger` entries grouped by day,
|
||||||
|
plus list of last 10 sessions with duration
|
||||||
|
|
||||||
|
2. **Rate limiting** (`src/middleware/rateLimit.js`)
|
||||||
|
- Use in-memory rate limiting (no Redis needed at this scale)
|
||||||
|
- Auth endpoints: max 10 requests per minute per IP
|
||||||
|
- WebSocket connections: max 2 concurrent connections per user
|
||||||
|
(store active connections in a `Map<userId, Set<ws>>`)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 5 — Web Dashboard
|
||||||
|
|
||||||
|
A simple, functional HTML/CSS/JS dashboard. No framework — vanilla JS is fine.
|
||||||
|
This is a developer-friendly streamer tool, not a consumer SaaS, so clean and
|
||||||
|
functional beats flashy.
|
||||||
|
|
||||||
|
### Pages
|
||||||
|
|
||||||
|
**`/` (Landing / Login)**
|
||||||
|
- Brief product description (what this is, why it exists)
|
||||||
|
- Login form and link to register
|
||||||
|
- Link to GitHub/Gitea repo
|
||||||
|
|
||||||
|
**`/dashboard` (Post-login)**
|
||||||
|
- Current credit balance (minutes remaining, prominently displayed)
|
||||||
|
- "Buy Credits" section showing the three packages with Stripe Checkout buttons
|
||||||
|
- Usage chart: last 30 days bar chart (vanilla canvas or a small CDN chart lib)
|
||||||
|
- Recent sessions table: date, duration, status
|
||||||
|
|
||||||
|
**`/register`**
|
||||||
|
- Registration form
|
||||||
|
|
||||||
|
### Implementation Notes
|
||||||
|
- Store JWT in `localStorage`, attach as `Authorization` header on API calls
|
||||||
|
- Redirect to `/` if JWT missing or expired
|
||||||
|
- Keep CSS minimal but readable — this is a utility dashboard
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 6 — Desktop App Integration
|
||||||
|
|
||||||
|
Changes needed in the `local-transcription` Python repo.
|
||||||
|
|
||||||
|
### New file: `client/remote_transcription.py`
|
||||||
|
|
||||||
|
This module replaces `transcription_engine_realtime.py` when remote mode is active.
|
||||||
|
|
||||||
|
```python
|
||||||
|
# Pseudocode / spec for Claude Code to implement
|
||||||
|
|
||||||
|
class RemoteTranscriptionEngine:
|
||||||
|
"""
|
||||||
|
Connects to the transcription proxy WebSocket and streams audio.
|
||||||
|
Provides the same callback interface as the local engine so the
|
||||||
|
rest of the app doesn't need to change.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, config, on_transcript_callback):
|
||||||
|
# config contains: server_url, auth_token (or byok_api_key), model
|
||||||
|
...
|
||||||
|
|
||||||
|
def start(self):
|
||||||
|
# Open WebSocket connection
|
||||||
|
# Send auth message
|
||||||
|
# Start audio capture thread (reuse existing audio_capture.py)
|
||||||
|
...
|
||||||
|
|
||||||
|
def stop(self):
|
||||||
|
# Close WebSocket gracefully
|
||||||
|
...
|
||||||
|
|
||||||
|
def _on_audio_chunk(self, audio_data):
|
||||||
|
# Called by audio_capture.py with raw PCM data
|
||||||
|
# Send as binary WebSocket frame
|
||||||
|
...
|
||||||
|
|
||||||
|
def _on_server_message(self, message):
|
||||||
|
# Parse JSON from server
|
||||||
|
# On type='transcript': call on_transcript_callback
|
||||||
|
# On type='credits_low': trigger UI warning
|
||||||
|
# On type='error': surface to user
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
### BYOK Mode
|
||||||
|
|
||||||
|
When user provides their own Deepgram key, connect directly to Deepgram instead of the proxy:
|
||||||
|
- Endpoint: `wss://api.deepgram.com/v1/listen?...`
|
||||||
|
- Auth: `Authorization: Token <user_key>`
|
||||||
|
- No session tracking (Deepgram handles billing directly to the user)
|
||||||
|
- Same `RemoteTranscriptionEngine` class, just different URL and auth header
|
||||||
|
|
||||||
|
### Settings Changes (`gui/settings_dialog_qt.py`)
|
||||||
|
|
||||||
|
Add a new "Transcription Mode" section:
|
||||||
|
|
||||||
|
```
|
||||||
|
Transcription Mode:
|
||||||
|
○ Local (Whisper) [existing behavior]
|
||||||
|
○ Remote - Managed [requires login]
|
||||||
|
○ Remote - BYOK [requires Deepgram API key]
|
||||||
|
|
||||||
|
[If Managed selected]:
|
||||||
|
Server URL: [____________]
|
||||||
|
[Login / Register] [View Balance: 420 min remaining]
|
||||||
|
|
||||||
|
[If BYOK selected]:
|
||||||
|
Deepgram API Key: [____________]
|
||||||
|
Model: [nova-2 ▼]
|
||||||
|
```
|
||||||
|
|
||||||
|
### Config additions (`config/default_config.yaml`)
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
remote:
|
||||||
|
mode: local # local | managed | byok
|
||||||
|
server_url: "" # proxy server URL for managed mode
|
||||||
|
auth_token: "" # JWT stored after login
|
||||||
|
byok_api_key: "" # Deepgram key for BYOK mode
|
||||||
|
deepgram_model: nova-2
|
||||||
|
language: en-US
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Build & Deployment Notes
|
||||||
|
|
||||||
|
### Docker Compose (local dev)
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
version: '3.8'
|
||||||
|
services:
|
||||||
|
db:
|
||||||
|
image: postgres:15
|
||||||
|
environment:
|
||||||
|
POSTGRES_DB: transcription_proxy
|
||||||
|
POSTGRES_USER: user
|
||||||
|
POSTGRES_PASSWORD: password
|
||||||
|
ports:
|
||||||
|
- "5432:5432"
|
||||||
|
volumes:
|
||||||
|
- pgdata:/var/lib/postgresql/data
|
||||||
|
|
||||||
|
app:
|
||||||
|
build: .
|
||||||
|
ports:
|
||||||
|
- "3000:3000"
|
||||||
|
environment:
|
||||||
|
DATABASE_URL: postgresql://user:password@db:5432/transcription_proxy
|
||||||
|
depends_on:
|
||||||
|
- db
|
||||||
|
volumes:
|
||||||
|
- .:/app
|
||||||
|
- /app/node_modules
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
pgdata:
|
||||||
|
```
|
||||||
|
|
||||||
|
### Production Deployment
|
||||||
|
|
||||||
|
This service is a good fit for deployment on AnHonestHost WHP as a containerized app,
|
||||||
|
or on a small DigitalOcean/Linode VPS. Requirements are light:
|
||||||
|
- 512MB RAM is sufficient
|
||||||
|
- Postgres can be the same instance as other services or managed (e.g., Supabase free tier)
|
||||||
|
- Needs a public domain with SSL for WebSocket (`wss://`) to work from desktop clients
|
||||||
|
|
||||||
|
Reverse proxy config (Nginx or HAProxy) should:
|
||||||
|
- Proxy HTTP → `localhost:3000`
|
||||||
|
- Pass `Upgrade` and `Connection` headers for WebSocket support
|
||||||
|
- Set `proxy_read_timeout 3600` (sessions can be long)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Implementation Order
|
||||||
|
|
||||||
|
Build and test in this sequence:
|
||||||
|
|
||||||
|
1. Project scaffold + DB connection + migrations
|
||||||
|
2. Auth (register/login/JWT) — test with curl
|
||||||
|
3. Stripe billing + webhook — test with Stripe CLI (`stripe listen`)
|
||||||
|
4. WebSocket proxy — test with a simple browser WebSocket client first
|
||||||
|
5. Usage tracking and credit decrement
|
||||||
|
6. Account/usage routes
|
||||||
|
7. Web dashboard
|
||||||
|
8. Desktop app integration (separate PR in local-transcription repo)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Key Decisions & Rationale
|
||||||
|
|
||||||
|
| Decision | Choice | Reason |
|
||||||
|
|---|---|---|
|
||||||
|
| Credits model | Prepaid | No surprise charges, simpler billing, better for irregular streamer usage |
|
||||||
|
| WebSocket library | `ws` | Lightweight, no abstraction overhead, plays well with raw binary audio |
|
||||||
|
| Auth | JWT (stateless) | Desktop app holds token locally; no session store needed |
|
||||||
|
| DB driver | `node-postgres` (pg) | No ORM overhead; schema is simple enough for raw SQL |
|
||||||
|
| Migrations | Raw SQL files | No dependency on Knex/Prisma; easy to inspect and reason about |
|
||||||
|
| Rate limiting | In-memory | Redis is overkill for this scale; single-process Node is fine initially |
|
||||||
|
| Frontend | Vanilla JS | Dashboard is simple utility UI; no framework justified |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## What This Plan Does NOT Cover (Future Work)
|
||||||
|
|
||||||
|
- OAuth / social login
|
||||||
|
- Admin panel for managing users
|
||||||
|
- Refund / credit adjustment tooling
|
||||||
|
- Email verification
|
||||||
|
- Password reset flow
|
||||||
|
- Multi-language support beyond Deepgram's defaults
|
||||||
|
- Analytics / aggregated usage reporting
|
||||||
|
- Self-hosted Whisper inference as a third backend option
|
||||||
BIN
DEEPGRAM_PROXY_PLAN.md:Zone.Identifier
Normal file
BIN
DEEPGRAM_PROXY_PLAN.md:Zone.Identifier
Normal file
Binary file not shown.
202
README.md
202
README.md
@@ -1,13 +1,14 @@
|
|||||||
# Local Transcription
|
# Local Transcription
|
||||||
|
|
||||||
A real-time speech-to-text desktop application for streamers. Run locally on your machine with GPU or CPU, display transcriptions via OBS browser source, and optionally sync with other users through a multi-user server.
|
A real-time speech-to-text desktop application for streamers. Runs locally on your machine with GPU or CPU, displays transcriptions via OBS browser source, and optionally syncs with other users through a multi-user server.
|
||||||
|
|
||||||
**Version 1.4.0**
|
**Version 1.4.0**
|
||||||
|
|
||||||
## Features
|
## Features
|
||||||
|
|
||||||
- **Real-Time Transcription**: Live speech-to-text using Whisper models with minimal latency
|
- **Real-Time Transcription**: Live speech-to-text using Whisper models with minimal latency
|
||||||
- **Standalone Desktop App**: PySide6/Qt GUI that works without any server
|
- **Cross-Platform**: Native desktop app for Windows, macOS, and Linux via [Tauri](https://tauri.app/)
|
||||||
|
- **Dual Transcription Modes**: Local (Whisper) or cloud (Deepgram) with managed billing or BYOK
|
||||||
- **CPU & GPU Support**: Automatic detection of CUDA (NVIDIA), MPS (Apple Silicon), or CPU fallback
|
- **CPU & GPU Support**: Automatic detection of CUDA (NVIDIA), MPS (Apple Silicon), or CPU fallback
|
||||||
- **Advanced Voice Detection**: Dual-layer VAD (WebRTC + Silero) for accurate speech detection
|
- **Advanced Voice Detection**: Dual-layer VAD (WebRTC + Silero) for accurate speech detection
|
||||||
- **OBS Integration**: Built-in web server for browser source capture at `http://localhost:8080`
|
- **OBS Integration**: Built-in web server for browser source capture at `http://localhost:8080`
|
||||||
@@ -16,36 +17,70 @@ A real-time speech-to-text desktop application for streamers. Run locally on you
|
|||||||
- **Customizable Colors**: User-configurable colors for name, text, and background
|
- **Customizable Colors**: User-configurable colors for name, text, and background
|
||||||
- **Noise Suppression**: Built-in audio preprocessing to reduce background noise
|
- **Noise Suppression**: Built-in audio preprocessing to reduce background noise
|
||||||
- **Auto-Updates**: Automatic update checking with release notes display
|
- **Auto-Updates**: Automatic update checking with release notes display
|
||||||
- **Cross-Platform**: Builds available for Windows and Linux
|
|
||||||
|
## Architecture
|
||||||
|
|
||||||
|
The application uses a two-process architecture:
|
||||||
|
|
||||||
|
1. **Tauri Shell** (Svelte 5 frontend) — lightweight native window (~50MB) rendering the UI
|
||||||
|
2. **Python Backend** (sidecar) — headless process running transcription, audio capture, and the OBS web server
|
||||||
|
|
||||||
|
The Tauri frontend communicates with the Python backend via REST API and WebSocket, following the same pattern as [voice-to-notes](https://repo.anhonesthost.net/MacroPad/voice-to-notes).
|
||||||
|
|
||||||
|
```
|
||||||
|
Tauri App (user launches this)
|
||||||
|
└─ Spawns Python backend as sidecar
|
||||||
|
├─ FastAPI REST API (control endpoints)
|
||||||
|
├─ WebSocket /ws/control (real-time state + transcriptions)
|
||||||
|
├─ OBS web display at http://localhost:8080
|
||||||
|
└─ Transcription engine (Whisper or Deepgram)
|
||||||
|
```
|
||||||
|
|
||||||
|
> **Legacy GUI**: The original PySide6/Qt desktop GUI (`main.py`) still works alongside the new Tauri frontend during the transition period.
|
||||||
|
|
||||||
## Quick Start
|
## Quick Start
|
||||||
|
|
||||||
### Running from Source
|
### Running from Source
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Install dependencies
|
# Install Python dependencies
|
||||||
uv sync
|
uv sync
|
||||||
|
|
||||||
# Run the application
|
# Run the Tauri app (frontend + backend)
|
||||||
|
npm install
|
||||||
|
npm run tauri dev
|
||||||
|
|
||||||
|
# Or run just the headless backend (for development)
|
||||||
|
uv run python -m backend.main_headless
|
||||||
|
|
||||||
|
# Or run the legacy PySide6 GUI
|
||||||
uv run python main.py
|
uv run python main.py
|
||||||
```
|
```
|
||||||
|
|
||||||
### Using Pre-Built Executables
|
### Using Pre-Built Executables
|
||||||
|
|
||||||
Download the latest release from the [releases page](https://repo.anhonesthost.net/streamer-tools/local-transcription/releases) and run the executable for your platform.
|
Download the latest release from the [releases page](https://repo.anhonesthost.net/streamer-tools/local-transcription/releases):
|
||||||
|
|
||||||
|
- **App installer** (Tauri shell): `.msi` (Windows), `.dmg` (macOS), `.deb`/`.rpm`/`.AppImage` (Linux)
|
||||||
|
- **Sidecar** (Python backend): Download the matching `sidecar-*` zip for your platform (CUDA or CPU)
|
||||||
|
|
||||||
### Building from Source
|
### Building from Source
|
||||||
|
|
||||||
**Linux:**
|
|
||||||
```bash
|
```bash
|
||||||
./build.sh
|
# Build the Tauri app
|
||||||
# Output: dist/LocalTranscription/LocalTranscription
|
npm install
|
||||||
```
|
npm run tauri build
|
||||||
|
# Output: src-tauri/target/release/bundle/
|
||||||
|
|
||||||
**Windows:**
|
# Build the Python sidecar (headless, no Qt)
|
||||||
```cmd
|
uv sync
|
||||||
|
uv run pyinstaller local-transcription-headless.spec
|
||||||
|
# Output: dist/local-transcription-backend/
|
||||||
|
|
||||||
|
# Build the legacy PySide6 app (Linux)
|
||||||
|
./build.sh
|
||||||
|
# Build the legacy PySide6 app (Windows)
|
||||||
build.bat
|
build.bat
|
||||||
# Output: dist\LocalTranscription\LocalTranscription.exe
|
|
||||||
```
|
```
|
||||||
|
|
||||||
For detailed build instructions, see [BUILD.md](BUILD.md).
|
For detailed build instructions, see [BUILD.md](BUILD.md).
|
||||||
@@ -57,14 +92,23 @@ For detailed build instructions, see [BUILD.md](BUILD.md).
|
|||||||
1. Launch the application
|
1. Launch the application
|
||||||
2. Select your microphone from the audio device dropdown
|
2. Select your microphone from the audio device dropdown
|
||||||
3. Choose a Whisper model (smaller = faster, larger = more accurate):
|
3. Choose a Whisper model (smaller = faster, larger = more accurate):
|
||||||
- `tiny.en` / `tiny` - Fastest, good for quick captions
|
- `tiny.en` / `tiny` — Fastest, good for quick captions
|
||||||
- `base.en` / `base` - Balanced speed and accuracy
|
- `base.en` / `base` — Balanced speed and accuracy
|
||||||
- `small.en` / `small` - Better accuracy
|
- `small.en` / `small` — Better accuracy
|
||||||
- `medium.en` / `medium` - High accuracy
|
- `medium.en` / `medium` — High accuracy
|
||||||
- `large-v3` - Best accuracy (requires more resources)
|
- `large-v3` — Best accuracy (requires more resources)
|
||||||
4. Click **Start** to begin transcription
|
4. Click **Start** to begin transcription
|
||||||
5. Transcriptions appear in the main window and at `http://localhost:8080`
|
5. Transcriptions appear in the main window and at `http://localhost:8080`
|
||||||
|
|
||||||
|
### Remote Transcription (Deepgram)
|
||||||
|
|
||||||
|
Instead of local Whisper models, you can use cloud-based transcription:
|
||||||
|
|
||||||
|
- **Managed mode**: Sign up via the transcription proxy for metered billing
|
||||||
|
- **BYOK mode**: Bring your own Deepgram API key for direct access
|
||||||
|
|
||||||
|
Configure in Settings > Remote Transcription.
|
||||||
|
|
||||||
### OBS Browser Source Setup
|
### OBS Browser Source Setup
|
||||||
|
|
||||||
1. Start the Local Transcription app
|
1. Start the Local Transcription app
|
||||||
@@ -88,7 +132,7 @@ For syncing transcriptions across multiple users (e.g., multi-host streams or tr
|
|||||||
|
|
||||||
## Configuration
|
## Configuration
|
||||||
|
|
||||||
Settings are stored at `~/.local-transcription/config.yaml` and can be modified through the GUI settings panel.
|
Settings are stored at `~/.local-transcription/config.yaml` and can be modified through the GUI settings panel or the REST API.
|
||||||
|
|
||||||
### Key Settings
|
### Key Settings
|
||||||
|
|
||||||
@@ -100,6 +144,7 @@ Settings are stored at `~/.local-transcription/config.yaml` and can be modified
|
|||||||
| `transcription.silero_sensitivity` | VAD sensitivity (0-1, lower = more sensitive) | `0.4` |
|
| `transcription.silero_sensitivity` | VAD sensitivity (0-1, lower = more sensitive) | `0.4` |
|
||||||
| `transcription.post_speech_silence_duration` | Silence before finalizing (seconds) | `0.3` |
|
| `transcription.post_speech_silence_duration` | Silence before finalizing (seconds) | `0.3` |
|
||||||
| `transcription.continuous_mode` | Fast speaker mode for quick talkers | `false` |
|
| `transcription.continuous_mode` | Fast speaker mode for quick talkers | `false` |
|
||||||
|
| `remote.mode` | Transcription mode (local/managed/byok) | `local` |
|
||||||
| `display.show_timestamps` | Show timestamps with transcriptions | `true` |
|
| `display.show_timestamps` | Show timestamps with transcriptions | `true` |
|
||||||
| `display.fade_after_seconds` | Fade out time (0 = never) | `10` |
|
| `display.fade_after_seconds` | Fade out time (0 = never) | `10` |
|
||||||
| `display.font_source` | Font type (System Font/Web-Safe/Google Font/Custom File) | `System Font` |
|
| `display.font_source` | Font type (System Font/Web-Safe/Google Font/Custom File) | `System Font` |
|
||||||
@@ -111,67 +156,114 @@ See [config/default_config.yaml](config/default_config.yaml) for all available o
|
|||||||
|
|
||||||
```
|
```
|
||||||
local-transcription/
|
local-transcription/
|
||||||
|
├── src/ # Svelte 5 frontend (Tauri UI)
|
||||||
|
│ ├── App.svelte # Main app shell
|
||||||
|
│ ├── lib/components/ # UI components
|
||||||
|
│ │ ├── Header.svelte
|
||||||
|
│ │ ├── StatusBar.svelte
|
||||||
|
│ │ ├── Controls.svelte
|
||||||
|
│ │ ├── TranscriptionDisplay.svelte
|
||||||
|
│ │ └── Settings.svelte
|
||||||
|
│ └── lib/stores/ # Reactive state management
|
||||||
|
│ ├── backend.ts # WebSocket + REST API client
|
||||||
|
│ ├── config.ts # App configuration
|
||||||
|
│ └── transcriptions.ts # Transcription data
|
||||||
|
├── src-tauri/ # Tauri v2 Rust shell
|
||||||
|
│ ├── src/main.rs
|
||||||
|
│ └── tauri.conf.json
|
||||||
|
├── backend/ # Headless Python backend (sidecar)
|
||||||
|
│ ├── app_controller.py # Orchestration logic (engine, sync, config)
|
||||||
|
│ ├── api_server.py # FastAPI REST + WebSocket control API
|
||||||
|
│ └── main_headless.py # Headless entry point
|
||||||
├── client/ # Core transcription modules
|
├── client/ # Core transcription modules
|
||||||
│ ├── audio_capture.py # Audio input handling
|
│ ├── audio_capture.py # Audio input handling
|
||||||
│ ├── transcription_engine_realtime.py # RealtimeSTT integration
|
│ ├── transcription_engine_realtime.py # RealtimeSTT / Whisper
|
||||||
|
│ ├── deepgram_transcription.py # Deepgram cloud transcription
|
||||||
│ ├── noise_suppression.py # VAD and noise reduction
|
│ ├── noise_suppression.py # VAD and noise reduction
|
||||||
│ ├── device_utils.py # CPU/GPU detection
|
│ ├── device_utils.py # CPU/GPU/MPS detection
|
||||||
│ ├── config.py # Configuration management
|
│ ├── config.py # Configuration management
|
||||||
│ ├── server_sync.py # Multi-user server client
|
│ ├── server_sync.py # Multi-user server client
|
||||||
│ └── update_checker.py # Auto-update functionality
|
│ └── update_checker.py # Auto-update functionality
|
||||||
├── gui/ # Desktop application UI
|
├── gui/ # Legacy PySide6/Qt GUI
|
||||||
│ ├── main_window_qt.py # Main application window
|
│ ├── main_window_qt.py
|
||||||
│ ├── settings_dialog_qt.py # Settings dialog
|
│ ├── settings_dialog_qt.py
|
||||||
│ └── transcription_display_qt.py # Display widget
|
│ └── transcription_display_qt.py
|
||||||
├── server/ # Web servers
|
├── server/ # Web servers
|
||||||
│ ├── web_display.py # Local FastAPI server for OBS
|
│ ├── web_display.py # Local FastAPI server for OBS
|
||||||
│ └── nodejs/ # Multi-user sync server
|
│ └── nodejs/ # Multi-user sync server
|
||||||
│ ├── server.js # Express + WebSocket server
|
├── .gitea/workflows/ # CI/CD
|
||||||
│ └── README.md # Deployment instructions
|
│ ├── release.yml # Tauri app builds (all platforms)
|
||||||
|
│ └── build-sidecar.yml # Python sidecar builds (CUDA + CPU)
|
||||||
├── config/
|
├── config/
|
||||||
│ └── default_config.yaml # Default settings template
|
│ └── default_config.yaml # Default settings template
|
||||||
├── main.py # GUI entry point
|
├── main.py # Legacy GUI entry point
|
||||||
├── main_cli.py # CLI version (for testing)
|
├── main_cli.py # CLI version (for testing)
|
||||||
├── build.sh # Linux build script
|
├── local-transcription.spec # PyInstaller config (legacy, with PySide6)
|
||||||
├── build.bat # Windows build script
|
├── local-transcription-headless.spec # PyInstaller config (headless sidecar)
|
||||||
└── local-transcription.spec # PyInstaller configuration
|
├── pyproject.toml # Python dependencies
|
||||||
|
└── package.json # Node.js / Tauri dependencies
|
||||||
```
|
```
|
||||||
|
|
||||||
## Technology Stack
|
## Technology Stack
|
||||||
|
|
||||||
### Desktop Application
|
### Frontend (Tauri)
|
||||||
|
- **Tauri v2** — Native cross-platform shell (Rust)
|
||||||
|
- **Svelte 5** — Reactive UI framework (TypeScript)
|
||||||
|
- **Vite** — Frontend build tool
|
||||||
|
|
||||||
|
### Backend (Python Sidecar)
|
||||||
- **Python 3.9+**
|
- **Python 3.9+**
|
||||||
- **PySide6** - Qt6 GUI framework
|
- **FastAPI + Uvicorn** — REST API and WebSocket server
|
||||||
- **RealtimeSTT** - Real-time speech-to-text with advanced VAD
|
- **RealtimeSTT** — Real-time speech-to-text with advanced VAD
|
||||||
- **faster-whisper** - Optimized Whisper model inference
|
- **faster-whisper** — Optimized Whisper model inference (CTranslate2)
|
||||||
- **PyTorch** - ML framework (CUDA-enabled)
|
- **PyTorch** — ML framework (CUDA-enabled builds available)
|
||||||
- **sounddevice** - Cross-platform audio capture
|
- **sounddevice** — Cross-platform audio capture
|
||||||
- **webrtcvad + silero_vad** - Voice activity detection
|
- **webrtcvad + silero_vad** — Voice activity detection
|
||||||
- **noisereduce** - Noise suppression
|
|
||||||
|
|
||||||
### Web Servers
|
### Multi-User Server (Optional)
|
||||||
- **FastAPI + Uvicorn** - Local web display server
|
- **Node.js + Express + WebSocket** — Real-time sync server
|
||||||
- **Node.js + Express + WebSocket** - Multi-user sync server
|
|
||||||
|
|
||||||
### Build Tools
|
### Build & CI/CD
|
||||||
- **PyInstaller** - Executable packaging
|
- **PyInstaller** — Python sidecar packaging
|
||||||
- **uv** - Fast Python package manager
|
- **Tauri CLI** — App bundling (.msi, .dmg, .deb, .rpm, .AppImage)
|
||||||
|
- **Gitea Actions** — Automated cross-platform builds
|
||||||
|
- **uv** — Fast Python package manager
|
||||||
|
|
||||||
|
## CI/CD
|
||||||
|
|
||||||
|
Two Gitea Actions workflows in `.gitea/workflows/`:
|
||||||
|
|
||||||
|
| Workflow | Trigger | Produces |
|
||||||
|
|----------|---------|----------|
|
||||||
|
| `release.yml` | Push to `main` | Tauri app installers for all platforms |
|
||||||
|
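| `build-sidecar.yml` | Push to `main` that changes `client/`, `server/`, `backend/`, `pyproject.toml`, or `local-transcription-headless.spec`; can also be run manually (`workflow_dispatch`) | Python sidecar zips (CUDA + CPU) |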
|
||||||
|
|
||||||
|
Both workflows require a `BUILD_TOKEN` secret in the repo settings (Gitea API token with release write access).
|
||||||
|
|
||||||
|
### Release Artifacts
|
||||||
|
|
||||||
|
| Platform | App Installer | Sidecar (CUDA) | Sidecar (CPU) |
|
||||||
|
|----------|--------------|----------------|---------------|
|
||||||
|
| Linux x86_64 | `.deb`, `.rpm`, `.AppImage` | `sidecar-linux-x86_64-cuda.zip` | `sidecar-linux-x86_64-cpu.zip` |
|
||||||
|
| Windows x86_64 | `.msi`, `-setup.exe` | `sidecar-windows-x86_64-cuda.zip` | `sidecar-windows-x86_64-cpu.zip` |
|
||||||
|
| macOS ARM64 | `.dmg` | — | `sidecar-macos-aarch64-cpu.zip` |
|
||||||
|
|
||||||
## System Requirements
|
## System Requirements
|
||||||
|
|
||||||
### Minimum
|
### Minimum
|
||||||
- Python 3.9+
|
|
||||||
- 4GB RAM
|
- 4GB RAM
|
||||||
- Any modern CPU
|
- Any modern CPU
|
||||||
|
|
||||||
### Recommended (for real-time performance)
|
### Recommended (for local real-time transcription)
|
||||||
- 8GB+ RAM
|
- 8GB+ RAM
|
||||||
- NVIDIA GPU with CUDA support (for GPU acceleration)
|
- NVIDIA GPU with CUDA support (for GPU acceleration)
|
||||||
- FFmpeg (installed automatically with dependencies)
|
|
||||||
|
|
||||||
### For Building
|
### For Building
|
||||||
- **Linux**: gcc, Python dev headers
|
- **Tauri app**: Node.js 20+, Rust stable, platform SDK (see [Tauri prerequisites](https://tauri.app/start/prerequisites/))
|
||||||
- **Windows**: Visual Studio Build Tools, Python dev headers
|
- **Python sidecar**: Python 3.9+, uv, PyInstaller
|
||||||
|
- **Linux**: `libgtk-3-dev`, `libwebkit2gtk-4.1-dev`, `libappindicator3-dev`, `librsvg2-dev`, `patchelf`
|
||||||
|
- **Windows**: Visual Studio Build Tools, WebView2
|
||||||
|
- **macOS**: Xcode Command Line Tools
|
||||||
|
|
||||||
## Troubleshooting
|
## Troubleshooting
|
||||||
|
|
||||||
@@ -185,7 +277,7 @@ local-transcription/
|
|||||||
# List available audio devices
|
# List available audio devices
|
||||||
uv run python main_cli.py --list-devices
|
uv run python main_cli.py --list-devices
|
||||||
```
|
```
|
||||||
- Ensure microphone permissions are granted
|
- Ensure microphone permissions are granted (especially on macOS)
|
||||||
- Try different device indices in settings
|
- Try different device indices in settings
|
||||||
|
|
||||||
### GPU Not Detected
|
### GPU Not Detected
|
||||||
@@ -193,13 +285,13 @@ uv run python main_cli.py --list-devices
|
|||||||
# Check CUDA availability
|
# Check CUDA availability
|
||||||
uv run python -c "import torch; print(torch.cuda.is_available())"
|
uv run python -c "import torch; print(torch.cuda.is_available())"
|
||||||
```
|
```
|
||||||
- Install NVIDIA drivers (CUDA toolkit is bundled)
|
- Install NVIDIA drivers (CUDA toolkit is bundled in CUDA sidecar builds)
|
||||||
- The app automatically falls back to CPU if no GPU is available
|
- The app automatically falls back to CPU if no GPU is available
|
||||||
|
|
||||||
### Web Server Port Conflicts
|
### Web Server Port Conflicts
|
||||||
- Default port is 8080
|
- Default port is 8080; the app tries ports 8080-8084 automatically
|
||||||
- Change in settings or edit config file
|
- Change in settings or edit config file
|
||||||
- Check for conflicts: `lsof -i :8080` (Linux) or `netstat -ano | findstr :8080` (Windows)
|
- Check for conflicts: `lsof -i :8080` (Linux/macOS) or `netstat -ano | findstr :8080` (Windows)
|
||||||
|
|
||||||
## Use Cases
|
## Use Cases
|
||||||
|
|
||||||
@@ -222,3 +314,5 @@ MIT License
|
|||||||
- [OpenAI Whisper](https://github.com/openai/whisper) for the speech recognition model
|
- [OpenAI Whisper](https://github.com/openai/whisper) for the speech recognition model
|
||||||
- [RealtimeSTT](https://github.com/KoljaB/RealtimeSTT) for real-time transcription capabilities
|
- [RealtimeSTT](https://github.com/KoljaB/RealtimeSTT) for real-time transcription capabilities
|
||||||
- [faster-whisper](https://github.com/guillaumekln/faster-whisper) for optimized inference
|
- [faster-whisper](https://github.com/guillaumekln/faster-whisper) for optimized inference
|
||||||
|
- [Tauri](https://tauri.app/) for the cross-platform desktop framework
|
||||||
|
- [Deepgram](https://deepgram.com/) for cloud transcription API
|
||||||
|
|||||||
1
backend/__init__.py
Normal file
1
backend/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
"""Backend package for headless transcription service."""
|
||||||
323
backend/api_server.py
Normal file
323
backend/api_server.py
Normal file
@@ -0,0 +1,323 @@
|
|||||||
|
"""FastAPI control API server for the headless transcription backend.
|
||||||
|
|
||||||
|
Extends the existing OBS display server with REST endpoints and a
|
||||||
|
control WebSocket channel so that a Tauri (or any other) frontend
|
||||||
|
can drive the application.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import List, Optional
|
||||||
|
|
||||||
|
from fastapi import FastAPI, WebSocket, HTTPException
|
||||||
|
from fastapi.middleware.cors import CORSMiddleware
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
from backend.app_controller import AppController
|
||||||
|
|
||||||
|
|
||||||
|
# ── Request / Response Models ──────────────────────────────────────
|
||||||
|
|
||||||
|
class ConfigUpdate(BaseModel):
    """Request body for ``PUT /api/config``: a batch of settings to apply.

    Keys are dot-notation config paths, for example
    ``{"user.name": "Alice", "transcription.model": "small.en"}``.
    """

    # Mapping of dot-notation setting name -> new value.
    settings: dict
|
||||||
|
|
||||||
|
|
||||||
|
class LoginRequest(BaseModel):
    """Request body for ``POST /api/login``."""

    # Credentials forwarded to the remote server's /api/auth/login endpoint.
    email: str
    password: str
    # Base URL of the remote server the login is proxied to.
    server_url: str
|
||||||
|
|
||||||
|
|
||||||
|
class RegisterRequest(BaseModel):
    """Request body for ``POST /api/register``."""

    # Credentials forwarded to the remote server's /api/auth/register endpoint.
    email: str
    password: str
    # Base URL of the remote server the registration is proxied to.
    server_url: str
|
||||||
|
|
||||||
|
|
||||||
|
class SkipVersionRequest(BaseModel):
    """Request body for ``POST /api/skip-version``."""

    # Version string handed to the controller's skip_version().
    version: str
|
||||||
|
|
||||||
|
|
||||||
|
class SaveFileRequest(BaseModel):
    """Request body for ``POST /api/save-file``."""

    # Destination path on the machine running the backend.
    path: str
    # Content written to ``path`` as UTF-8.
    text: str
|
||||||
|
|
||||||
|
|
||||||
|
# ── API Server ─────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
class APIServer:
    """Wraps AppController with a FastAPI application exposing control endpoints.

    The FastAPI instance is available as ``self.app``. REST routes live under
    ``/api/*``; ``/ws/control`` is a WebSocket channel on which controller
    events (state changes, transcriptions, previews, errors, low-credit
    warnings) are pushed to every connected client as JSON text frames.
    """

    def __init__(self, controller: AppController):
        """Build the FastAPI app, register routes and hook controller callbacks.

        Args:
            controller: The application controller that the endpoints drive.
        """
        self.controller = controller
        self.control_connections: List[WebSocket] = []

        # Event loop that serves the /ws/control sockets. It is captured when a
        # client connects so that broadcasts triggered from other threads can
        # be scheduled onto the correct loop.
        self._loop: Optional[asyncio.AbstractEventLoop] = None

        self.app = FastAPI(title="Local Transcription API", version="1.0.0")

        # Allow Tauri webview origin.
        # NOTE(review): a wildcard origin lets any web page the user visits
        # call this local API (including /api/save-file). Consider narrowing
        # this to the Tauri origins once the dev-server origin is settled.
        self.app.add_middleware(
            CORSMiddleware,
            allow_origins=["*"],  # Tauri uses tauri://localhost or https://tauri.localhost
            allow_credentials=True,
            allow_methods=["*"],
            allow_headers=["*"],
        )

        self._setup_routes()
        self._wire_controller_callbacks()

    def _wire_controller_callbacks(self):
        """Wire AppController callbacks to broadcast over /ws/control.

        An ``on_state_changed`` callback that was already installed is kept and
        called first; the remaining callbacks are replaced.
        """
        original_state_cb = self.controller.on_state_changed

        def on_state_changed(state: str, message: str):
            if original_state_cb:
                original_state_cb(state, message)
            self._broadcast_control({"type": "state_changed", "state": state, "message": message})

        self.controller.on_state_changed = on_state_changed

        def on_transcription(data: dict):
            self._broadcast_control({"type": "transcription", **data})

        self.controller.on_transcription = on_transcription

        def on_preview(data: dict):
            self._broadcast_control({"type": "preview", **data})

        self.controller.on_preview = on_preview

        def on_error(msg: str):
            self._broadcast_control({"type": "error", "message": msg})

        self.controller.on_error = on_error

        def on_credits_low(seconds: int):
            self._broadcast_control({"type": "credits_low", "seconds_remaining": seconds})

        self.controller.on_credits_low = on_credits_low

    def _broadcast_control(self, data: dict):
        """Send a message to all connected /ws/control clients.

        Safe to call from any thread. The actual sends run on the event loop
        captured in ``websocket_control``; this method only schedules them and
        returns immediately.

        Args:
            data: JSON-serializable payload.
        """
        loop = self._loop
        if not self.control_connections or loop is None or loop.is_closed():
            return

        message = json.dumps(data)

        # Do not use asyncio.get_event_loop() here: when a callback fires on a
        # thread without an event loop it raises, and the previous
        # implementation then treated every client as disconnected.
        asyncio.run_coroutine_threadsafe(self._send_to_all(message), loop)

    async def _send_to_all(self, message: str):
        """Deliver ``message`` to every control client, dropping dead sockets.

        Runs on the event loop, so ``control_connections`` is only mutated from
        the loop thread.
        """
        for ws in list(self.control_connections):
            try:
                await ws.send_text(message)
            except Exception:
                # A failed send means the peer is gone; forget the socket.
                if ws in self.control_connections:
                    self.control_connections.remove(ws)

    def _setup_routes(self):
        """Register all API routes."""
        app = self.app
        ctrl = self.controller

        # ── Status ─────────────────────────────────────────────

        @app.get("/api/status")
        async def get_status():
            return ctrl.get_status()

        @app.get("/api/version")
        async def get_version():
            from version import __version__
            return {"version": __version__}

        # ── Transcription Control ──────────────────────────────

        @app.post("/api/start")
        async def start_transcription():
            success, message = ctrl.start_transcription()
            if not success:
                raise HTTPException(status_code=400, detail=message)
            return {"status": "ok", "message": message}

        @app.post("/api/stop")
        async def stop_transcription():
            success, message = ctrl.stop_transcription()
            if not success:
                raise HTTPException(status_code=400, detail=message)
            return {"status": "ok", "message": message}

        @app.post("/api/clear")
        async def clear_transcriptions():
            count = ctrl.clear_transcriptions()
            return {"status": "ok", "cleared": count}

        @app.get("/api/transcriptions")
        async def get_transcriptions():
            show_timestamps = ctrl.config.get('display.show_timestamps', True)
            return {
                "count": len(ctrl.transcriptions),
                "text": ctrl.get_transcriptions_text(include_timestamps=show_timestamps),
                "items": [
                    {
                        "text": r.text,
                        "user_name": r.user_name,
                        "timestamp": r.timestamp.strftime("%H:%M:%S") if r.timestamp else None,
                    }
                    for r in ctrl.transcriptions
                ],
            }

        @app.post("/api/save-file")
        async def save_file(req: SaveFileRequest):
            """Save text to a file (used by Tauri frontend after dialog)."""
            from pathlib import Path
            # NOTE(review): the path comes straight from the request body and is
            # written without validation; together with the wildcard CORS policy
            # this is an arbitrary-file-write surface. Confirm this is acceptable.
            try:
                Path(req.path).write_text(req.text, encoding="utf-8")
                return {"status": "ok", "path": req.path}
            except Exception as e:
                raise HTTPException(status_code=500, detail=str(e))

        # ── Configuration ──────────────────────────────────────

        @app.get("/api/config")
        async def get_config():
            return ctrl.config.config

        @app.put("/api/config")
        async def update_config(update: ConfigUpdate):
            engine_reloaded, message = ctrl.apply_settings(update.settings)
            return {
                "status": "ok",
                "message": message,
                "engine_reloaded": engine_reloaded,
            }

        # ── Devices ────────────────────────────────────────────

        @app.get("/api/audio-devices")
        async def get_audio_devices():
            return {"devices": ctrl.get_audio_devices()}

        @app.get("/api/compute-devices")
        async def get_compute_devices():
            return {"devices": ctrl.get_compute_devices()}

        # ── Engine ─────────────────────────────────────────────

        @app.post("/api/reload-engine")
        async def reload_engine():
            success, message = ctrl.reload_engine()
            if not success:
                raise HTTPException(status_code=500, detail=message)
            return {"status": "ok", "message": message}

        # ── Updates ────────────────────────────────────────────

        @app.get("/api/check-update")
        async def check_update():
            return ctrl.check_for_updates()

        @app.post("/api/skip-version")
        async def skip_version(req: SkipVersionRequest):
            ctrl.skip_version(req.version)
            return {"status": "ok"}

        # ── Managed Mode Auth Proxy ────────────────────────────
        # The `requests` calls below are blocking, so they are pushed to a
        # worker thread; otherwise a slow remote server would freeze every
        # other request and WebSocket for up to the 10 s timeout.

        @app.post("/api/login")
        async def login(req: LoginRequest):
            """Proxy login to the transcription proxy server.

            On success the returned token and the server URL are stored in the
            config under ``remote.auth_token`` / ``remote.server_url``.
            """
            import requests as http_requests
            try:
                resp = await asyncio.to_thread(
                    http_requests.post,
                    f"{req.server_url}/api/auth/login",
                    json={"email": req.email, "password": req.password},
                    timeout=10,
                )
                if resp.status_code == 200:
                    data = resp.json()
                    ctrl.config.set('remote.auth_token', data.get('token', ''))
                    ctrl.config.set('remote.server_url', req.server_url)
                    return {"status": "ok", "token": data.get('token', '')}
                else:
                    raise HTTPException(status_code=resp.status_code, detail=resp.text)
            except http_requests.RequestException as e:
                raise HTTPException(status_code=502, detail=str(e))

        @app.post("/api/register")
        async def register(req: RegisterRequest):
            """Proxy registration to the transcription proxy server."""
            import requests as http_requests
            try:
                resp = await asyncio.to_thread(
                    http_requests.post,
                    f"{req.server_url}/api/auth/register",
                    json={"email": req.email, "password": req.password},
                    timeout=10,
                )
                if resp.status_code in (200, 201):
                    return {"status": "ok", "data": resp.json()}
                else:
                    raise HTTPException(status_code=resp.status_code, detail=resp.text)
            except http_requests.RequestException as e:
                raise HTTPException(status_code=502, detail=str(e))

        @app.get("/api/balance")
        async def get_balance():
            """Proxy balance check to the transcription proxy server."""
            import requests as http_requests
            server_url = ctrl.config.get('remote.server_url', '')
            token = ctrl.config.get('remote.auth_token', '')
            if not server_url or not token:
                raise HTTPException(status_code=400, detail="Not logged in to managed service")
            try:
                resp = await asyncio.to_thread(
                    http_requests.get,
                    f"{server_url}/api/billing/balance",
                    headers={"Authorization": f"Bearer {token}"},
                    timeout=10,
                )
                if resp.status_code == 200:
                    return resp.json()
                else:
                    raise HTTPException(status_code=resp.status_code, detail=resp.text)
            except http_requests.RequestException as e:
                raise HTTPException(status_code=502, detail=str(e))

        # ── Control WebSocket ──────────────────────────────────

        @app.websocket("/ws/control")
        async def websocket_control(websocket: WebSocket):
            """WebSocket channel for real-time state and transcription push."""
            await websocket.accept()

            # Remember the loop serving this socket so _broadcast_control can
            # schedule sends onto it from any thread.
            self._loop = asyncio.get_running_loop()
            self.control_connections.append(websocket)

            try:
                # Send current status on connect (best effort: a failure here
                # surfaces as a receive error just below).
                try:
                    await websocket.send_json({
                        "type": "state_changed",
                        "state": ctrl.state,
                        "message": "Connected",
                    })
                except Exception:
                    pass

                while True:
                    # Keep alive -- client sends pings
                    await websocket.receive_text()
            except Exception:
                # Any receive failure is treated as a disconnect.
                pass
            finally:
                # `finally` also covers task cancellation, which
                # `except Exception` does not catch.
                if websocket in self.control_connections:
                    self.control_connections.remove(websocket)

        # ── Mount the existing OBS display routes ──────────────
        # The OBS display (GET / and /ws) is handled by the
        # TranscriptionWebServer which shares the same Uvicorn
        # instance. We mount it as a sub-application so the
        # existing OBS URLs continue to work.

        if ctrl.web_server:
            app.mount("/obs", ctrl.web_server.app)
|
||||||
692
backend/app_controller.py
Normal file
692
backend/app_controller.py
Normal file
@@ -0,0 +1,692 @@
|
|||||||
|
"""Headless application controller for transcription backend.
|
||||||
|
|
||||||
|
Extracts orchestration logic from gui/main_window_qt.py into a
|
||||||
|
Qt-free class that manages engine lifecycle, web server, server sync,
|
||||||
|
and configuration -- all accessible via callbacks instead of Qt signals.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import time
|
||||||
|
from datetime import datetime
|
||||||
|
from pathlib import Path
|
||||||
|
from threading import Thread, Lock
|
||||||
|
from typing import Callable, List, Optional
|
||||||
|
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# Add project root to path
|
||||||
|
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
|
||||||
|
|
||||||
|
from client.config import Config
|
||||||
|
from client.device_utils import DeviceManager
|
||||||
|
from client.transcription_engine_realtime import RealtimeTranscriptionEngine, TranscriptionResult
|
||||||
|
from client.deepgram_transcription import DeepgramTranscriptionEngine
|
||||||
|
from client.server_sync import ServerSyncClient
|
||||||
|
from server.web_display import TranscriptionWebServer
|
||||||
|
from version import __version__
|
||||||
|
|
||||||
|
|
||||||
|
class AppState:
    """Namespace of the application state names.

    The members are plain strings rather than ``enum`` values, so they can be
    compared with ``==`` and serialized without conversion.
    """

    INITIALIZING = "initializing"
    READY = "ready"
    TRANSCRIBING = "transcribing"
    RELOADING = "reloading"
    ERROR = "error"
|
||||||
|
|
||||||
|
|
||||||
|
class WebServerThread(Thread):
    """Daemon thread that runs the web server on its own asyncio event loop.

    Attributes:
        web_server: The server whose ``start()`` coroutine is run.
        loop: The event loop created inside the thread; ``None`` until
            ``run()`` has created it.
        error: The exception that ended ``run()``, or ``None``.
    """

    def __init__(self, web_server: TranscriptionWebServer):
        super().__init__(daemon=True)
        self.web_server = web_server
        self.loop: Optional[asyncio.AbstractEventLoop] = None
        self.error: Optional[Exception] = None

    def run(self):
        """Create a fresh event loop and drive ``web_server.start()`` on it.

        Any exception is recorded in ``self.error`` and printed rather than
        propagated.
        """
        try:
            event_loop = asyncio.new_event_loop()
            self.loop = event_loop
            asyncio.set_event_loop(event_loop)
            event_loop.run_until_complete(self.web_server.start())
        except Exception as exc:
            self.error = exc
            print(f"ERROR: Web server failed to start: {exc}")
|
||||||
|
|
||||||
|
|
||||||
|
class EngineInitThread(Thread):
    """Daemon thread that initializes a transcription engine in the background.

    Args:
        engine: Object exposing ``initialize()``; a truthy return value means
            success.
        on_complete: Called with ``(success, message)`` once initialization
            has finished or failed.
    """

    def __init__(self, engine, on_complete: Callable[[bool, str], None]):
        super().__init__(daemon=True)
        self.engine = engine
        self.on_complete = on_complete

    def run(self):
        """Run ``engine.initialize()`` and report the outcome via the callback."""
        try:
            if self.engine.initialize():
                outcome = (True, "Engine initialized successfully")
            else:
                outcome = (False, "Failed to initialize engine")
            # Kept inside the try block so an exception raised by the callback
            # is reported through the failure path below, as before.
            self.on_complete(*outcome)
        except Exception as exc:
            self.on_complete(False, f"Error initializing engine: {exc}")
|
||||||
|
|
||||||
|
|
||||||
|
class AppController:
|
||||||
|
"""Headless controller managing the transcription application lifecycle.
|
||||||
|
|
||||||
|
This replaces the orchestration logic that previously lived in MainWindow.
|
||||||
|
It manages:
|
||||||
|
- Transcription engine lifecycle (init, start, stop, reload)
|
||||||
|
- Web server for OBS display
|
||||||
|
- Server sync for multi-user mode
|
||||||
|
- Configuration
|
||||||
|
- Update checking
|
||||||
|
|
||||||
|
All state changes are communicated via callbacks, making it UI-agnostic.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, config: Optional[Config] = None):
    """Set up the controller with nothing started yet.

    Args:
        config: Configuration to use; a default ``Config()`` is created when
            omitted.
    """
    self.config = config if config else Config()
    self.device_manager = DeviceManager()

    # --- application state (guarded by _state_lock) ---
    self._state = AppState.INITIALIZING
    self._state_lock = Lock()
    self.is_transcribing = False

    # --- transcription engine ---
    self.transcription_engine = None
    self._engine_init_thread: Optional[EngineInitThread] = None
    self.current_model_size: Optional[str] = None
    self.current_device_config: Optional[str] = None

    # --- web server ---
    self.web_server: Optional[TranscriptionWebServer] = None
    self.web_server_thread: Optional[WebServerThread] = None
    self.actual_web_port: Optional[int] = None  # port the server actually bound to

    # --- multi-user server sync ---
    self.server_sync_client: Optional[ServerSyncClient] = None

    # --- collected transcription results ---
    self.transcriptions: List[TranscriptionResult] = []

    # --- notification hooks, assigned by the frontend / API server ---
    self.on_state_changed: Optional[Callable[[str, str], None]] = None  # (state, message)
    self.on_transcription: Optional[Callable[[dict], None]] = None  # final transcription
    self.on_preview: Optional[Callable[[dict], None]] = None  # realtime preview
    self.on_error: Optional[Callable[[str], None]] = None
    self.on_credits_low: Optional[Callable[[int], None]] = None
|
||||||
|
|
||||||
|
@property
def state(self) -> str:
    """Current application state, read under the state lock."""
    with self._state_lock:
        current = self._state
    return current
|
||||||
|
|
||||||
|
def _set_state(self, state: str, message: str = ""):
    """Store the new state and notify the ``on_state_changed`` listener.

    Args:
        state: New state value.
        message: Optional human-readable detail passed to the listener.
    """
    with self._state_lock:
        self._state = state

    # Notify after releasing the lock: it is a plain (non-reentrant) Lock, so
    # a listener that reads the `state` property would otherwise deadlock.
    listener = self.on_state_changed
    if listener:
        listener(state, message)
|
||||||
|
|
||||||
|
# ── Lifecycle ──────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def initialize(self):
    """Start the web server, then kick off transcription engine loading.

    Intended to be called once at startup. Does not wait for the engine:
    engine init happens in a background thread.
    """
    startup_steps = (
        ("Starting web server...", self._start_web_server),
        ("Loading transcription engine...", self._initialize_engine),
    )
    for status_text, run_step in startup_steps:
        self._set_state(AppState.INITIALIZING, status_text)
        run_step()
|
||||||
|
|
||||||
|
def shutdown(self):
    """Gracefully shut down all components.

    Order: active transcription, web server event loop, transcription engine,
    then a bounded wait for a still-running engine init thread. Failures
    while stopping the web server or engine are printed, not raised.
    """
    if self.is_transcribing:
        self.stop_transcription()

    # Ask the web server's event loop (owned by its thread) to stop.
    server_thread = self.web_server_thread
    if server_thread and server_thread.is_alive():
        try:
            server_loop = server_thread.loop
            if server_loop:
                server_loop.call_soon_threadsafe(server_loop.stop)
        except Exception as exc:
            print(f"Warning: Error stopping web server: {exc}")

    engine = self.transcription_engine
    if engine:
        try:
            engine.stop()
        except Exception as exc:
            print(f"Warning: Error stopping engine: {exc}")

    # Give an in-flight engine initialization up to 5 seconds to finish.
    init_thread = self._engine_init_thread
    if init_thread and init_thread.is_alive():
        init_thread.join(timeout=5)
|
||||||
|
|
||||||
|
# ── Web Server ─────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def _start_web_server(self):
|
||||||
|
"""Start the FastAPI web server for OBS display."""
|
||||||
|
try:
|
||||||
|
host = self.config.get('web_server.host', '127.0.0.1')
|
||||||
|
port = self.config.get('web_server.port', 8080)
|
||||||
|
|
||||||
|
# Gather display settings
|
||||||
|
ws_kwargs = self._get_web_server_kwargs(host, port)
|
||||||
|
|
||||||
|
# Try up to 5 ports
|
||||||
|
ports_to_try = [port] + [port + i for i in range(1, 5)]
|
||||||
|
|
||||||
|
for try_port in ports_to_try:
|
||||||
|
print(f"Attempting to start web server at http://{host}:{try_port}")
|
||||||
|
ws_kwargs['port'] = try_port
|
||||||
|
|
||||||
|
self.web_server = TranscriptionWebServer(**ws_kwargs)
|
||||||
|
self.web_server_thread = WebServerThread(self.web_server)
|
||||||
|
self.web_server_thread.start()
|
||||||
|
|
||||||
|
time.sleep(0.5)
|
||||||
|
|
||||||
|
if self.web_server_thread.error:
|
||||||
|
error_str = str(self.web_server_thread.error)
|
||||||
|
if "address already in use" in error_str.lower() or "errno 98" in error_str.lower():
|
||||||
|
print(f"Port {try_port} is in use, trying next port...")
|
||||||
|
self.web_server = None
|
||||||
|
self.web_server_thread = None
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
print(f"Web server failed to start: {self.web_server_thread.error}")
|
||||||
|
self.web_server = None
|
||||||
|
self.web_server_thread = None
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
self.actual_web_port = try_port
|
||||||
|
print(f"Web server started at http://{host}:{try_port}")
|
||||||
|
return
|
||||||
|
|
||||||
|
print(f"WARNING: Could not start web server on any port")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print(f"ERROR: Failed to initialize web server: {e}")
|
||||||
|
self.web_server = None
|
||||||
|
self.web_server_thread = None
|
||||||
|
|
||||||
|
def _get_web_server_kwargs(self, host: str, port: int) -> dict:
|
||||||
|
"""Build kwargs dict for TranscriptionWebServer from config."""
|
||||||
|
return dict(
|
||||||
|
host=host,
|
||||||
|
port=port,
|
||||||
|
show_timestamps=self.config.get('display.show_timestamps', True),
|
||||||
|
fade_after_seconds=self.config.get('display.fade_after_seconds', 10),
|
||||||
|
max_lines=self.config.get('display.max_lines', 50),
|
||||||
|
font_family=self.config.get('display.font_family', 'Arial'),
|
||||||
|
font_size=self.config.get('display.font_size', 16),
|
||||||
|
fonts_dir=self.config.fonts_dir,
|
||||||
|
font_source=self.config.get('display.font_source', 'System Font'),
|
||||||
|
websafe_font=self.config.get('display.websafe_font', 'Arial'),
|
||||||
|
google_font=self.config.get('display.google_font', 'Roboto'),
|
||||||
|
user_color=self.config.get('display.user_color', '#4CAF50'),
|
||||||
|
text_color=self.config.get('display.text_color', '#FFFFFF'),
|
||||||
|
background_color=self.config.get('display.background_color', '#000000B3'),
|
||||||
|
)
|
||||||
|
|
||||||
|
# ── Transcription Engine ───────────────────────────────────────
|
||||||
|
|
||||||
|
def _initialize_engine(self):
|
||||||
|
"""Initialize the transcription engine in a background thread."""
|
||||||
|
device_config = self.config.get('transcription.device', 'auto')
|
||||||
|
self.device_manager.set_device(device_config)
|
||||||
|
|
||||||
|
audio_device_str = self.config.get('audio.input_device', 'default')
|
||||||
|
audio_device = None if audio_device_str == 'default' else int(audio_device_str)
|
||||||
|
|
||||||
|
model = self.config.get('transcription.model', 'base.en')
|
||||||
|
language = self.config.get('transcription.language', 'en')
|
||||||
|
device = self.device_manager.get_device_for_whisper()
|
||||||
|
compute_type = self.config.get('transcription.compute_type', 'default')
|
||||||
|
|
||||||
|
self.current_model_size = model
|
||||||
|
self.current_device_config = device_config
|
||||||
|
|
||||||
|
user_name = self.config.get('user.name', 'User')
|
||||||
|
continuous_mode = self.config.get('transcription.continuous_mode', False)
|
||||||
|
|
||||||
|
if continuous_mode:
|
||||||
|
post_speech_silence = 0.15
|
||||||
|
min_gap = 0.0
|
||||||
|
min_recording = 0.3
|
||||||
|
else:
|
||||||
|
post_speech_silence = self.config.get('transcription.post_speech_silence_duration', 0.3)
|
||||||
|
min_gap = self.config.get('transcription.min_gap_between_recordings', 0.0)
|
||||||
|
min_recording = self.config.get('transcription.min_length_of_recording', 0.5)
|
||||||
|
|
||||||
|
remote_mode = self.config.get('remote.mode', 'local')
|
||||||
|
|
||||||
|
if remote_mode in ('managed', 'byok'):
|
||||||
|
self.transcription_engine = DeepgramTranscriptionEngine(
|
||||||
|
config=self.config,
|
||||||
|
user_name=user_name,
|
||||||
|
input_device_index=audio_device,
|
||||||
|
)
|
||||||
|
self.transcription_engine.set_callbacks(
|
||||||
|
realtime_callback=self._on_realtime_transcription,
|
||||||
|
final_callback=self._on_final_transcription,
|
||||||
|
)
|
||||||
|
self.transcription_engine.set_error_callback(self._on_remote_error)
|
||||||
|
self.transcription_engine.set_credits_low_callback(self._on_credits_low)
|
||||||
|
else:
|
||||||
|
self.transcription_engine = RealtimeTranscriptionEngine(
|
||||||
|
model=model,
|
||||||
|
device=device,
|
||||||
|
language=language,
|
||||||
|
compute_type=compute_type,
|
||||||
|
enable_realtime_transcription=self.config.get('transcription.enable_realtime_transcription', False),
|
||||||
|
realtime_model=self.config.get('transcription.realtime_model', 'tiny.en'),
|
||||||
|
realtime_processing_pause=self.config.get('transcription.realtime_processing_pause', 0.1),
|
||||||
|
silero_sensitivity=self.config.get('transcription.silero_sensitivity', 0.4),
|
||||||
|
silero_use_onnx=self.config.get('transcription.silero_use_onnx', True),
|
||||||
|
webrtc_sensitivity=self.config.get('transcription.webrtc_sensitivity', 3),
|
||||||
|
post_speech_silence_duration=post_speech_silence,
|
||||||
|
min_length_of_recording=min_recording,
|
||||||
|
min_gap_between_recordings=min_gap,
|
||||||
|
pre_recording_buffer_duration=self.config.get('transcription.pre_recording_buffer_duration', 0.2),
|
||||||
|
beam_size=self.config.get('transcription.beam_size', 5),
|
||||||
|
initial_prompt=self.config.get('transcription.initial_prompt', ''),
|
||||||
|
no_log_file=self.config.get('transcription.no_log_file', True),
|
||||||
|
input_device_index=audio_device,
|
||||||
|
user_name=user_name,
|
||||||
|
)
|
||||||
|
self.transcription_engine.set_callbacks(
|
||||||
|
realtime_callback=self._on_realtime_transcription,
|
||||||
|
final_callback=self._on_final_transcription,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Start init in background thread
|
||||||
|
self._engine_init_thread = EngineInitThread(
|
||||||
|
self.transcription_engine,
|
||||||
|
self._on_engine_ready,
|
||||||
|
)
|
||||||
|
self._engine_init_thread.start()
|
||||||
|
|
||||||
|
def _on_engine_ready(self, success: bool, message: str):
|
||||||
|
"""Called from EngineInitThread when engine init completes."""
|
||||||
|
if success:
|
||||||
|
remote_mode = self.config.get('remote.mode', 'local')
|
||||||
|
if remote_mode in ('managed', 'byok'):
|
||||||
|
mode_label = 'Managed' if remote_mode == 'managed' else 'BYOK'
|
||||||
|
device_display = f"Deepgram ({mode_label})"
|
||||||
|
elif self.transcription_engine:
|
||||||
|
actual_device = self.transcription_engine.device
|
||||||
|
compute_type = self.transcription_engine.compute_type
|
||||||
|
device_display = f"{actual_device.upper()} ({compute_type})"
|
||||||
|
else:
|
||||||
|
device_display = "Unknown"
|
||||||
|
|
||||||
|
self._set_state(AppState.READY, f"Ready | Device: {device_display}")
|
||||||
|
else:
|
||||||
|
self._set_state(AppState.ERROR, message)
|
||||||
|
|
||||||
|
# ── Transcription Control ──────────────────────────────────────
|
||||||
|
|
||||||
|
def start_transcription(self) -> tuple[bool, str]:
    """Begin recording and transcribing.

    Returns:
        ``(success, message)``. Fails without side effects when already
        transcribing or when the engine is missing or not ready.
    """
    if self.is_transcribing:
        return False, "Already transcribing"

    engine = self.transcription_engine
    if not (engine and engine.is_ready()):
        return False, "Transcription engine not ready"

    try:
        recording_started = engine.start_recording()
        if not recording_started:
            return False, "Failed to start recording"

        # Start server sync if enabled
        sync_enabled = self.config.get('server_sync.enabled', False)
        if sync_enabled:
            self._start_server_sync()

        self.is_transcribing = True
        self._set_state(AppState.TRANSCRIBING, "Transcribing...")
    except Exception as exc:
        return False, f"Failed to start transcription: {exc}"

    return True, "Transcription started"
|
||||||
|
|
||||||
|
def stop_transcription(self) -> tuple[bool, str]:
    """Stop recording and tear down server sync.

    Returns:
        ``(success, message)``. Fails when no transcription is running or
        when stopping raises.
    """
    if not self.is_transcribing:
        return False, "Not transcribing"

    try:
        engine = self.transcription_engine
        if engine:
            engine.stop_recording()

        sync_client = self.server_sync_client
        if sync_client:
            sync_client.stop()
            self.server_sync_client = None

        self.is_transcribing = False
        self._set_state(AppState.READY, "Ready")
    except Exception as exc:
        return False, f"Failed to stop transcription: {exc}"

    return True, "Transcription stopped"
|
||||||
|
|
||||||
|
def clear_transcriptions(self) -> int:
    """Empty the stored transcription list in place.

    Returns:
        Number of entries that were removed.
    """
    entries = self.transcriptions
    removed = len(entries)
    entries.clear()
    return removed
|
||||||
|
|
||||||
|
def get_transcriptions_text(self, include_timestamps: bool = True) -> str:
    """Render all stored transcriptions as newline-separated text.

    Each line has the form ``[HH:MM:SS] user: text``. The timestamp prefix
    is omitted when ``include_timestamps`` is false or the result has no
    timestamp; the user prefix is omitted when the name is empty or blank.

    Args:
        include_timestamps: Whether to prefix lines with the result time.

    Returns:
        The formatted lines joined by newlines (``""`` when nothing is stored).
    """
    lines = []
    for result in self.transcriptions:
        parts = []
        # The transcription callbacks treat timestamp as optional, so a
        # result without one must not break the export.
        if include_timestamps and result.timestamp:
            parts.append(f"[{result.timestamp.strftime('%H:%M:%S')}]")
        if result.user_name and result.user_name.strip():
            parts.append(f"{result.user_name}:")
        parts.append(result.text)
        lines.append(" ".join(parts))
    return "\n".join(lines)
|
||||||
|
|
||||||
|
def reload_engine(self) -> tuple[bool, str]:
    """Tear down the current engine and start building a new one from config.

    Any running transcription is stopped first. The new engine loads in the
    background through ``_initialize_engine``, so a successful return only
    means the reload was started.

    Returns:
        ``(True, "Engine reload initiated")`` on success, otherwise
        ``(False, <error text>)`` after moving to ``AppState.ERROR``.
    """
    try:
        if self.is_transcribing:
            self.stop_transcription()

        self._set_state(AppState.RELOADING, "Reloading engine...")

        # Wait for any existing init thread
        pending_init = self._engine_init_thread
        if pending_init and pending_init.is_alive():
            pending_init.join(timeout=10)

        # Stop current engine; a failure here must not block the reload
        old_engine = self.transcription_engine
        if old_engine:
            try:
                old_engine.stop()
            except Exception as stop_error:
                print(f"Warning: Error stopping engine: {stop_error}")

        # Re-initialize
        self._initialize_engine()
    except Exception as exc:
        self._set_state(AppState.ERROR, f"Engine reload failed: {exc}")
        return False, str(exc)

    return True, "Engine reload initiated"
|
||||||
|
|
||||||
|
# ── Transcription Callbacks ────────────────────────────────────
|
||||||
|
|
||||||
|
def _on_realtime_transcription(self, result: TranscriptionResult):
    """Fan a realtime (preview) result out to every output.

    Ignored unless transcription is active. The result goes, in order, to
    the web server broadcast, the server sync client, and the ``on_preview``
    listener; each is skipped when not set. Any exception is printed and
    swallowed so it does not propagate to the caller.
    """
    if not self.is_transcribing:
        return

    try:
        # Broadcast to web server
        server_thread = self.web_server_thread
        if self.web_server and server_thread and server_thread.loop:
            # Scheduled on the web server thread's event loop
            preview_coro = self.web_server.broadcast_preview(
                result.text, result.user_name, result.timestamp
            )
            asyncio.run_coroutine_threadsafe(preview_coro, server_thread.loop)

        # Send to server sync
        sync_client = self.server_sync_client
        if sync_client:
            sync_client.send_preview(result.text, result.timestamp)

        # Notify frontend
        notify = self.on_preview
        if notify:
            stamp = result.timestamp
            notify({
                "text": result.text,
                "user_name": result.user_name,
                "timestamp": stamp.strftime("%H:%M:%S") if stamp else None,
                "is_preview": True,
            })

    except Exception as exc:
        print(f"Error handling realtime transcription: {exc}")
|
||||||
|
|
||||||
|
def _on_final_transcription(self, result: TranscriptionResult):
    """Store a final result and fan it out to every output.

    Ignored unless transcription is active. The result is appended to
    ``self.transcriptions`` and then goes, in order, to the web server
    broadcast, the server sync client, and the ``on_transcription``
    listener; each is skipped when not set. Any exception is printed and
    swallowed so it does not propagate to the caller.
    """
    if not self.is_transcribing:
        return

    try:
        self.transcriptions.append(result)

        # Broadcast to web server
        server_thread = self.web_server_thread
        if self.web_server and server_thread and server_thread.loop:
            # Scheduled on the web server thread's event loop
            broadcast_coro = self.web_server.broadcast_transcription(
                result.text, result.user_name, result.timestamp
            )
            asyncio.run_coroutine_threadsafe(broadcast_coro, server_thread.loop)

        # Send to server sync
        sync_client = self.server_sync_client
        if sync_client:
            sync_client.send_transcription(result.text, result.timestamp)

        # Notify frontend
        notify = self.on_transcription
        if notify:
            stamp = result.timestamp
            notify({
                "text": result.text,
                "user_name": result.user_name,
                "timestamp": stamp.strftime("%H:%M:%S") if stamp else None,
                "is_preview": False,
            })

    except Exception as exc:
        print(f"Error handling final transcription: {exc}")
|
||||||
|
|
||||||
|
def _on_remote_error(self, error_msg: str):
|
||||||
|
"""Handle error from remote transcription service."""
|
||||||
|
print(f"Remote transcription error: {error_msg}")
|
||||||
|
if self.on_error:
|
||||||
|
self.on_error(error_msg)
|
||||||
|
|
||||||
|
def _on_credits_low(self, seconds_remaining: int):
|
||||||
|
"""Handle low credits warning from proxy."""
|
||||||
|
if self.on_credits_low:
|
||||||
|
self.on_credits_low(seconds_remaining)
|
||||||
|
|
||||||
|
# ── Server Sync ────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def _start_server_sync(self):
|
||||||
|
"""Start server sync client."""
|
||||||
|
try:
|
||||||
|
url = self.config.get('server_sync.url', '')
|
||||||
|
if not url:
|
||||||
|
print("Server sync enabled but no URL configured")
|
||||||
|
return
|
||||||
|
|
||||||
|
room = self.config.get('server_sync.room', 'default')
|
||||||
|
passphrase = self.config.get('server_sync.passphrase', '')
|
||||||
|
user_name = self.config.get('user.name', 'User')
|
||||||
|
fonts_dir = self.config.fonts_dir
|
||||||
|
|
||||||
|
font_source = self.config.get('display.font_source', 'System Font')
|
||||||
|
if font_source == "System Font":
|
||||||
|
font_source = "None"
|
||||||
|
|
||||||
|
self.server_sync_client = ServerSyncClient(
|
||||||
|
url=url,
|
||||||
|
room=room,
|
||||||
|
passphrase=passphrase,
|
||||||
|
user_name=user_name,
|
||||||
|
fonts_dir=fonts_dir,
|
||||||
|
font_source=font_source,
|
||||||
|
websafe_font=self.config.get('display.websafe_font', '') or None,
|
||||||
|
google_font=self.config.get('display.google_font', '') or None,
|
||||||
|
custom_font_file=self.config.get('display.custom_font_file', '') or None,
|
||||||
|
user_color=self.config.get('display.user_color', '#4CAF50'),
|
||||||
|
text_color=self.config.get('display.text_color', '#FFFFFF'),
|
||||||
|
background_color=self.config.get('display.background_color', '#000000B3'),
|
||||||
|
)
|
||||||
|
self.server_sync_client.start()
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Error starting server sync: {e}")
|
||||||
|
|
||||||
|
# ── Configuration ──────────────────────────────────────────────
|
||||||
|
|
||||||
|
def apply_settings(self, new_config: Optional[dict] = None) -> tuple[bool, str]:
    """Apply settings changes. If new_config is provided, merge it first.

    Pushes the ``display.*`` settings onto the running web server, restarts
    server sync when it is currently running, and reloads the engine when
    the configured model or device differs from the loaded one.

    Args:
        new_config: Optional mapping of config key to value, written via
            ``self.config.set`` before anything else happens.

    Returns:
        ``(engine_reload_needed, message)``. When a reload was needed but
        ``reload_engine`` reported failure, the flag is still ``True`` and
        the message carries the failure text.
    """
    if new_config:
        for key, value in new_config.items():
            self.config.set(key, value)

    # Update web server display settings
    if self.web_server:
        # (web server attribute, config key, default when the key is unset)
        display_settings = (
            ('show_timestamps', 'display.show_timestamps', True),
            ('fade_after_seconds', 'display.fade_after_seconds', 10),
            ('max_lines', 'display.max_lines', 50),
            ('font_family', 'display.font_family', 'Arial'),
            ('font_size', 'display.font_size', 16),
            ('font_source', 'display.font_source', 'System Font'),
            ('websafe_font', 'display.websafe_font', 'Arial'),
            ('google_font', 'display.google_font', 'Roboto'),
            ('user_color', 'display.user_color', '#4CAF50'),
            ('text_color', 'display.text_color', '#FFFFFF'),
            ('background_color', 'display.background_color', '#000000B3'),
        )
        for attribute, config_key, default in display_settings:
            setattr(self.web_server, attribute, self.config.get(config_key, default))

    # Restart server sync if running
    if self.is_transcribing and self.server_sync_client:
        self.server_sync_client.stop()
        self.server_sync_client = None
        if self.config.get('server_sync.enabled', False):
            self._start_server_sync()

    # Check if model/device changed
    new_model = self.config.get('transcription.model', 'base.en')
    new_device = self.config.get('transcription.device', 'auto')
    engine_reload_needed = (
        self.current_model_size != new_model
        or self.current_device_config != new_device
    )

    if not engine_reload_needed:
        return False, "Settings applied successfully."

    # reload_engine reports failure through its return value, so check it
    # instead of claiming the reload started regardless.
    reloaded, reload_message = self.reload_engine()
    if not reloaded:
        return True, f"Settings applied, but engine reload failed: {reload_message}"
    return True, "Settings applied. Engine reloading with new model/device."
|
||||||
|
|
||||||
|
def get_status(self) -> dict:
    """Get current application status as a dict.

    Returns:
        Dict with ``state``, ``is_transcribing``, ``version``,
        ``engine_device``, a nested ``web_server`` dict (``host``, ``port``,
        ``url``, ``running``), ``transcription_count``, ``remote_mode`` and
        ``server_sync_enabled``.
    """
    host = self.config.get('web_server.host', '127.0.0.1')
    # Prefer the port actually bound, which may differ from the configured one
    port = self.actual_web_port or self.config.get('web_server.port', 8080)

    # The previous device_manager.get_device_info() call was dropped: its
    # result was never used in the status payload.
    remote_mode = self.config.get('remote.mode', 'local')
    engine = self.transcription_engine
    if remote_mode in ('managed', 'byok') and engine:
        mode_label = 'Managed' if remote_mode == 'managed' else 'BYOK'
        engine_device = f"Deepgram ({mode_label})"
    elif engine and hasattr(engine, 'device'):
        engine_device = f"{engine.device.upper()} ({engine.compute_type})"
    else:
        engine_device = "Not initialized"

    server_thread = self.web_server_thread
    return {
        "state": self.state,
        "is_transcribing": self.is_transcribing,
        "version": __version__,
        "engine_device": engine_device,
        "web_server": {
            "host": host,
            "port": port,
            "url": f"http://{host}:{port}",
            "running": server_thread is not None and server_thread.is_alive(),
        },
        "transcription_count": len(self.transcriptions),
        "remote_mode": remote_mode,
        "server_sync_enabled": self.config.get('server_sync.enabled', False),
    }
|
||||||
|
|
||||||
|
def get_audio_devices(self) -> list[dict]:
    """List available audio input devices.

    Returns:
        One ``{"index", "name"}`` dict per device that reports at least one
        input channel. When none are found, or when ``sounddevice`` cannot
        be imported or queried, a single ``{"index": 0, "name": "Default"}``
        placeholder is returned instead.
    """
    devices = []
    try:
        # Imported inside the try so a missing or broken sounddevice install
        # degrades to the placeholder instead of raising.
        import sounddevice as sd

        for index, device in enumerate(sd.query_devices()):
            if device['max_input_channels'] > 0:
                devices.append({"index": index, "name": device['name']})
    except Exception as e:
        # Best effort: report the failure, then fall back below
        print(f"Warning: Could not enumerate audio devices: {e}")

    if not devices:
        devices = [{"index": 0, "name": "Default"}]
    return devices
|
||||||
|
|
||||||
|
def get_compute_devices(self) -> list[dict]:
    """List selectable compute devices.

    Returns:
        An ``auto`` entry followed by one ``{"id", "name"}`` dict for every
        ``(id, name)`` pair reported by the device manager.
    """
    detected = [
        {"id": dev_id, "name": dev_name}
        for dev_id, dev_name in self.device_manager.get_device_info()
    ]
    return [{"id": "auto", "name": "Auto-detect"}, *detected]
|
||||||
|
|
||||||
|
# ── Update Checking ────────────────────────────────────────────
|
||||||
|
|
||||||
|
def check_for_updates(self) -> dict:
    """Check for a newer release synchronously.

    Returns:
        Always a dict with an ``available`` flag. When an update exists it
        also carries ``version``, ``download_url``, ``release_notes`` and
        ``skipped``. Otherwise it carries ``current_version``, or ``error``
        when checking is not configured or the check raised.
    """
    from client.update_checker import UpdateChecker

    cfg = self.config
    gitea_url = cfg.get('updates.gitea_url', 'https://repo.anhonesthost.net')
    owner = cfg.get('updates.owner', 'streamer-tools')
    repo = cfg.get('updates.repo', 'local-transcription')

    if not (gitea_url and owner and repo):
        return {"available": False, "error": "Update checking not configured"}

    checker = UpdateChecker(
        current_version=__version__,
        gitea_url=gitea_url,
        owner=owner,
        repo=repo,
    )

    try:
        release = checker.check_for_update()
        # Record the check time whether or not an update was found
        cfg.set('updates.last_check', datetime.now().isoformat())

        if not release:
            return {"available": False, "current_version": __version__}

        skipped_versions = cfg.get('updates.skipped_versions', [])
        return {
            "available": True,
            "version": release.version,
            "download_url": release.download_url,
            "release_notes": release.release_notes,
            "skipped": release.version in skipped_versions,
        }
    except Exception as exc:
        return {"available": False, "error": str(exc)}
|
||||||
|
|
||||||
|
def skip_version(self, version: str):
    """Add ``version`` to the skipped-updates list in config.

    Does nothing when the version is already listed.

    Args:
        version: Version string to stop notifying about.
    """
    skipped_versions = self.config.get('updates.skipped_versions', [])
    if version in skipped_versions:
        return
    skipped_versions.append(version)
    self.config.set('updates.skipped_versions', skipped_versions)
|
||||||
126
backend/main_headless.py
Normal file
126
backend/main_headless.py
Normal file
@@ -0,0 +1,126 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Headless entry point for the Local Transcription backend.
|
||||||
|
|
||||||
|
Runs the transcription engine + API server without any GUI (no PySide6).
|
||||||
|
Designed to be launched as a Tauri sidecar or run standalone for development.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python -m backend.main_headless [--port PORT] [--host HOST]
|
||||||
|
|
||||||
|
The backend prints the actual port to stdout as JSON on startup:
|
||||||
|
{"event": "ready", "port": 8080}
|
||||||
|
|
||||||
|
This allows the Tauri shell to discover which port the backend bound to.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import multiprocessing
|
||||||
|
import os
|
||||||
|
import signal
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# PyInstaller compatibility: freeze_support() has to run before anything else
multiprocessing.freeze_support()

if __name__ == "__main__":
    # A RuntimeError here is ignored so startup continues with whatever
    # start method is in effect.
    try:
        multiprocessing.set_start_method('spawn', force=True)
    except RuntimeError:
        pass

# Put the project root (the directory above this file's directory) on the
# import path and make it the working directory before project imports run.
backend_dir = Path(__file__).resolve().parent
project_root = backend_dir.parent
sys.path.insert(0, str(project_root))
os.chdir(project_root)

from client.instance_lock import InstanceLock
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Run the headless backend: OBS display web server plus API server.

    Parses ``--host``/``--port``, takes the single-instance lock, starts the
    ``AppController`` and then blocks in uvicorn serving the API. Progress
    is reported as one JSON object per line on stdout (``state``, ``ready``,
    ``shutdown`` and ``error`` events).

    The ``ready`` event carries the OBS display port; the API server
    listens on that port plus one.
    """
    parser = argparse.ArgumentParser(description="Local Transcription headless backend")
    parser.add_argument("--host", default="127.0.0.1", help="API server host (default: 127.0.0.1)")
    parser.add_argument("--port", type=int, default=8080, help="API server port (default: 8080)")
    args = parser.parse_args()

    instance_lock = InstanceLock()
    if not instance_lock.acquire():
        print(json.dumps({"event": "error", "message": "Another instance is already running"}),
              flush=True)
        sys.exit(1)

    # Bound before the signal handlers are installed: the handler closes over
    # this name, and a signal arriving earlier would otherwise hit an unbound
    # variable.
    controller = None
    cleaned_up = False

    def cleanup():
        """Shut the controller down and release the lock, at most once."""
        nonlocal cleaned_up
        if cleaned_up:
            return
        cleaned_up = True
        if controller:
            controller.shutdown()
        instance_lock.release()

    def handle_shutdown(signum, frame):
        print(json.dumps({"event": "shutdown"}), flush=True)
        cleanup()
        # sys.exit raises SystemExit, which also passes through the finally
        # block below; cleanup() being idempotent keeps that harmless.
        sys.exit(0)

    signal.signal(signal.SIGTERM, handle_shutdown)
    signal.signal(signal.SIGINT, handle_shutdown)

    try:
        from backend.app_controller import AppController
        from backend.api_server import APIServer

        # Override web server host/port from CLI args
        from client.config import Config
        config = Config()
        config.set('web_server.host', args.host)
        config.set('web_server.port', args.port)

        # Create controller and initialize
        controller = AppController(config=config)

        # Wire a state callback that prints every state change as JSON
        def on_state_changed(state, message):
            event = {"event": "state", "state": state, "message": message}
            print(json.dumps(event), flush=True)

        controller.on_state_changed = on_state_changed

        # Initialize engine + web server
        controller.initialize()

        # Create API server wrapping the controller
        api_server = APIServer(controller)

        # Determine actual port (web server may have shifted if port was in use)
        actual_port = controller.actual_web_port or args.port

        # Print ready event so Tauri can discover the port
        print(json.dumps({"event": "ready", "port": actual_port}), flush=True)

        # Run the API server (blocks)
        import uvicorn
        import logging

        logging.getLogger("uvicorn").setLevel(logging.ERROR)
        logging.getLogger("uvicorn.access").setLevel(logging.ERROR)

        uvicorn.run(
            api_server.app,
            host=args.host,
            port=actual_port + 1,  # API on port+1, OBS display on the main port
            log_level="error",
            access_log=False,
        )

    except KeyboardInterrupt:
        print(json.dumps({"event": "shutdown", "reason": "keyboard_interrupt"}), flush=True)
    except Exception as e:
        print(json.dumps({"event": "error", "message": str(e)}), flush=True)
        import traceback
        traceback.print_exc()
        sys.exit(1)
    finally:
        cleanup()


if __name__ == "__main__":
    main()
|
||||||
@@ -48,6 +48,25 @@ class Config:
|
|||||||
# Save the default configuration
|
# Save the default configuration
|
||||||
self.save()
|
self.save()
|
||||||
|
|
||||||
|
# Migrate remote_processing -> remote
|
||||||
|
self._migrate_remote_config()
|
||||||
|
|
||||||
|
def _migrate_remote_config(self):
|
||||||
|
"""Migrate old remote_processing config to new remote config."""
|
||||||
|
if 'remote_processing' in self.config and 'remote' not in self.config:
|
||||||
|
old = self.config['remote_processing']
|
||||||
|
self.config['remote'] = {
|
||||||
|
'mode': 'managed' if old.get('enabled', False) else 'local',
|
||||||
|
'server_url': old.get('server_url', ''),
|
||||||
|
'auth_token': '',
|
||||||
|
'byok_api_key': old.get('api_key', ''),
|
||||||
|
'deepgram_model': 'nova-2',
|
||||||
|
'language': 'en-US',
|
||||||
|
'fallback_to_local': old.get('fallback_to_local', True),
|
||||||
|
}
|
||||||
|
del self.config['remote_processing']
|
||||||
|
self.save()
|
||||||
|
|
||||||
def save(self) -> None:
|
def save(self) -> None:
|
||||||
"""Save current configuration to file."""
|
"""Save current configuration to file."""
|
||||||
with open(self.config_path, 'w') as f:
|
with open(self.config_path, 'w') as f:
|
||||||
|
|||||||
528
client/deepgram_transcription.py
Normal file
528
client/deepgram_transcription.py
Normal file
@@ -0,0 +1,528 @@
|
|||||||
|
"""Deepgram-based transcription engine using WebSocket streaming.
|
||||||
|
|
||||||
|
Supports two modes:
|
||||||
|
- Managed mode: connects to a proxy server that handles Deepgram credentials
|
||||||
|
- BYOK mode: connects directly to the Deepgram API with a user-provided key
|
||||||
|
|
||||||
|
Implements the same duck-type interface as RealtimeTranscriptionEngine so
|
||||||
|
MainWindow can use it as a drop-in replacement.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import numpy as np
|
||||||
|
import threading
|
||||||
|
from datetime import datetime
|
||||||
|
from queue import Queue, Empty
|
||||||
|
from typing import Optional, Callable
|
||||||
|
|
||||||
|
from client.transcription_engine_realtime import TranscriptionResult
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class DeepgramTranscriptionEngine:
|
||||||
|
"""
|
||||||
|
Transcription engine that streams audio to Deepgram via WebSocket.
|
||||||
|
|
||||||
|
In managed mode the connection goes through a proxy at
|
||||||
|
``wss://<server>/ws/transcribe`` which handles authentication and
|
||||||
|
Deepgram credentials. In BYOK (bring-your-own-key) mode the
|
||||||
|
connection goes directly to the Deepgram API.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------ #
|
||||||
|
# Construction / configuration
|
||||||
|
# ------------------------------------------------------------------ #
|
||||||
|
|
||||||
|
def __init__(self, config, user_name: str = "User", input_device_index: Optional[int] = None):
|
||||||
|
"""
|
||||||
|
Initialise the engine from a :class:`client.config.Config` object.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
config: Application ``Config`` instance.
|
||||||
|
user_name: Display name attached to transcriptions.
|
||||||
|
input_device_index: Index of the audio input device to use
|
||||||
|
(``None`` for the system default).
|
||||||
|
"""
|
||||||
|
self.config = config
|
||||||
|
self.user_name = user_name
|
||||||
|
self.input_device_index = input_device_index
|
||||||
|
|
||||||
|
# Mode: 'managed' (proxy) or 'byok' (direct Deepgram)
|
||||||
|
self.mode: str = config.get("remote.mode", "managed")
|
||||||
|
|
||||||
|
# Managed-mode settings
|
||||||
|
self.server_url: str = config.get("remote.server_url", "")
|
||||||
|
self.auth_token: str = config.get("remote.auth_token", "")
|
||||||
|
|
||||||
|
# BYOK-mode settings
|
||||||
|
self.byok_api_key: str = config.get("remote.byok_api_key", "")
|
||||||
|
|
||||||
|
# Deepgram model / language (used in both modes)
|
||||||
|
self.deepgram_model: str = config.get("remote.deepgram_model", "nova-2")
|
||||||
|
self.language: str = config.get("remote.language", "en-US")
|
||||||
|
|
||||||
|
# Audio parameters
|
||||||
|
self.sample_rate: int = 16000
|
||||||
|
self.channels: int = 1
|
||||||
|
self.blocksize: int = 4096
|
||||||
|
|
||||||
|
# Callbacks
|
||||||
|
self.realtime_callback: Optional[Callable[[TranscriptionResult], None]] = None
|
||||||
|
self.final_callback: Optional[Callable[[TranscriptionResult], None]] = None
|
||||||
|
self._on_error: Optional[Callable[[str], None]] = None
|
||||||
|
self._on_credits_low: Optional[Callable[[int], None]] = None
|
||||||
|
|
||||||
|
# Internal state
|
||||||
|
self._is_initialized: bool = False
|
||||||
|
self._is_recording: bool = False
|
||||||
|
self._stop_event: threading.Event = threading.Event()
|
||||||
|
self._audio_queue: Queue = Queue()
|
||||||
|
|
||||||
|
# Asyncio event loop running in a daemon thread
|
||||||
|
self._loop: Optional[asyncio.AbstractEventLoop] = None
|
||||||
|
self._thread: Optional[threading.Thread] = None
|
||||||
|
|
||||||
|
# WebSocket handle (set inside the async context)
|
||||||
|
self._ws = None
|
||||||
|
|
||||||
|
# sounddevice InputStream
|
||||||
|
self._stream = None
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------ #
|
||||||
|
# Callback setters
|
||||||
|
# ------------------------------------------------------------------ #
|
||||||
|
|
||||||
|
def set_callbacks(
    self,
    realtime_callback: Optional[Callable[[TranscriptionResult], None]] = None,
    final_callback: Optional[Callable[[TranscriptionResult], None]] = None,
):
    """Register the interim and final result hooks (same API as RealtimeTranscriptionEngine).

    Args:
        realtime_callback: Receives results whose ``is_final`` flag is falsy.
        final_callback: Receives results whose ``is_final`` flag is truthy.

    Passing ``None`` (the default) clears the corresponding hook.
    """
    self.realtime_callback, self.final_callback = realtime_callback, final_callback
|
||||||
|
|
||||||
|
def set_error_callback(self, fn: Optional[Callable[[str], None]]):
    """Register the error hook; ``fn`` is called with a message string.

    Pass ``None`` to remove a previously registered hook.
    """
    self._on_error = fn
|
||||||
|
|
||||||
|
def set_credits_low_callback(self, fn: Optional[Callable[[int], None]]):
    """Register the low-credit hook; ``fn`` is called with the seconds remaining.

    Pass ``None`` to remove a previously registered hook.
    """
    self._on_credits_low = fn
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------ #
|
||||||
|
# Public interface (duck-typed with RealtimeTranscriptionEngine)
|
||||||
|
# ------------------------------------------------------------------ #
|
||||||
|
|
||||||
|
def initialize(self) -> bool:
    """Check that the settings required by the selected mode are present.

    No connection is attempted; this only validates configuration.

    Returns:
        ``True`` if the engine is (or already was) ready to record,
        ``False`` if the mode is unknown or a required setting is empty.
    """
    if self._is_initialized:
        return True

    mode = self.mode
    if mode not in ("managed", "byok"):
        logger.error("Unknown remote mode: %s (expected 'managed' or 'byok')", mode)
        return False

    # Work out which required setting, if any, is missing for this mode.
    problem = None
    if mode == "managed":
        if not self.server_url:
            problem = "Managed mode requires a server URL (remote.server_url)"
        elif not self.auth_token:
            problem = "Managed mode requires an auth token (remote.auth_token)"
    elif not self.byok_api_key:
        problem = "BYOK mode requires an API key (remote.byok_api_key)"

    if problem is not None:
        logger.error(problem)
        return False

    self._is_initialized = True
    logger.info("DeepgramTranscriptionEngine initialised in %s mode", mode)
    return True
|
||||||
|
|
||||||
|
def start_recording(self) -> bool:
    """Start the WebSocket worker thread and open the audio input stream.

    The WebSocket connection itself is established asynchronously on the
    worker thread, so a ``True`` return means capture has started, not that
    the server has accepted the connection; connection failures are
    reported through the error callback.

    Returns:
        ``True`` if recording is (or already was) active, ``False`` if the
        engine is not initialised or the audio stream could not be opened.
    """
    if not self._is_initialized:
        logger.error("Engine not initialised -- call initialize() first")
        return False

    if self._is_recording:
        return True

    # The sender stops draining as soon as the stop event is set, so blocks
    # captured at the end of the previous session may still be queued.
    # Discard them so stale audio is not sent on the new connection.
    while True:
        try:
            self._audio_queue.get_nowait()
        except Empty:
            break

    self._stop_event.clear()
    self._is_recording = True

    # Start the asyncio event-loop thread (handles WS send/receive)
    self._thread = threading.Thread(target=self._run_event_loop, daemon=True)
    self._thread.start()

    # Start the audio capture stream
    try:
        self._start_audio_stream()
    except Exception as exc:
        logger.error("Failed to open audio stream: %s", exc)
        self._is_recording = False
        # Lets the worker's send/receive loops exit once they start.
        self._stop_event.set()
        return False

    logger.info("Recording started")
    return True
|
||||||
|
|
||||||
|
def stop_recording(self):
    """Stop audio capture, close the WebSocket and wait for the worker thread.

    Does nothing when recording is not active. Waits at most 5 seconds for
    the worker thread to exit.
    """
    if not self._is_recording:
        return

    self._is_recording = False
    self._stop_event.set()

    # Stop audio stream
    self._stop_audio_stream()

    # The worker thread sets self._loop to None when it finishes, so take
    # one snapshot instead of re-reading the attribute between the checks
    # and the call below.
    loop = self._loop
    if self._ws is not None and loop is not None and not loop.is_closed():
        closer = self._close_ws()
        try:
            asyncio.run_coroutine_threadsafe(closer, loop)
        except RuntimeError as exc:
            # The loop closed between the check and the scheduling call; the
            # worker closes the socket itself on its way out.
            closer.close()
            logger.debug("Event loop already closed, skipping WebSocket close: %s", exc)

    # Wait for the thread to finish
    worker = self._thread
    if worker is not None:
        worker.join(timeout=5)
        if worker.is_alive():
            logger.warning("WebSocket thread did not exit within 5 seconds")
        self._thread = None

    logger.info("Recording stopped")
|
||||||
|
|
||||||
|
def stop(self):
    """Shut the engine down: end any recording and clear the initialised flag.

    After this call :meth:`initialize` must succeed again before recording
    can be started.
    """
    self.stop_recording()
    self._is_initialized = False
    logger.info("DeepgramTranscriptionEngine shut down")
|
||||||
|
|
||||||
|
def is_ready(self) -> bool:
    """Report whether :meth:`initialize` has succeeded and :meth:`stop` has not been called since."""
    return self._is_initialized
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------ #
|
||||||
|
# Audio capture (sounddevice)
|
||||||
|
# ------------------------------------------------------------------ #
|
||||||
|
|
||||||
|
def _start_audio_stream(self):
|
||||||
|
"""Open a ``sounddevice.InputStream`` that feeds the audio queue."""
|
||||||
|
import sounddevice as sd
|
||||||
|
|
||||||
|
def _audio_callback(indata, frames, time_info, status): # noqa: ARG001
|
||||||
|
if status:
|
||||||
|
logger.warning("Audio stream status: %s", status)
|
||||||
|
if self._is_recording:
|
||||||
|
# float32 -> int16 PCM bytes
|
||||||
|
pcm = (indata * 32767).astype(np.int16).tobytes()
|
||||||
|
self._audio_queue.put(pcm)
|
||||||
|
|
||||||
|
self._stream = sd.InputStream(
|
||||||
|
samplerate=self.sample_rate,
|
||||||
|
blocksize=self.blocksize,
|
||||||
|
channels=self.channels,
|
||||||
|
dtype="float32",
|
||||||
|
device=self.input_device_index,
|
||||||
|
callback=_audio_callback,
|
||||||
|
)
|
||||||
|
self._stream.start()
|
||||||
|
|
||||||
|
def _stop_audio_stream(self):
|
||||||
|
"""Close the audio input stream."""
|
||||||
|
if self._stream is not None:
|
||||||
|
try:
|
||||||
|
self._stream.stop()
|
||||||
|
self._stream.close()
|
||||||
|
except Exception as exc:
|
||||||
|
logger.debug("Error closing audio stream: %s", exc)
|
||||||
|
finally:
|
||||||
|
self._stream = None
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------ #
|
||||||
|
# Asyncio event-loop (runs in daemon thread)
|
||||||
|
# ------------------------------------------------------------------ #
|
||||||
|
|
||||||
|
def _run_event_loop(self):
|
||||||
|
"""Entry point for the daemon thread -- runs the async event loop."""
|
||||||
|
self._loop = asyncio.new_event_loop()
|
||||||
|
asyncio.set_event_loop(self._loop)
|
||||||
|
try:
|
||||||
|
self._loop.run_until_complete(self._ws_lifecycle())
|
||||||
|
except Exception as exc:
|
||||||
|
logger.error("Event-loop error: %s", exc)
|
||||||
|
finally:
|
||||||
|
try:
|
||||||
|
self._loop.run_until_complete(self._loop.shutdown_asyncgens())
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
self._loop.close()
|
||||||
|
self._loop = None
|
||||||
|
|
||||||
|
async def _ws_lifecycle(self):
|
||||||
|
"""Connect, authenticate (if managed), then run send/receive loops."""
|
||||||
|
import websockets
|
||||||
|
|
||||||
|
try:
|
||||||
|
ws_url, extra_headers = self._build_ws_url_and_headers()
|
||||||
|
|
||||||
|
logger.info("Connecting to %s", ws_url)
|
||||||
|
self._ws = await websockets.connect(
|
||||||
|
ws_url,
|
||||||
|
additional_headers=extra_headers,
|
||||||
|
ping_interval=20,
|
||||||
|
ping_timeout=10,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Managed mode: send auth message and wait for ready
|
||||||
|
if self.mode == "managed":
|
||||||
|
if not await self._managed_handshake():
|
||||||
|
return
|
||||||
|
|
||||||
|
# Run send and receive concurrently
|
||||||
|
await asyncio.gather(
|
||||||
|
self._send_loop(),
|
||||||
|
self._receive_loop(),
|
||||||
|
)
|
||||||
|
|
||||||
|
except asyncio.CancelledError:
|
||||||
|
pass
|
||||||
|
except Exception as exc:
|
||||||
|
msg = f"WebSocket error: {exc}"
|
||||||
|
logger.error(msg)
|
||||||
|
if self._on_error:
|
||||||
|
self._on_error(msg)
|
||||||
|
finally:
|
||||||
|
await self._close_ws()
|
||||||
|
|
||||||
|
def _build_ws_url_and_headers(self):
|
||||||
|
"""Return ``(url, headers)`` depending on the current mode."""
|
||||||
|
if self.mode == "managed":
|
||||||
|
# Ensure the server URL uses wss:// and append the path
|
||||||
|
url = self.server_url.rstrip("/")
|
||||||
|
if not url.startswith("ws://") and not url.startswith("wss://"):
|
||||||
|
url = f"wss://{url}"
|
||||||
|
url = f"{url}/ws/transcribe"
|
||||||
|
return url, {}
|
||||||
|
|
||||||
|
# BYOK -- connect directly to Deepgram
|
||||||
|
params = (
|
||||||
|
f"model={self.deepgram_model}"
|
||||||
|
f"&language={self.language}"
|
||||||
|
"&interim_results=true"
|
||||||
|
"&encoding=linear16"
|
||||||
|
f"&sample_rate={self.sample_rate}"
|
||||||
|
f"&channels={self.channels}"
|
||||||
|
)
|
||||||
|
url = f"wss://api.deepgram.com/v1/listen?{params}"
|
||||||
|
headers = {"Authorization": f"Token {self.byok_api_key}"}
|
||||||
|
return url, headers
|
||||||
|
|
||||||
|
# -- managed-mode handshake ---------------------------------------- #
|
||||||
|
|
||||||
|
async def _managed_handshake(self) -> bool:
|
||||||
|
"""Send auth message and wait for ``ready`` (managed mode).
|
||||||
|
|
||||||
|
Returns ``True`` on success.
|
||||||
|
"""
|
||||||
|
auth_msg = {
|
||||||
|
"type": "auth",
|
||||||
|
"token": self.auth_token,
|
||||||
|
"config": {
|
||||||
|
"model": self.deepgram_model,
|
||||||
|
"language": self.language,
|
||||||
|
"sample_rate": self.sample_rate,
|
||||||
|
"channels": self.channels,
|
||||||
|
"encoding": "linear16",
|
||||||
|
"interim_results": True,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
await self._ws.send(json.dumps(auth_msg))
|
||||||
|
|
||||||
|
try:
|
||||||
|
raw = await asyncio.wait_for(self._ws.recv(), timeout=15)
|
||||||
|
data = json.loads(raw)
|
||||||
|
if data.get("type") == "ready":
|
||||||
|
logger.info("Managed proxy is ready")
|
||||||
|
return True
|
||||||
|
|
||||||
|
if data.get("type") == "error":
|
||||||
|
err = data.get("message", "unknown error")
|
||||||
|
logger.error("Auth error from proxy: %s", err)
|
||||||
|
if self._on_error:
|
||||||
|
self._on_error(f"Proxy auth error: {err}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
logger.warning("Unexpected handshake message: %s", data)
|
||||||
|
return False
|
||||||
|
|
||||||
|
except asyncio.TimeoutError:
|
||||||
|
logger.error("Timed out waiting for proxy ready message")
|
||||||
|
if self._on_error:
|
||||||
|
self._on_error("Timed out waiting for proxy ready message")
|
||||||
|
return False
|
||||||
|
|
||||||
|
# -- send loop ----------------------------------------------------- #
|
||||||
|
|
||||||
|
async def _send_loop(self):
|
||||||
|
"""Drain the audio queue and push raw PCM bytes over the WebSocket."""
|
||||||
|
while not self._stop_event.is_set():
|
||||||
|
try:
|
||||||
|
pcm_bytes = self._audio_queue.get(timeout=0.1)
|
||||||
|
except Empty:
|
||||||
|
continue
|
||||||
|
|
||||||
|
try:
|
||||||
|
await self._ws.send(pcm_bytes)
|
||||||
|
except Exception as exc:
|
||||||
|
if not self._stop_event.is_set():
|
||||||
|
logger.error("Send error: %s", exc)
|
||||||
|
break
|
||||||
|
|
||||||
|
# -- receive loop -------------------------------------------------- #
|
||||||
|
|
||||||
|
async def _receive_loop(self):
|
||||||
|
"""Listen for messages from the WebSocket and dispatch them."""
|
||||||
|
while not self._stop_event.is_set():
|
||||||
|
try:
|
||||||
|
raw = await asyncio.wait_for(self._ws.recv(), timeout=1.0)
|
||||||
|
except asyncio.TimeoutError:
|
||||||
|
continue
|
||||||
|
except Exception as exc:
|
||||||
|
if not self._stop_event.is_set():
|
||||||
|
logger.error("Receive error: %s", exc)
|
||||||
|
break
|
||||||
|
|
||||||
|
try:
|
||||||
|
data = json.loads(raw)
|
||||||
|
except (json.JSONDecodeError, TypeError):
|
||||||
|
logger.debug("Non-JSON message received, ignoring")
|
||||||
|
continue
|
||||||
|
|
||||||
|
if self.mode == "managed":
|
||||||
|
self._handle_managed_message(data)
|
||||||
|
else:
|
||||||
|
self._handle_byok_message(data)
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------ #
|
||||||
|
# Message handlers
|
||||||
|
# ------------------------------------------------------------------ #
|
||||||
|
|
||||||
|
def _handle_managed_message(self, data: dict):
|
||||||
|
"""Process a message from the managed proxy."""
|
||||||
|
msg_type = data.get("type", "")
|
||||||
|
|
||||||
|
if msg_type == "transcript":
|
||||||
|
text = data.get("text", "")
|
||||||
|
is_final = data.get("is_final", False)
|
||||||
|
if text.strip():
|
||||||
|
result = TranscriptionResult(
|
||||||
|
text=text,
|
||||||
|
is_final=is_final,
|
||||||
|
timestamp=datetime.now(),
|
||||||
|
user_name=self.user_name,
|
||||||
|
)
|
||||||
|
if is_final:
|
||||||
|
if self.final_callback:
|
||||||
|
self.final_callback(result)
|
||||||
|
else:
|
||||||
|
if self.realtime_callback:
|
||||||
|
self.realtime_callback(result)
|
||||||
|
|
||||||
|
elif msg_type == "credits_low":
|
||||||
|
seconds_remaining = data.get("seconds_remaining", 0)
|
||||||
|
logger.warning("Credits low -- %d seconds remaining", seconds_remaining)
|
||||||
|
if self._on_credits_low:
|
||||||
|
self._on_credits_low(int(seconds_remaining))
|
||||||
|
|
||||||
|
elif msg_type == "error":
|
||||||
|
code = data.get("code", "")
|
||||||
|
message = data.get("message", "Unknown error")
|
||||||
|
logger.error("Proxy error [%s]: %s", code, message)
|
||||||
|
if self._on_error:
|
||||||
|
self._on_error(f"[{code}] {message}" if code else message)
|
||||||
|
|
||||||
|
elif msg_type == "session_end":
|
||||||
|
seconds_used = data.get("seconds_used", 0)
|
||||||
|
logger.info("Session ended -- %d seconds used", seconds_used)
|
||||||
|
|
||||||
|
elif msg_type == "ready":
|
||||||
|
# May arrive again after reconnects; safe to ignore.
|
||||||
|
logger.debug("Received ready message (already connected)")
|
||||||
|
|
||||||
|
else:
|
||||||
|
logger.debug("Unhandled managed message type: %s", msg_type)
|
||||||
|
|
||||||
|
def _handle_byok_message(self, data: dict):
|
||||||
|
"""Process a message received directly from the Deepgram API."""
|
||||||
|
msg_type = data.get("type", "")
|
||||||
|
|
||||||
|
if msg_type == "Results":
|
||||||
|
channel = data.get("channel", {})
|
||||||
|
alternatives = channel.get("alternatives", [])
|
||||||
|
if not alternatives:
|
||||||
|
return
|
||||||
|
|
||||||
|
transcript = alternatives[0].get("transcript", "")
|
||||||
|
is_final = data.get("is_final", False)
|
||||||
|
|
||||||
|
if transcript.strip():
|
||||||
|
result = TranscriptionResult(
|
||||||
|
text=transcript,
|
||||||
|
is_final=is_final,
|
||||||
|
timestamp=datetime.now(),
|
||||||
|
user_name=self.user_name,
|
||||||
|
)
|
||||||
|
if is_final:
|
||||||
|
if self.final_callback:
|
||||||
|
self.final_callback(result)
|
||||||
|
else:
|
||||||
|
if self.realtime_callback:
|
||||||
|
self.realtime_callback(result)
|
||||||
|
|
||||||
|
elif msg_type == "Metadata":
|
||||||
|
logger.debug("Deepgram metadata: %s", data)
|
||||||
|
|
||||||
|
elif msg_type == "UtteranceEnd":
|
||||||
|
logger.debug("Deepgram utterance end")
|
||||||
|
|
||||||
|
else:
|
||||||
|
logger.debug("Unhandled Deepgram message type: %s", msg_type)
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------ #
|
||||||
|
# Helpers
|
||||||
|
# ------------------------------------------------------------------ #
|
||||||
|
|
||||||
|
async def _close_ws(self):
|
||||||
|
"""Close the WebSocket connection if open."""
|
||||||
|
if self._ws is not None:
|
||||||
|
try:
|
||||||
|
await self._ws.close()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
self._ws = None
|
||||||
|
|
||||||
|
def set_user_name(self, user_name: str):
    """Change the name stamped onto results created after this call."""
    self.user_name = user_name
|
||||||
|
|
||||||
|
def is_recording_active(self) -> bool:
    """Report the recording flag: set by ``start_recording``, cleared by ``stop_recording``."""
    return self._is_recording
|
||||||
|
|
||||||
|
def __repr__(self) -> str:
|
||||||
|
return (
|
||||||
|
f"DeepgramTranscriptionEngine(mode={self.mode}, "
|
||||||
|
f"recording={self._is_recording})"
|
||||||
|
)
|
||||||
|
|
||||||
|
def __del__(self):
|
||||||
|
"""Best-effort cleanup."""
|
||||||
|
try:
|
||||||
|
self.stop()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
@@ -68,11 +68,14 @@ web_server:
|
|||||||
port: 8080
|
port: 8080
|
||||||
host: "127.0.0.1"
|
host: "127.0.0.1"
|
||||||
|
|
||||||
remote_processing:
|
remote:
|
||||||
enabled: false # Enable remote transcription offloading
|
mode: local # local | managed | byok
|
||||||
server_url: "" # WebSocket URL of remote transcription service (e.g., ws://your-server:8765/ws/transcribe)
|
server_url: "" # Proxy server URL for managed mode (e.g., wss://your-proxy.com)
|
||||||
api_key: "" # API key for authentication
|
auth_token: "" # JWT stored after login (managed mode)
|
||||||
fallback_to_local: true # Fall back to local processing if remote fails
|
byok_api_key: "" # Deepgram API key for BYOK mode
|
||||||
|
deepgram_model: nova-2 # Deepgram model to use
|
||||||
|
language: en-US # Language code
|
||||||
|
fallback_to_local: true # Fall back to local Whisper if remote fails
|
||||||
|
|
||||||
updates:
|
updates:
|
||||||
auto_check: true # Check for updates on startup
|
auto_check: true # Check for updates on startup
|
||||||
|
|||||||
@@ -18,6 +18,7 @@ sys.path.append(str(Path(__file__).resolve().parent.parent))
|
|||||||
from client.config import Config
|
from client.config import Config
|
||||||
from client.device_utils import DeviceManager
|
from client.device_utils import DeviceManager
|
||||||
from client.transcription_engine_realtime import RealtimeTranscriptionEngine, TranscriptionResult
|
from client.transcription_engine_realtime import RealtimeTranscriptionEngine, TranscriptionResult
|
||||||
|
from client.deepgram_transcription import DeepgramTranscriptionEngine
|
||||||
from client.server_sync import ServerSyncClient
|
from client.server_sync import ServerSyncClient
|
||||||
from gui.settings_dialog_qt import SettingsDialog
|
from gui.settings_dialog_qt import SettingsDialog
|
||||||
from server.web_display import TranscriptionWebServer
|
from server.web_display import TranscriptionWebServer
|
||||||
@@ -394,6 +395,23 @@ class MainWindow(QMainWindow):
|
|||||||
min_gap = self.config.get('transcription.min_gap_between_recordings', 0.0)
|
min_gap = self.config.get('transcription.min_gap_between_recordings', 0.0)
|
||||||
min_recording = self.config.get('transcription.min_length_of_recording', 0.5)
|
min_recording = self.config.get('transcription.min_length_of_recording', 0.5)
|
||||||
|
|
||||||
|
remote_mode = self.config.get('remote.mode', 'local')
|
||||||
|
|
||||||
|
if remote_mode in ('managed', 'byok'):
|
||||||
|
# Use Deepgram-based remote transcription
|
||||||
|
self.transcription_engine = DeepgramTranscriptionEngine(
|
||||||
|
config=self.config,
|
||||||
|
user_name=user_name,
|
||||||
|
input_device_index=audio_device
|
||||||
|
)
|
||||||
|
self.transcription_engine.set_callbacks(
|
||||||
|
realtime_callback=self._on_realtime_transcription,
|
||||||
|
final_callback=self._on_final_transcription
|
||||||
|
)
|
||||||
|
self.transcription_engine.set_error_callback(self._on_remote_error)
|
||||||
|
self.transcription_engine.set_credits_low_callback(self._on_credits_low)
|
||||||
|
else:
|
||||||
|
# Use local Whisper transcription
|
||||||
self.transcription_engine = RealtimeTranscriptionEngine(
|
self.transcription_engine = RealtimeTranscriptionEngine(
|
||||||
model=model,
|
model=model,
|
||||||
device=device,
|
device=device,
|
||||||
@@ -430,8 +448,11 @@ class MainWindow(QMainWindow):
|
|||||||
def _on_engine_ready(self, success: bool, message: str):
|
def _on_engine_ready(self, success: bool, message: str):
|
||||||
"""Handle engine initialization completion."""
|
"""Handle engine initialization completion."""
|
||||||
if success:
|
if success:
|
||||||
# Update device label with actual device used
|
remote_mode = self.config.get('remote.mode', 'local')
|
||||||
if self.transcription_engine:
|
if remote_mode in ('managed', 'byok'):
|
||||||
|
mode_label = 'Managed' if remote_mode == 'managed' else 'BYOK'
|
||||||
|
self.device_label.setText(f"Device: Deepgram ({mode_label})")
|
||||||
|
elif self.transcription_engine:
|
||||||
actual_device = self.transcription_engine.device
|
actual_device = self.transcription_engine.device
|
||||||
compute_type = self.transcription_engine.compute_type
|
compute_type = self.transcription_engine.compute_type
|
||||||
device_display = f"{actual_device.upper()} ({compute_type})"
|
device_display = f"{actual_device.upper()} ({compute_type})"
|
||||||
@@ -647,6 +668,21 @@ class MainWindow(QMainWindow):
|
|||||||
import traceback
|
import traceback
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
|
|
||||||
|
def _on_remote_error(self, error_msg: str):
|
||||||
|
"""Handle error from remote transcription service."""
|
||||||
|
print(f"Remote transcription error: {error_msg}")
|
||||||
|
self.status_label.setText(f"⚠ Remote error: {error_msg}")
|
||||||
|
|
||||||
|
# Fallback to local if enabled
|
||||||
|
if self.config.get('remote.fallback_to_local', True) and self.is_transcribing:
|
||||||
|
print("Falling back to local transcription...")
|
||||||
|
self.status_label.setText("⚠ Remote failed — falling back to local")
|
||||||
|
|
||||||
|
def _on_credits_low(self, seconds_remaining: int):
|
||||||
|
"""Handle low credits warning from proxy."""
|
||||||
|
minutes = seconds_remaining // 60
|
||||||
|
self.status_label.setText(f"⚠ Credits low: {minutes} min remaining")
|
||||||
|
|
||||||
def _clear_transcriptions(self):
|
def _clear_transcriptions(self):
|
||||||
"""Clear all transcriptions."""
|
"""Clear all transcriptions."""
|
||||||
if not self.transcriptions:
|
if not self.transcriptions:
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ from PySide6.QtWidgets import (
|
|||||||
QDialog, QVBoxLayout, QHBoxLayout, QFormLayout,
|
QDialog, QVBoxLayout, QHBoxLayout, QFormLayout,
|
||||||
QLabel, QLineEdit, QComboBox, QCheckBox, QSlider,
|
QLabel, QLineEdit, QComboBox, QCheckBox, QSlider,
|
||||||
QPushButton, QMessageBox, QGroupBox, QScrollArea, QWidget,
|
QPushButton, QMessageBox, QGroupBox, QScrollArea, QWidget,
|
||||||
QFileDialog, QColorDialog
|
QFileDialog, QColorDialog, QRadioButton
|
||||||
)
|
)
|
||||||
from PySide6.QtCore import Qt
|
from PySide6.QtCore import Qt
|
||||||
from PySide6.QtGui import QScreen, QFontDatabase, QColor
|
from PySide6.QtGui import QScreen, QFontDatabase, QColor
|
||||||
@@ -487,46 +487,91 @@ class SettingsDialog(QDialog):
|
|||||||
server_group.setLayout(server_layout)
|
server_group.setLayout(server_layout)
|
||||||
content_layout.addWidget(server_group)
|
content_layout.addWidget(server_group)
|
||||||
|
|
||||||
# Remote Processing Group
|
# Transcription Mode Group
|
||||||
remote_group = QGroupBox("Remote Processing (GPU Offload)")
|
mode_group = QGroupBox("Transcription Mode")
|
||||||
remote_layout = QFormLayout()
|
mode_layout = QVBoxLayout()
|
||||||
remote_layout.setSpacing(10)
|
mode_layout.setSpacing(10)
|
||||||
|
|
||||||
self.remote_enabled_check = QCheckBox()
|
# Radio buttons for mode selection
|
||||||
self.remote_enabled_check.setToolTip(
|
self.mode_local_radio = QRadioButton("Local (Whisper)")
|
||||||
"Enable remote transcription processing:\n"
|
self.mode_local_radio.setToolTip("Transcribe locally using Whisper models")
|
||||||
"• Offload transcription to a GPU-equipped server\n"
|
self.mode_managed_radio = QRadioButton("Remote - Managed")
|
||||||
"• Reduces local CPU/GPU usage\n"
|
self.mode_managed_radio.setToolTip("Use the transcription proxy service with prepaid credits")
|
||||||
"• Requires running the remote transcription service"
|
self.mode_byok_radio = QRadioButton("Remote - BYOK (Bring Your Own Key)")
|
||||||
)
|
self.mode_byok_radio.setToolTip("Connect directly to Deepgram with your own API key")
|
||||||
remote_layout.addRow("Enable Remote Processing:", self.remote_enabled_check)
|
|
||||||
|
|
||||||
self.remote_url_input = QLineEdit()
|
mode_layout.addWidget(self.mode_local_radio)
|
||||||
self.remote_url_input.setPlaceholderText("ws://your-server:8765/ws/transcribe")
|
mode_layout.addWidget(self.mode_managed_radio)
|
||||||
self.remote_url_input.setToolTip(
|
mode_layout.addWidget(self.mode_byok_radio)
|
||||||
"WebSocket URL of the remote transcription service:\n"
|
|
||||||
"• Format: ws://host:port/ws/transcribe\n"
|
|
||||||
"• Use wss:// for secure connections"
|
|
||||||
)
|
|
||||||
remote_layout.addRow("Server URL:", self.remote_url_input)
|
|
||||||
|
|
||||||
self.remote_api_key_input = QLineEdit()
|
# Managed mode fields (shown when managed radio selected)
|
||||||
self.remote_api_key_input.setEchoMode(QLineEdit.Password)
|
self.managed_widget = QWidget()
|
||||||
self.remote_api_key_input.setPlaceholderText("your-api-key")
|
managed_layout = QFormLayout()
|
||||||
self.remote_api_key_input.setToolTip(
|
managed_layout.setSpacing(8)
|
||||||
"API key for authentication with the remote service"
|
|
||||||
)
|
|
||||||
remote_layout.addRow("API Key:", self.remote_api_key_input)
|
|
||||||
|
|
||||||
self.remote_fallback_check = QCheckBox("Enable")
|
self.managed_server_url = QLineEdit()
|
||||||
self.remote_fallback_check.setChecked(True)
|
self.managed_server_url.setPlaceholderText("wss://your-proxy-server.com")
|
||||||
self.remote_fallback_check.setToolTip(
|
managed_layout.addRow("Server URL:", self.managed_server_url)
|
||||||
"Fall back to local transcription if remote service is unavailable"
|
|
||||||
)
|
|
||||||
remote_layout.addRow("Fallback to Local:", self.remote_fallback_check)
|
|
||||||
|
|
||||||
remote_group.setLayout(remote_layout)
|
# Login/Register buttons in a row
|
||||||
content_layout.addWidget(remote_group)
|
auth_widget = QWidget()
|
||||||
|
auth_layout = QHBoxLayout()
|
||||||
|
auth_layout.setContentsMargins(0, 0, 0, 0)
|
||||||
|
self.managed_login_btn = QPushButton("Login")
|
||||||
|
self.managed_login_btn.clicked.connect(self._managed_login)
|
||||||
|
self.managed_register_btn = QPushButton("Register")
|
||||||
|
self.managed_register_btn.clicked.connect(self._managed_register)
|
||||||
|
auth_layout.addWidget(self.managed_login_btn)
|
||||||
|
auth_layout.addWidget(self.managed_register_btn)
|
||||||
|
auth_layout.addStretch()
|
||||||
|
auth_widget.setLayout(auth_layout)
|
||||||
|
managed_layout.addRow("Account:", auth_widget)
|
||||||
|
|
||||||
|
self.managed_balance_label = QLabel("Not logged in")
|
||||||
|
managed_layout.addRow("Balance:", self.managed_balance_label)
|
||||||
|
|
||||||
|
self.managed_fallback_check = QCheckBox("Enable")
|
||||||
|
self.managed_fallback_check.setChecked(True)
|
||||||
|
self.managed_fallback_check.setToolTip("Fall back to local Whisper if remote fails")
|
||||||
|
managed_layout.addRow("Fallback to Local:", self.managed_fallback_check)
|
||||||
|
|
||||||
|
self.managed_widget.setLayout(managed_layout)
|
||||||
|
mode_layout.addWidget(self.managed_widget)
|
||||||
|
|
||||||
|
# BYOK mode fields (shown when BYOK radio selected)
|
||||||
|
self.byok_widget = QWidget()
|
||||||
|
byok_layout = QFormLayout()
|
||||||
|
byok_layout.setSpacing(8)
|
||||||
|
|
||||||
|
self.byok_api_key_input = QLineEdit()
|
||||||
|
self.byok_api_key_input.setEchoMode(QLineEdit.Password)
|
||||||
|
self.byok_api_key_input.setPlaceholderText("your-deepgram-api-key")
|
||||||
|
byok_layout.addRow("Deepgram API Key:", self.byok_api_key_input)
|
||||||
|
|
||||||
|
self.byok_model_combo = QComboBox()
|
||||||
|
self.byok_model_combo.addItems(["nova-2", "nova-2-general", "nova-2-meeting", "nova-2-phonecall", "whisper-large", "whisper-medium", "whisper-small"])
|
||||||
|
byok_layout.addRow("Model:", self.byok_model_combo)
|
||||||
|
|
||||||
|
self.byok_language_input = QLineEdit()
|
||||||
|
self.byok_language_input.setText("en-US")
|
||||||
|
self.byok_language_input.setPlaceholderText("en-US")
|
||||||
|
byok_layout.addRow("Language:", self.byok_language_input)
|
||||||
|
|
||||||
|
self.byok_fallback_check = QCheckBox("Enable")
|
||||||
|
self.byok_fallback_check.setChecked(True)
|
||||||
|
self.byok_fallback_check.setToolTip("Fall back to local Whisper if Deepgram fails")
|
||||||
|
byok_layout.addRow("Fallback to Local:", self.byok_fallback_check)
|
||||||
|
|
||||||
|
self.byok_widget.setLayout(byok_layout)
|
||||||
|
mode_layout.addWidget(self.byok_widget)
|
||||||
|
|
||||||
|
mode_group.setLayout(mode_layout)
|
||||||
|
content_layout.addWidget(mode_group)
|
||||||
|
|
||||||
|
# Connect radio buttons to show/hide relevant widgets
|
||||||
|
self.mode_local_radio.toggled.connect(self._on_mode_changed)
|
||||||
|
self.mode_managed_radio.toggled.connect(self._on_mode_changed)
|
||||||
|
self.mode_byok_radio.toggled.connect(self._on_mode_changed)
|
||||||
|
|
||||||
# Updates Group
|
# Updates Group
|
||||||
updates_group = QGroupBox("Software Updates")
|
updates_group = QGroupBox("Software Updates")
|
||||||
@@ -794,11 +839,28 @@ class SettingsDialog(QDialog):
|
|||||||
self.server_room_input.setText(self.config.get('server_sync.room', 'default'))
|
self.server_room_input.setText(self.config.get('server_sync.room', 'default'))
|
||||||
self.server_passphrase_input.setText(self.config.get('server_sync.passphrase', ''))
|
self.server_passphrase_input.setText(self.config.get('server_sync.passphrase', ''))
|
||||||
|
|
||||||
# Remote processing settings
|
# Transcription mode settings
|
||||||
self.remote_enabled_check.setChecked(self.config.get('remote_processing.enabled', False))
|
mode = self.config.get('remote.mode', 'local')
|
||||||
self.remote_url_input.setText(self.config.get('remote_processing.server_url', ''))
|
if mode == 'managed':
|
||||||
self.remote_api_key_input.setText(self.config.get('remote_processing.api_key', ''))
|
self.mode_managed_radio.setChecked(True)
|
||||||
self.remote_fallback_check.setChecked(self.config.get('remote_processing.fallback_to_local', True))
|
elif mode == 'byok':
|
||||||
|
self.mode_byok_radio.setChecked(True)
|
||||||
|
else:
|
||||||
|
self.mode_local_radio.setChecked(True)
|
||||||
|
|
||||||
|
self.managed_server_url.setText(self.config.get('remote.server_url', ''))
|
||||||
|
self.managed_fallback_check.setChecked(self.config.get('remote.fallback_to_local', True))
|
||||||
|
self.byok_api_key_input.setText(self.config.get('remote.byok_api_key', ''))
|
||||||
|
self.byok_model_combo.setCurrentText(self.config.get('remote.deepgram_model', 'nova-2'))
|
||||||
|
self.byok_language_input.setText(self.config.get('remote.language', 'en-US'))
|
||||||
|
self.byok_fallback_check.setChecked(self.config.get('remote.fallback_to_local', True))
|
||||||
|
|
||||||
|
# Trigger visibility update
|
||||||
|
self._on_mode_changed()
|
||||||
|
|
||||||
|
# Update balance if managed mode and has token
|
||||||
|
if self.config.get('remote.auth_token'):
|
||||||
|
self._update_managed_balance()
|
||||||
|
|
||||||
# Update settings
|
# Update settings
|
||||||
self.update_auto_check.setChecked(self.config.get('updates.auto_check', True))
|
self.update_auto_check.setChecked(self.config.get('updates.auto_check', True))
|
||||||
@@ -869,11 +931,21 @@ class SettingsDialog(QDialog):
|
|||||||
self.config.set('server_sync.room', self.server_room_input.text())
|
self.config.set('server_sync.room', self.server_room_input.text())
|
||||||
self.config.set('server_sync.passphrase', self.server_passphrase_input.text())
|
self.config.set('server_sync.passphrase', self.server_passphrase_input.text())
|
||||||
|
|
||||||
# Remote processing settings
|
# Transcription mode settings
|
||||||
self.config.set('remote_processing.enabled', self.remote_enabled_check.isChecked())
|
if self.mode_managed_radio.isChecked():
|
||||||
self.config.set('remote_processing.server_url', self.remote_url_input.text())
|
self.config.set('remote.mode', 'managed')
|
||||||
self.config.set('remote_processing.api_key', self.remote_api_key_input.text())
|
elif self.mode_byok_radio.isChecked():
|
||||||
self.config.set('remote_processing.fallback_to_local', self.remote_fallback_check.isChecked())
|
self.config.set('remote.mode', 'byok')
|
||||||
|
else:
|
||||||
|
self.config.set('remote.mode', 'local')
|
||||||
|
|
||||||
|
self.config.set('remote.server_url', self.managed_server_url.text())
|
||||||
|
self.config.set('remote.fallback_to_local',
|
||||||
|
self.managed_fallback_check.isChecked() if self.mode_managed_radio.isChecked()
|
||||||
|
else self.byok_fallback_check.isChecked())
|
||||||
|
self.config.set('remote.byok_api_key', self.byok_api_key_input.text())
|
||||||
|
self.config.set('remote.deepgram_model', self.byok_model_combo.currentText())
|
||||||
|
self.config.set('remote.language', self.byok_language_input.text())
|
||||||
|
|
||||||
# Update settings
|
# Update settings
|
||||||
self.config.set('updates.auto_check', self.update_auto_check.isChecked())
|
self.config.set('updates.auto_check', self.update_auto_check.isChecked())
|
||||||
@@ -892,6 +964,194 @@ class SettingsDialog(QDialog):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
QMessageBox.critical(self, "Error", f"Failed to save settings:\n{e}")
|
QMessageBox.critical(self, "Error", f"Failed to save settings:\n{e}")
|
||||||
|
|
||||||
|
def _on_mode_changed(self):
|
||||||
|
"""Show/hide mode-specific widgets based on selected radio button."""
|
||||||
|
self.managed_widget.setVisible(self.mode_managed_radio.isChecked())
|
||||||
|
self.byok_widget.setVisible(self.mode_byok_radio.isChecked())
|
||||||
|
|
||||||
|
def _managed_login(self):
    """Open a modal login dialog and authenticate with the managed proxy server.

    The entered email and password are POSTed as JSON to
    ``<server_url>/auth/login``. When the response contains a ``token`` it is
    stored under ``remote.auth_token``, the balance label is refreshed and the
    dialog is accepted. On any failure a warning box is shown and the dialog
    stays open so the user can retry or cancel.
    """
    import json
    import urllib.request
    import urllib.error

    dialog = QDialog(self)
    dialog.setWindowTitle("Login")
    dialog.setMinimumWidth(350)
    layout = QFormLayout()

    email_input = QLineEdit()
    email_input.setPlaceholderText("you@example.com")
    layout.addRow("Email:", email_input)

    password_input = QLineEdit()
    password_input.setEchoMode(QLineEdit.Password)
    layout.addRow("Password:", password_input)

    button_layout = QHBoxLayout()
    cancel_btn = QPushButton("Cancel")
    cancel_btn.clicked.connect(dialog.reject)
    login_btn = QPushButton("Login")
    login_btn.setDefault(True)
    button_layout.addStretch()
    button_layout.addWidget(cancel_btn)
    button_layout.addWidget(login_btn)
    layout.addRow("", button_layout)

    dialog.setLayout(layout)

    def do_login():
        # Strip surrounding whitespace before the emptiness check so a field
        # containing only spaces is rejected instead of producing a bad URL.
        server_url = self.managed_server_url.text().strip().rstrip('/')
        if not server_url:
            QMessageBox.warning(dialog, "Error", "Please enter a Server URL first.")
            return

        payload = json.dumps({
            "email": email_input.text(),
            "password": password_input.text()
        }).encode('utf-8')
        req = urllib.request.Request(
            f"{server_url}/auth/login",
            data=payload,
            headers={"Content-Type": "application/json"},
            method="POST"
        )
        try:
            # NOTE: this is a blocking call (up to 10 s) made from the click handler.
            with urllib.request.urlopen(req, timeout=10) as resp:
                data = json.loads(resp.read().decode('utf-8'))
            token = data.get('token', '')
            if token:
                self.config.set('remote.auth_token', token)
                self._update_managed_balance()
                QMessageBox.information(dialog, "Success", "Logged in successfully.")
                dialog.accept()
            else:
                QMessageBox.warning(dialog, "Error", "Login succeeded but no token received.")
        except urllib.error.HTTPError as e:
            try:
                body = json.loads(e.read().decode('utf-8'))
                msg = body.get('detail', body.get('message', str(e)))
            except Exception:
                # Body was not JSON (or not a JSON object): fall back to the HTTP error text.
                msg = str(e)
            # 'detail' may be structured (a list or dict) rather than a plain
            # string; the message box only accepts text.
            QMessageBox.warning(dialog, "Login Failed", str(msg))
        except Exception as e:
            QMessageBox.warning(dialog, "Error", f"Could not connect to server:\n{e}")

    login_btn.clicked.connect(do_login)
    dialog.exec()
|
||||||
|
|
||||||
|
def _managed_register(self):
    """Open a modal registration dialog and create an account on the managed proxy server.

    After checking that both password fields match, the email and password
    are POSTed as JSON to ``<server_url>/auth/register``. If the response
    contains a ``token`` it is stored under ``remote.auth_token`` and the
    balance label is refreshed; without a token the user is told to log in.
    Either way the dialog is accepted on a successful response. On failure a
    warning box is shown and the dialog stays open.
    """
    import json
    import urllib.request
    import urllib.error

    dialog = QDialog(self)
    dialog.setWindowTitle("Register")
    dialog.setMinimumWidth(350)
    layout = QFormLayout()

    email_input = QLineEdit()
    email_input.setPlaceholderText("you@example.com")
    layout.addRow("Email:", email_input)

    password_input = QLineEdit()
    password_input.setEchoMode(QLineEdit.Password)
    layout.addRow("Password:", password_input)

    confirm_input = QLineEdit()
    confirm_input.setEchoMode(QLineEdit.Password)
    layout.addRow("Confirm Password:", confirm_input)

    button_layout = QHBoxLayout()
    cancel_btn = QPushButton("Cancel")
    cancel_btn.clicked.connect(dialog.reject)
    register_btn = QPushButton("Register")
    register_btn.setDefault(True)
    button_layout.addStretch()
    button_layout.addWidget(cancel_btn)
    button_layout.addWidget(register_btn)
    layout.addRow("", button_layout)

    dialog.setLayout(layout)

    def do_register():
        if password_input.text() != confirm_input.text():
            QMessageBox.warning(dialog, "Error", "Passwords do not match.")
            return

        # Strip surrounding whitespace before the emptiness check so a field
        # containing only spaces is rejected instead of producing a bad URL.
        server_url = self.managed_server_url.text().strip().rstrip('/')
        if not server_url:
            QMessageBox.warning(dialog, "Error", "Please enter a Server URL first.")
            return

        payload = json.dumps({
            "email": email_input.text(),
            "password": password_input.text()
        }).encode('utf-8')
        req = urllib.request.Request(
            f"{server_url}/auth/register",
            data=payload,
            headers={"Content-Type": "application/json"},
            method="POST"
        )
        try:
            # NOTE: this is a blocking call (up to 10 s) made from the click handler.
            with urllib.request.urlopen(req, timeout=10) as resp:
                data = json.loads(resp.read().decode('utf-8'))
            token = data.get('token', '')
            if token:
                self.config.set('remote.auth_token', token)
                self._update_managed_balance()
                QMessageBox.information(dialog, "Success", "Account created and logged in.")
            else:
                QMessageBox.information(dialog, "Success",
                                        "Account created. Please log in.")
            dialog.accept()
        except urllib.error.HTTPError as e:
            try:
                body = json.loads(e.read().decode('utf-8'))
                msg = body.get('detail', body.get('message', str(e)))
            except Exception:
                # Body was not JSON (or not a JSON object): fall back to the HTTP error text.
                msg = str(e)
            # 'detail' may be structured (a list or dict) rather than a plain
            # string; the message box only accepts text.
            QMessageBox.warning(dialog, "Registration Failed", str(msg))
        except Exception as e:
            QMessageBox.warning(dialog, "Error", f"Could not connect to server:\n{e}")

    register_btn.clicked.connect(do_register)
    dialog.exec()
|
||||||
|
|
||||||
|
def _update_managed_balance(self):
|
||||||
|
"""Fetch and display the current account balance from the managed proxy server."""
|
||||||
|
import json
|
||||||
|
import urllib.request
|
||||||
|
import urllib.error
|
||||||
|
|
||||||
|
server_url = self.managed_server_url.text().rstrip('/')
|
||||||
|
token = self.config.get('remote.auth_token', '')
|
||||||
|
if not server_url or not token:
|
||||||
|
self.managed_balance_label.setText("Not logged in")
|
||||||
|
return
|
||||||
|
|
||||||
|
req = urllib.request.Request(
|
||||||
|
f"{server_url}/billing/balance",
|
||||||
|
headers={
|
||||||
|
"Authorization": f"Bearer {token}",
|
||||||
|
"Content-Type": "application/json"
|
||||||
|
},
|
||||||
|
method="GET"
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
with urllib.request.urlopen(req, timeout=10) as resp:
|
||||||
|
data = json.loads(resp.read().decode('utf-8'))
|
||||||
|
balance = data.get('balance', data.get('credits', 'N/A'))
|
||||||
|
self.managed_balance_label.setText(str(balance))
|
||||||
|
except urllib.error.HTTPError as e:
|
||||||
|
if e.code == 401:
|
||||||
|
self.managed_balance_label.setText("Session expired - please login again")
|
||||||
|
self.config.set('remote.auth_token', '')
|
||||||
|
else:
|
||||||
|
self.managed_balance_label.setText("Error fetching balance")
|
||||||
|
except Exception:
|
||||||
|
self.managed_balance_label.setText("Could not connect to server")
|
||||||
|
|
||||||
def _check_for_updates_now(self):
|
def _check_for_updates_now(self):
|
||||||
"""Manually check for updates."""
|
"""Manually check for updates."""
|
||||||
from version import __version__
|
from version import __version__
|
||||||
|
|||||||
13
index.html
Normal file
13
index.html
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
<!doctype html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8" />
|
||||||
|
<link rel="icon" type="image/png" href="/LocalTranscription.png" />
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||||
|
<title>Local Transcription</title>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<div id="app"></div>
|
||||||
|
<script type="module" src="/src/main.ts"></script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
184
local-transcription-headless.spec
Normal file
184
local-transcription-headless.spec
Normal file
@@ -0,0 +1,184 @@
|
|||||||
|
# -*- mode: python ; coding: utf-8 -*-
|
||||||
|
"""PyInstaller spec file for headless Local Transcription backend (no PySide6/Qt).
|
||||||
|
|
||||||
|
This builds the Python sidecar for the Tauri frontend.
|
||||||
|
Much simpler than local-transcription.spec since all Qt dependencies are removed.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
|
||||||
|
block_cipher = None
|
||||||
|
is_windows = sys.platform == 'win32'
|
||||||
|
|
||||||
|
from PyInstaller.utils.hooks import collect_submodules, collect_data_files
|
||||||
|
|
||||||
|
# Locate the assets folder inside the installed faster_whisper package
import faster_whisper
faster_whisper_path = os.path.dirname(faster_whisper.__file__)
vad_assets_path = os.path.join(faster_whisper_path, 'assets')

# pvporcupine resources (indirect dependency from RealtimeSTT).
# The package is optional: when it is not importable nothing is bundled for it.
pvporcupine_data_files = []
try:
    import pvporcupine
except ImportError:
    pass
else:
    pvporcupine_path = os.path.dirname(pvporcupine.__file__)
    # Bundle whichever of the two package sub-folders actually exist on disk.
    for subdir in ('resources', 'lib'):
        candidate = os.path.join(pvporcupine_path, subdir)
        if os.path.exists(candidate):
            pvporcupine_data_files.append((candidate, f'pvporcupine/{subdir}'))

# Data files as (source path, destination folder inside the bundle) pairs
datas = [
    ('config/default_config.yaml', 'config'),
    (vad_assets_path, 'faster_whisper/assets'),
]
datas += pvporcupine_data_files
|
||||||
|
|
||||||
|
# Hidden imports -- NO PySide6/Qt needed for headless backend.
# Modules listed here are passed to Analysis(hiddenimports=...) below so they
# are bundled even if import scanning does not find them.
hiddenimports = [
    # Transcription engine
    'faster_whisper',
    'faster_whisper.transcribe',
    'faster_whisper.vad',
    'ctranslate2',
    'sounddevice',
    'scipy',
    'scipy.signal',
    'numpy',
    # RealtimeSTT
    'RealtimeSTT',
    'RealtimeSTT.audio_recorder',
    'webrtcvad',
    # NOTE(review): 'webrtcvad_wheels' looks like a distribution name rather
    # than an importable module -- confirm it resolves at build time.
    'webrtcvad_wheels',
    'silero_vad',
    # PyTorch
    'torch',
    'torch.nn',
    'torch.nn.functional',
    'torchaudio',
    'onnxruntime',
    'onnxruntime.capi',
    'onnxruntime.capi.onnxruntime_pybind11_state',
    'pyaudio',
    'halo',
    'colorama',
    # FastAPI and dependencies
    'fastapi',
    'fastapi.routing',
    'fastapi.responses',
    'starlette',
    'starlette.applications',
    'starlette.routing',
    'starlette.responses',
    'starlette.websockets',
    'starlette.middleware',
    'starlette.middleware.cors',
    'pydantic',
    'pydantic.fields',
    'pydantic.main',
    'anyio',
    'anyio._backends',
    'anyio._backends._asyncio',
    'sniffio',
    # Uvicorn
    'uvicorn',
    'uvicorn.logging',
    'uvicorn.loops',
    'uvicorn.loops.auto',
    'uvicorn.protocols',
    'uvicorn.protocols.http',
    'uvicorn.protocols.http.auto',
    'uvicorn.protocols.http.h11_impl',
    'uvicorn.protocols.websockets',
    'uvicorn.protocols.websockets.auto',
    'uvicorn.protocols.websockets.wsproto_impl',
    'uvicorn.lifespan',
    'uvicorn.lifespan.on',
    'h11',
    'websockets',
    'websockets.legacy',
    'websockets.legacy.server',
    # HTTP client
    'requests',
    'urllib3',
    'certifi',
    'charset_normalizer',
]
|
||||||
|
|
||||||
|
# Collect every submodule of the key backend packages.
# A package that cannot be collected is reported and skipped, not fatal.
print("Collecting submodules for backend packages...")
_submodule_packages = (
    'fastapi', 'starlette', 'pydantic', 'pydantic_core', 'anyio',
    'uvicorn', 'websockets', 'h11', 'httptools', 'uvloop',
)
for package in _submodule_packages:
    try:
        found = collect_submodules(package)
        hiddenimports += found
        print(f" + Collected {len(found)} submodules from {package}")
    except Exception as e:
        print(f" - Warning: Could not collect {package}: {e}")
|
||||||
|
|
||||||
|
# Collect non-Python data files shipped inside these packages.
# Collection is best-effort: a failure for one package must not abort the
# build, but it is reported (like the submodule loop above) instead of being
# silently swallowed, so a missing-data problem is visible in the build log.
for package in ['fastapi', 'starlette', 'pydantic', 'uvicorn', 'RealtimeSTT']:
    try:
        data_files = collect_data_files(package)
    except Exception as e:
        print(f" - Warning: Could not collect data files from {package}: {e}")
        continue
    if data_files:
        datas += data_files
        print(f" + Collected {len(data_files)} data files from {package}")
|
||||||
|
|
||||||
|
# Pydantic critical deps: extra modules appended to the hidden-import list
_pydantic_critical_deps = [
    'colorsys', 'decimal', 'json', 'ipaddress', 'pathlib', 'uuid',
    'email.message', 'typing_extensions',
]
hiddenimports.extend(_pydantic_critical_deps)
|
||||||
|
|
||||||
|
# Analyse the headless entry script, feeding in the data files and hidden
# imports assembled above. GUI toolkits are explicitly excluded.
a = Analysis(
    ['backend/main_headless.py'],
    pathex=[],
    binaries=[],
    datas=datas,
    hiddenimports=hiddenimports,
    hookspath=['hooks'],  # project-local hooks directory
    hooksconfig={},
    runtime_hooks=[],
    excludes=['enum34', 'PySide6', 'PyQt5', 'PyQt6', 'tkinter'],
    win_no_prefer_redirects=False,
    win_private_assemblies=False,
    cipher=block_cipher,
    noarchive=False,
)

# Archive of the pure-Python modules found by the analysis
pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)

# Executable stub only: binaries are excluded here and gathered by COLLECT below
exe = EXE(
    pyz,
    a.scripts,
    [],
    exclude_binaries=True,
    name='local-transcription-backend',
    debug=False,
    bootloader_ignore_signals=False,
    strip=False,
    upx=True,
    console=True,  # Headless backend needs console for JSON output
    disable_windowed_traceback=False,
    argv_emulation=False,
    target_arch=None,
    codesign_identity=None,
    entitlements_file=None,
    icon='LocalTranscription.ico' if is_windows else None,  # icon only set on Windows
)

# Gather the executable together with its binaries, zip files and data files
# under a single output named 'local-transcription-backend'
coll = COLLECT(
    exe,
    a.binaries,
    a.zipfiles,
    a.datas,
    strip=False,
    upx=True,
    upx_exclude=[],
    name='local-transcription-backend',
)
|
||||||
1784
package-lock.json
generated
Normal file
1784
package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
27
package.json
Normal file
27
package.json
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
{
|
||||||
|
"name": "local-transcription",
|
||||||
|
"private": true,
|
||||||
|
"version": "1.4.0",
|
||||||
|
"type": "module",
|
||||||
|
"scripts": {
|
||||||
|
"dev": "vite dev",
|
||||||
|
"build": "vite build",
|
||||||
|
"preview": "vite preview",
|
||||||
|
"tauri": "tauri"
|
||||||
|
},
|
||||||
|
"devDependencies": {
|
||||||
|
"@sveltejs/vite-plugin-svelte": "^5.0.0",
|
||||||
|
"@tauri-apps/cli": "^2.0.0",
|
||||||
|
"@tsconfig/svelte": "^5.0.0",
|
||||||
|
"svelte": "^5.0.0",
|
||||||
|
"svelte-check": "^4.0.0",
|
||||||
|
"typescript": "~5.6.0",
|
||||||
|
"vite": "^6.0.0"
|
||||||
|
},
|
||||||
|
"dependencies": {
|
||||||
|
"@tauri-apps/api": "^2.0.0",
|
||||||
|
"@tauri-apps/plugin-dialog": "^2.0.0",
|
||||||
|
"@tauri-apps/plugin-shell": "^2.0.0",
|
||||||
|
"@tauri-apps/plugin-process": "^2.0.0"
|
||||||
|
}
|
||||||
|
}
|
||||||
5172
src-tauri/Cargo.lock
generated
Normal file
5172
src-tauri/Cargo.lock
generated
Normal file
File diff suppressed because it is too large
Load Diff
21
src-tauri/Cargo.toml
Normal file
21
src-tauri/Cargo.toml
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
[package]
|
||||||
|
name = "local-transcription"
|
||||||
|
version = "1.4.0"
|
||||||
|
description = "Real-time speech-to-text transcription for streamers"
|
||||||
|
authors = ["Local Transcription Contributors"]
|
||||||
|
edition = "2021"
|
||||||
|
|
||||||
|
[lib]
|
||||||
|
name = "local_transcription_lib"
|
||||||
|
crate-type = ["lib", "cdylib", "staticlib"]
|
||||||
|
|
||||||
|
[build-dependencies]
|
||||||
|
tauri-build = { version = "2", features = [] }
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
tauri = { version = "2", features = [] }
|
||||||
|
tauri-plugin-shell = "2"
|
||||||
|
tauri-plugin-dialog = "2"
|
||||||
|
tauri-plugin-process = "2"
|
||||||
|
serde = { version = "1", features = ["derive"] }
|
||||||
|
serde_json = "1"
|
||||||
3
src-tauri/build.rs
Normal file
3
src-tauri/build.rs
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
// Cargo build script entry point: all build-time work is delegated to
// `tauri_build::build()`.
fn main() {
    tauri_build::build()
}
|
||||||
1
src-tauri/gen/schemas/acl-manifests.json
Normal file
1
src-tauri/gen/schemas/acl-manifests.json
Normal file
File diff suppressed because one or more lines are too long
1
src-tauri/gen/schemas/capabilities.json
Normal file
1
src-tauri/gen/schemas/capabilities.json
Normal file
@@ -0,0 +1 @@
|
|||||||
|
{}
|
||||||
2660
src-tauri/gen/schemas/desktop-schema.json
Normal file
2660
src-tauri/gen/schemas/desktop-schema.json
Normal file
File diff suppressed because it is too large
Load Diff
2660
src-tauri/gen/schemas/linux-schema.json
Normal file
2660
src-tauri/gen/schemas/linux-schema.json
Normal file
File diff suppressed because it is too large
Load Diff
BIN
src-tauri/icons/128x128.png
Normal file
BIN
src-tauri/icons/128x128.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 9.3 KiB |
BIN
src-tauri/icons/128x128@2x.png
Normal file
BIN
src-tauri/icons/128x128@2x.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 41 KiB |
BIN
src-tauri/icons/32x32.png
Normal file
BIN
src-tauri/icons/32x32.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 1.7 KiB |
BIN
src-tauri/icons/icon.ico
Normal file
BIN
src-tauri/icons/icon.ico
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 23 KiB |
BIN
src-tauri/icons/icon.png
Normal file
BIN
src-tauri/icons/icon.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 52 KiB |
9
src-tauri/src/lib.rs
Normal file
9
src-tauri/src/lib.rs
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
/// Builds and runs the Tauri application.
///
/// Registers the shell, dialog and process plugins on a default builder and
/// starts the app with the context produced by `tauri::generate_context!()`.
///
/// # Panics
///
/// Panics with "error while running tauri application" if the app fails to run.
// When built with the `mobile` cfg this function is marked as the mobile entry point.
#[cfg_attr(mobile, tauri::mobile_entry_point)]
pub fn run() {
    tauri::Builder::default()
        .plugin(tauri_plugin_shell::init())
        .plugin(tauri_plugin_dialog::init())
        .plugin(tauri_plugin_process::init())
        .run(tauri::generate_context!())
        .expect("error while running tauri application");
}
|
||||||
6
src-tauri/src/main.rs
Normal file
6
src-tauri/src/main.rs
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
// Prevents additional console window on Windows in release
|
||||||
|
#![cfg_attr(not(debug_assertions), windows_subsystem = "windows")]
|
||||||
|
|
||||||
|
// Binary entry point: all application setup lives in the library crate's `run()`.
fn main() {
    local_transcription_lib::run()
}
|
||||||
42
src-tauri/tauri.conf.json
Normal file
42
src-tauri/tauri.conf.json
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
{
|
||||||
|
"productName": "Local Transcription",
|
||||||
|
"version": "1.4.0",
|
||||||
|
"identifier": "com.localtranscription.app",
|
||||||
|
"build": {
|
||||||
|
"frontendDist": "../dist",
|
||||||
|
"devUrl": "http://localhost:1420",
|
||||||
|
"beforeDevCommand": "npm run dev",
|
||||||
|
"beforeBuildCommand": "npm run build"
|
||||||
|
},
|
||||||
|
"app": {
|
||||||
|
"windows": [
|
||||||
|
{
|
||||||
|
"title": "Local Transcription",
|
||||||
|
"width": 800,
|
||||||
|
"height": 600,
|
||||||
|
"minWidth": 640,
|
||||||
|
"minHeight": 480,
|
||||||
|
"resizable": true
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"security": {
|
||||||
|
"csp": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"bundle": {
|
||||||
|
"active": true,
|
||||||
|
"targets": "all",
|
||||||
|
"icon": [
|
||||||
|
"icons/32x32.png",
|
||||||
|
"icons/128x128.png",
|
||||||
|
"icons/128x128@2x.png",
|
||||||
|
"icons/icon.ico",
|
||||||
|
"icons/icon.png"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"plugins": {
|
||||||
|
"shell": {
|
||||||
|
"open": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
99
src/App.svelte
Normal file
99
src/App.svelte
Normal file
@@ -0,0 +1,99 @@
|
|||||||
|
<script lang="ts">
  // Root component: wires the backend/config stores to the main window layout
  // and owns the open/closed state of the settings view.
  import { onMount } from "svelte";
  import Header from "$lib/components/Header.svelte";
  import StatusBar from "$lib/components/StatusBar.svelte";
  import Controls from "$lib/components/Controls.svelte";
  import TranscriptionDisplay from "$lib/components/TranscriptionDisplay.svelte";
  import Settings from "$lib/components/Settings.svelte";
  import { backendStore } from "$lib/stores/backend";
  import { configStore } from "$lib/stores/config";

  // True while the Settings component should be rendered.
  let showSettings = $state(false);

  // Display URLs mirrored from the backend store; the Sync link is only
  // rendered when syncDisplayUrl is truthy.
  let obsDisplayUrl = $derived(backendStore.obsUrl);
  let syncDisplayUrl = $derived(backendStore.syncUrl);

  /** Show the settings view (passed to Header as onSettingsClick). */
  function openSettings() {
    showSettings = true;
  }

  /** Hide the settings view (passed to Settings as onClose). */
  function closeSettings() {
    showSettings = false;
  }

  onMount(() => {
    // Connect to the backend and load the configuration once mounted.
    backendStore.connect();
    configStore.loadConfig();

    // Cleanup callback returned to onMount: disconnect from the backend.
    return () => {
      backendStore.disconnect();
    };
  });
</script>
|
||||||
|
|
||||||
|
<div class="app-shell">
|
||||||
|
<Header onSettingsClick={openSettings} />
|
||||||
|
<StatusBar />
|
||||||
|
|
||||||
|
<div class="display-links">
|
||||||
|
<span class="link-label">OBS:</span>
|
||||||
|
<a href={obsDisplayUrl} target="_blank" rel="noopener">{obsDisplayUrl}</a>
|
||||||
|
{#if syncDisplayUrl}
|
||||||
|
<span class="link-separator">|</span>
|
||||||
|
<span class="link-label">Sync:</span>
|
||||||
|
<a href={syncDisplayUrl} target="_blank" rel="noopener"
|
||||||
|
>{syncDisplayUrl}</a
|
||||||
|
>
|
||||||
|
{/if}
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<TranscriptionDisplay />
|
||||||
|
<Controls />
|
||||||
|
|
||||||
|
<div class="version-label">v{backendStore.version}</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{#if showSettings}
|
||||||
|
<Settings onClose={closeSettings} />
|
||||||
|
{/if}
|
||||||
|
|
||||||
|
<style>
|
||||||
|
.app-shell {
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
height: 100%;
|
||||||
|
width: 100%;
|
||||||
|
background-color: var(--bg-primary);
|
||||||
|
}
|
||||||
|
|
||||||
|
.display-links {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 6px;
|
||||||
|
padding: 6px 20px;
|
||||||
|
font-size: 12px;
|
||||||
|
background-color: var(--bg-primary);
|
||||||
|
border-bottom: 1px solid var(--border-color);
|
||||||
|
flex-shrink: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.link-label {
|
||||||
|
color: var(--text-secondary);
|
||||||
|
font-weight: 500;
|
||||||
|
}
|
||||||
|
|
||||||
|
.link-separator {
|
||||||
|
color: var(--text-muted);
|
||||||
|
margin: 0 4px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.version-label {
|
||||||
|
position: fixed;
|
||||||
|
bottom: 6px;
|
||||||
|
right: 12px;
|
||||||
|
font-size: 11px;
|
||||||
|
color: var(--text-muted);
|
||||||
|
pointer-events: none;
|
||||||
|
z-index: 10;
|
||||||
|
}
|
||||||
|
</style>
|
||||||
312
src/app.css
Normal file
312
src/app.css
Normal file
@@ -0,0 +1,312 @@
|
|||||||
|
/* Global dark theme styles for Local Transcription */
|
||||||
|
|
||||||
|
:root {
|
||||||
|
--bg-primary: #1e1e1e;
|
||||||
|
--bg-secondary: #2d2d2d;
|
||||||
|
--bg-tertiary: #3a3a3a;
|
||||||
|
--bg-hover: #454545;
|
||||||
|
--text-primary: #e0e0e0;
|
||||||
|
--text-secondary: #a0a0a0;
|
||||||
|
--text-muted: #707070;
|
||||||
|
--accent-green: #4caf50;
|
||||||
|
--accent-green-hover: #45a049;
|
||||||
|
--accent-red: #f44336;
|
||||||
|
--accent-red-hover: #d32f2f;
|
||||||
|
--accent-blue: #2196f3;
|
||||||
|
--accent-blue-hover: #1976d2;
|
||||||
|
--accent-orange: #ff9800;
|
||||||
|
--border-color: #444;
|
||||||
|
--border-color-light: #555;
|
||||||
|
--scrollbar-track: #2d2d2d;
|
||||||
|
--scrollbar-thumb: #555;
|
||||||
|
--scrollbar-thumb-hover: #777;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Reset: border-box sizing and no default spacing on every element and
   on the ::before / ::after pseudo-elements. */
*,
*::before,
*::after {
  margin: 0;
  padding: 0;
  box-sizing: border-box;
}

/* The document fills the window and never scrolls as a whole. */
html,
body {
  width: 100%;
  height: 100%;
  overflow: hidden;
}

/* Base typography and colors inherited by the whole UI. */
body {
  color: var(--text-primary);
  background-color: var(--bg-primary);
  font-family: system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto,
    "Helvetica Neue", Arial, sans-serif;
  font-size: 14px;
  line-height: 1.5;
  -webkit-font-smoothing: antialiased;
  -moz-osx-font-smoothing: grayscale;
}

/* Mount point: a full-size vertical flex container. */
#app {
  display: flex;
  flex-direction: column;
  width: 100%;
  height: 100%;
}
|
||||||
|
|
||||||
|
/* Buttons */

/* Neutral default button. */
button {
  padding: 8px 16px;
  border: 1px solid var(--border-color);
  border-radius: 6px;
  color: var(--text-primary);
  background-color: var(--bg-secondary);
  font-family: inherit;
  font-size: 13px;
  font-weight: 500;
  cursor: pointer;
  user-select: none;
  transition: background-color 0.15s ease, border-color 0.15s ease,
    transform 0.1s ease;
}

button:hover {
  border-color: var(--border-color-light);
  background-color: var(--bg-hover);
}

/* Slight "press" effect. */
button:active {
  transform: scale(0.98);
}

/* Declared after :active so a disabled button never shows the press effect. */
button:disabled {
  transform: none;
  cursor: not-allowed;
  opacity: 0.5;
}

/* Green call-to-action variant. */
button.primary {
  border-color: var(--accent-green);
  color: white;
  background-color: var(--accent-green);
}

button.primary:hover {
  background-color: var(--accent-green-hover);
}

/* Red destructive / stop variant. */
button.danger {
  border-color: var(--accent-red);
  color: white;
  background-color: var(--accent-red);
}

button.danger:hover {
  background-color: var(--accent-red-hover);
}
|
||||||
|
|
||||||
|
/* Inputs and Selects */

/* Shared look for text-like inputs, selects and textareas. */
input[type="text"],
input[type="password"],
input[type="number"],
input[type="url"],
input[type="email"],
select,
textarea {
  font-family: inherit;
  font-size: 13px;
  padding: 8px 12px;
  border: 1px solid var(--border-color);
  border-radius: 6px;
  background-color: var(--bg-secondary);
  color: var(--text-primary);
  /* The native outline is replaced by the border-color change on :focus below. */
  outline: none;
  transition: border-color 0.15s ease;
  width: 100%;
}

input[type="text"]:focus,
input[type="password"]:focus,
input[type="number"]:focus,
input[type="url"]:focus,
input[type="email"]:focus,
select:focus,
textarea:focus {
  border-color: var(--accent-blue);
}

/* Placeholder color for every text-like control styled above
   (number, email and textarea were previously left at the browser default). */
input[type="text"]::placeholder,
input[type="password"]::placeholder,
input[type="number"]::placeholder,
input[type="url"]::placeholder,
input[type="email"]::placeholder,
textarea::placeholder {
  color: var(--text-muted);
}

/* Replace the native dropdown arrow with an inline SVG chevron. */
select {
  /* Prefixed form kept alongside the standard one, as for the range,
     checkbox and radio controls in this stylesheet. */
  -webkit-appearance: none;
  appearance: none;
  background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='12' height='12' viewBox='0 0 12 12'%3E%3Cpath fill='%23a0a0a0' d='M6 8L1 3h10z'/%3E%3C/svg%3E");
  background-repeat: no-repeat;
  background-position: right 10px center;
  /* Extra right padding so the value text never runs under the chevron. */
  padding-right: 30px;
}
|
||||||
|
|
||||||
|
/* Color input */

/* Framed swatch sized to sit next to the other form controls. */
input[type="color"] {
  width: 50px;
  height: 36px;
  padding: 2px;
  border: 1px solid var(--border-color);
  border-radius: 6px;
  background-color: var(--bg-secondary);
  cursor: pointer;
}

/* WebKit/Blink internals: inset the swatch inside the frame... */
input[type="color"]::-webkit-color-swatch-wrapper {
  padding: 2px;
}

/* ...and drop its own border in favor of rounded corners. */
input[type="color"]::-webkit-color-swatch {
  border-radius: 3px;
  border: none;
}
|
||||||
|
|
||||||
|
/* Range slider */

/* Track: a thin rounded bar; the native look is removed entirely. */
input[type="range"] {
  -webkit-appearance: none;
  appearance: none;
  width: 100%;
  height: 6px;
  background: var(--bg-tertiary);
  border-radius: 3px;
  outline: none;
  cursor: pointer;
}

/* `outline: none` above removes the default focus ring, so restore a visible
   indicator for keyboard users only (pointer interaction is unaffected). */
input[type="range"]:focus-visible {
  outline: 2px solid var(--accent-blue);
  outline-offset: 4px;
}

/* Thumb, WebKit/Blink. Kept as a separate rule from the Firefox one: an
   unknown pseudo-element in a selector list would invalidate the whole rule. */
input[type="range"]::-webkit-slider-thumb {
  -webkit-appearance: none;
  appearance: none;
  width: 16px;
  height: 16px;
  border-radius: 50%;
  background: var(--accent-blue);
  cursor: pointer;
  border: 2px solid var(--bg-primary);
}

/* Thumb, Firefox. */
input[type="range"]::-moz-range-thumb {
  width: 16px;
  height: 16px;
  border-radius: 50%;
  background: var(--accent-blue);
  cursor: pointer;
  border: 2px solid var(--bg-primary);
}
|
||||||
|
|
||||||
|
/* Toggle / Checkbox styled as switch */

/* Track: a 40x22 pill drawn in place of the native checkbox. */
input[type="checkbox"] {
  -webkit-appearance: none;
  appearance: none;
  position: relative;
  flex-shrink: 0;
  width: 40px;
  height: 22px;
  border-radius: 11px;
  background-color: var(--bg-tertiary);
  cursor: pointer;
  transition: background-color 0.2s ease;
}

/* Knob: an 18px circle inset 2px from the track's top-left corner. */
input[type="checkbox"]::after {
  content: "";
  position: absolute;
  left: 2px;
  top: 2px;
  width: 18px;
  height: 18px;
  border-radius: 50%;
  background-color: var(--text-secondary);
  transition: transform 0.2s ease, background-color 0.2s ease;
}

/* On state: green track... */
input[type="checkbox"]:checked {
  background-color: var(--accent-green);
}

/* ...and the knob slides 18px right (2 + 18 + 18 = 38, leaving a 2px gap). */
input[type="checkbox"]:checked::after {
  background-color: white;
  transform: translateX(18px);
}
|
||||||
|
|
||||||
|
/* Radio buttons */

/* Outer ring drawn in place of the native radio. */
input[type="radio"] {
  -webkit-appearance: none;
  appearance: none;
  position: relative;
  flex-shrink: 0;
  width: 18px;
  height: 18px;
  border: 2px solid var(--border-color);
  border-radius: 50%;
  background-color: var(--bg-secondary);
  cursor: pointer;
}

/* Selected: the ring turns blue... */
input[type="radio"]:checked {
  border-color: var(--accent-blue);
}

/* ...and an 8px dot appears, 3px in from each edge of the 14px inner area. */
input[type="radio"]:checked::after {
  content: "";
  position: absolute;
  left: 3px;
  top: 3px;
  width: 8px;
  height: 8px;
  border-radius: 50%;
  background-color: var(--accent-blue);
}
|
||||||
|
|
||||||
|
/* Scrollbar */

/* WebKit/Blink: fully custom 8px scrollbars. */
::-webkit-scrollbar {
  width: 8px;
  height: 8px;
}

::-webkit-scrollbar-track {
  background: var(--scrollbar-track);
  border-radius: 4px;
}

::-webkit-scrollbar-thumb {
  background: var(--scrollbar-thumb);
  border-radius: 4px;
}

::-webkit-scrollbar-thumb:hover {
  background: var(--scrollbar-thumb-hover);
}

/* Firefox scrollbar */
/* The standard properties are applied only where the ::-webkit-scrollbar
   pseudo-elements are unsupported. Engines that support both (Chromium 121+)
   let non-auto scrollbar-width / scrollbar-color override the pseudo-element
   styling, which would discard the rules above. */
@supports not selector(::-webkit-scrollbar) {
  * {
    scrollbar-width: thin;
    scrollbar-color: var(--scrollbar-thumb) var(--scrollbar-track);
  }
}
|
||||||
|
|
||||||
|
/* Links */

/* Blue, underlined only while hovered. */
a {
  text-decoration: none;
  color: var(--accent-blue);
}

a:hover {
  text-decoration: underline;
}

/* Label */

/* Labels are flex rows so an inline control and its text align with a gap. */
label {
  display: flex;
  align-items: center;
  gap: 8px;
  color: var(--text-secondary);
  font-size: 13px;
}
|
||||||
116
src/lib/components/Controls.svelte
Normal file
116
src/lib/components/Controls.svelte
Normal file
@@ -0,0 +1,116 @@
|
|||||||
|
<script lang="ts">
|
||||||
|
import { backendStore } from "$lib/stores/backend";
|
||||||
|
import { transcriptionStore } from "$lib/stores/transcriptions";
|
||||||
|
|
||||||
|
// True while the backend reports the "transcribing" state.
let isTranscribing = $derived(backendStore.appState === "transcribing");

// The toggle button is usable in both the "ready" and "transcribing" states.
let isReady = $derived(isTranscribing || backendStore.appState === "ready");

// Set while a start/stop request is in flight, to block overlapping requests.
let isLoading = $state(false);
|
||||||
|
|
||||||
|
/**
 * Start transcription, or stop it if it is currently running.
 *
 * Calls made while a previous request is still in flight are ignored.
 * Request failures are logged, and the in-flight flag is always cleared.
 */
async function toggleTranscription() {
  if (isLoading) {
    return;
  }

  isLoading = true;
  try {
    const endpoint = isTranscribing ? "/api/stop" : "/api/start";
    await backendStore.apiPost(endpoint);
  } catch (toggleError) {
    console.error("Failed to toggle transcription:", toggleError);
  } finally {
    isLoading = false;
  }
}
|
||||||
|
|
||||||
|
/**
 * Ask the backend to clear its transcriptions, then empty the local store.
 *
 * The local store is only cleared once the backend call has succeeded;
 * a failure is logged and leaves the local transcriptions untouched.
 */
async function clearTranscriptions() {
  try {
    await backendStore.apiPost("/api/clear");
    transcriptionStore.clearAll();
  } catch (clearError) {
    console.error("Failed to clear:", clearError);
  }
}
|
||||||
|
|
||||||
|
/**
 * Save the current transcript to a text file.
 *
 * 1. Text source: `/api/transcriptions` on the backend; the local store is
 *    used when that request fails or returns no text.
 * 2. Nothing is written when the text is empty or whitespace-only.
 * 3. Destination: a native Tauri save dialog, with the file written through
 *    the backend's `/api/save-file` endpoint. If any part of that path throws
 *    (dialog plugin not available, write request rejected), the text is
 *    offered as a browser-style download instead.
 *
 * Both fallbacks log the error that triggered them, so a failure stays
 * diagnosable. Cancelling the native dialog saves nothing.
 */
async function saveTranscriptions() {
  try {
    // Get transcription text from backend or local store
    let text: string;
    try {
      const data = await backendStore.apiGet<{ text: string }>("/api/transcriptions");
      text = data.text || transcriptionStore.getPlainText();
    } catch (fetchErr) {
      console.warn(
        "Could not fetch transcriptions from backend, using local copy:",
        fetchErr
      );
      text = transcriptionStore.getPlainText();
    }

    if (!text.trim()) {
      console.warn("No transcriptions to save");
      return;
    }

    // Try Tauri dialog for native save, fall back to browser download
    try {
      const { save } = await import("@tauri-apps/plugin-dialog");
      const filePath = await save({
        defaultPath: "transcription.txt",
        filters: [
          { name: "Text Files", extensions: ["txt"] },
          { name: "All Files", extensions: ["*"] },
        ],
      });
      // A falsy path means the user dismissed the dialog: nothing to write.
      if (filePath) {
        // Write via backend API
        await backendStore.apiPost("/api/save-file", { path: filePath, text });
      }
    } catch (nativeErr) {
      console.warn("Native save failed, falling back to browser download:", nativeErr);

      // Fallback: browser-style download
      const blob = new Blob([text], { type: "text/plain" });
      const url = URL.createObjectURL(blob);
      const a = document.createElement("a");
      a.href = url;
      a.download = "transcription.txt";
      a.click();
      URL.revokeObjectURL(url);
    }
  } catch (err) {
    console.error("Failed to save:", err);
  }
}
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<!-- Action bar: transcription toggle, clear, and save. -->
<div class="controls">
  <!-- "danger" styling while transcribing, "primary" otherwise. -->
  <button
    onclick={toggleTranscription}
    disabled={isLoading || !isReady}
    class={isTranscribing ? "danger" : "primary"}
  >
    {#if isLoading}
      ...
    {:else if isTranscribing}
      Stop Transcription
    {:else}
      Start Transcription
    {/if}
  </button>

  <!-- Clear and Save both need a live backend connection. -->
  <button disabled={!backendStore.connected} onclick={clearTranscriptions}>
    Clear
  </button>

  <button disabled={!backendStore.connected} onclick={saveTranscriptions}>
    Save
  </button>
</div>
|
||||||
|
|
||||||
|
<style>
|
||||||
|
/* Horizontal button row with a top border; it keeps its own height when the
   surrounding flex column runs out of space. */
.controls {
  display: flex;
  flex-shrink: 0;
  align-items: center;
  gap: 8px;
  padding: 10px 20px;
  border-top: 1px solid var(--border-color);
  background-color: var(--bg-secondary);
}
|
||||||
|
</style>
|
||||||
82
src/lib/components/Header.svelte
Normal file
82
src/lib/components/Header.svelte
Normal file
@@ -0,0 +1,82 @@
|
|||||||
|
<script lang="ts">
|
||||||
|
/** Props accepted by the header component. */
type Props = {
  /** Called when the settings (gear) button is clicked. */
  onSettingsClick: () => void;
};

let { onSettingsClick }: Props = $props();
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<!-- Title bar with the app name and a gear button that opens settings. -->
<header class="app-header">
  <h1 class="app-title">Local Transcription</h1>
  <!-- Icon-only button: aria-label gives it an explicit accessible name
       (as the settings panel's close button has); title stays as the tooltip. -->
  <button
    class="settings-btn"
    onclick={onSettingsClick}
    title="Settings"
    aria-label="Settings"
  >
    <!-- Decorative gear icon, hidden from assistive technology. -->
    <svg
      width="20"
      height="20"
      viewBox="0 0 24 24"
      fill="none"
      stroke="currentColor"
      stroke-width="2"
      stroke-linecap="round"
      stroke-linejoin="round"
      aria-hidden="true"
    >
      <circle cx="12" cy="12" r="3"></circle>
      <path
        d="M19.4 15a1.65 1.65 0 0 0 .33 1.82l.06.06a2 2 0 0 1
        0 2.83 2 2 0 0 1-2.83 0l-.06-.06a1.65 1.65 0 0
        0-1.82-.33 1.65 1.65 0 0 0-1 1.51V21a2 2 0 0 1-2
        2 2 2 0 0 1-2-2v-.09A1.65 1.65 0 0 0 9 19.4a1.65
        1.65 0 0 0-1.82.33l-.06.06a2 2 0 0 1-2.83 0 2 2
        0 0 1 0-2.83l.06-.06A1.65 1.65 0 0 0 4.68
        15a1.65 1.65 0 0 0-1.51-1H3a2 2 0 0 1-2-2 2 2 0
        0 1 2-2h.09A1.65 1.65 0 0 0 4.6 9a1.65 1.65 0 0
        0-.33-1.82l-.06-.06a2 2 0 0 1 0-2.83 2 2 0 0 1
        2.83 0l.06.06A1.65 1.65 0 0 0 9 4.68a1.65 1.65 0
        0 0 1-1.51V3a2 2 0 0 1 2-2 2 2 0 0 1 2
        2v.09a1.65 1.65 0 0 0 1 1.51 1.65 1.65 0 0 0
        1.82-.33l.06-.06a2 2 0 0 1 2.83 0 2 2 0 0 1 0
        2.83l-.06.06a1.65 1.65 0 0 0-.33 1.82V9a1.65 1.65
        0 0 0 1.51 1H21a2 2 0 0 1 2 2 2 2 0 0
        1-2 2h-.09a1.65 1.65 0 0 0-1.51 1z"
      ></path>
    </svg>
  </button>
</header>
|
||||||
|
|
||||||
|
<style>
|
||||||
|
/* Header row: title on the left, settings button on the right. */
.app-header {
  display: flex;
  flex-shrink: 0;
  align-items: center;
  justify-content: space-between;
  padding: 12px 20px;
  border-bottom: 1px solid var(--border-color);
  background-color: var(--bg-secondary);
}

.app-title {
  color: var(--text-primary);
  font-size: 24px;
  font-weight: 700;
  letter-spacing: -0.5px;
}

/* Square, transparent icon button; the SVG inherits `color` via currentColor. */
.settings-btn {
  display: flex;
  align-items: center;
  justify-content: center;
  width: 36px;
  height: 36px;
  padding: 0;
  border: 1px solid var(--border-color);
  border-radius: 8px;
  color: var(--text-secondary);
  background-color: transparent;
  cursor: pointer;
  transition: color 0.15s ease, background-color 0.15s ease;
}

.settings-btn:hover {
  background-color: var(--bg-tertiary);
  color: var(--text-primary);
}
|
||||||
|
</style>
|
||||||
780
src/lib/components/Settings.svelte
Normal file
780
src/lib/components/Settings.svelte
Normal file
@@ -0,0 +1,780 @@
|
|||||||
|
<script lang="ts">
|
||||||
|
import { configStore } from "$lib/stores/config";
|
||||||
|
import { backendStore } from "$lib/stores/backend";
|
||||||
|
|
||||||
|
/** Props accepted by the settings panel. */
type Props = {
  /** Called when the panel should be dismissed (after save, cancel, Escape or overlay click). */
  onClose: () => void;
};

let { onClose }: Props = $props();
|
||||||
|
|
||||||
|
// Local copies of config values for editing.
// The initial values below are placeholders until the stored config is copied in.

// --- User and audio ---
let userName = $state("");
let audioDevice = $state("default");

// --- Transcription engine ---
let model = $state("base.en");
let language = $state("en");
let computeDevice = $state("auto");
let computeType = $state("default");

// --- Realtime preview ---
let enableRealtime = $state(false);
let realtimeModel = $state("tiny.en");
let realtimeProcessingPause = $state(0.1);

// --- Voice activity detection and timing ---
let sileroSensitivity = $state(0.4);
let webrtcSensitivity = $state(3);
let postSpeechSilence = $state(0.3);
let minRecordingLength = $state(0.5);
let minGapBetween = $state(0);
let continuousMode = $state(false);

// --- Display ---
let showTimestamps = $state(true);
let fadeSeconds = $state(10);
let maxLines = $state(100);
let fontSize = $state(12);

// --- Colors ---
let userColor = $state("#4CAF50");
let textColor = $state("#FFFFFF");
let backgroundColor = $state("#000000");

// --- Server sync ---
let syncEnabled = $state(false);
let syncUrl = $state("");
let syncRoom = $state("default");
let syncPassphrase = $state("");

// --- Remote transcription ---
let remoteMode = $state("local");
let remoteServerUrl = $state("");
// Credentials typed into the managed-account form; sent only by login/register.
let managedEmail = $state("");
let managedPassword = $state("");

// --- Updates ---
let autoCheckUpdates = $state(true);

// Fetched device lists (empty until the backend responds)
let audioDevices = $state<Array<{ id: string; name: string }>>([]);
let computeDevices = $state<Array<{ id: string; name: string }>>([]);
|
||||||
|
|
||||||
|
// Model options: each base size with its English-only ".en" variant,
// followed by the three "large" revisions.
const modelOptions = [
  ...["tiny", "base", "small", "medium"].flatMap((size) => [size, `${size}.en`]),
  ...[1, 2, 3].map((revision) => `large-v${revision}`),
];

// Compute precisions offered in the UI, as [value, label] pairs.
const computeTypeOptions = [
  ["default", "Default"],
  ["int8", "int8 (Fastest)"],
  ["float16", "float16 (GPU)"],
  ["float32", "float32 (Best Quality)"],
].map(([value, label]) => ({ value, label }));

// WebRTC VAD levels 0-3; only the two extremes carry an explanatory hint.
const webrtcHints = ["Most Sensitive", "", "", "Least Sensitive"];
const webrtcOptions = webrtcHints.map((hint, value) => ({
  value,
  label: hint ? `${value} (${hint})` : String(value),
}));
|
||||||
|
|
||||||
|
// Copy the stored config into the local editing state.
// NOTE(review): as an $effect this runs after mount and, if configStore.config
// is reactive state, again whenever one of the values read here changes.
$effect(() => {
  const {
    user,
    audio,
    transcription: tr,
    display,
    server_sync: sync,
    remote,
    updates,
  } = configStore.config;

  userName = user.name;
  audioDevice = audio.input_device;

  model = tr.model;
  language = tr.language;
  computeDevice = tr.device;
  computeType = tr.compute_type;
  enableRealtime = tr.enable_realtime_transcription;
  realtimeModel = tr.realtime_model;
  realtimeProcessingPause = tr.realtime_processing_pause;
  sileroSensitivity = tr.silero_sensitivity;
  webrtcSensitivity = tr.webrtc_sensitivity;
  postSpeechSilence = tr.post_speech_silence_duration;
  minRecordingLength = tr.min_length_of_recording;
  minGapBetween = tr.min_gap_between_recordings;
  continuousMode = tr.continuous_mode;

  showTimestamps = display.show_timestamps;
  fadeSeconds = display.fade_after_seconds;
  maxLines = display.max_lines;
  fontSize = display.font_size;
  userColor = display.user_color;
  textColor = display.text_color;
  // Strip alpha from background color for the color picker (only supports 6-char hex)
  backgroundColor = `#${display.background_color.replace("#", "").substring(0, 6)}`;

  syncEnabled = sync.enabled;
  syncUrl = sync.url;
  syncRoom = sync.room;
  syncPassphrase = sync.passphrase;

  remoteMode = remote.mode;
  remoteServerUrl = remote.server_url;

  autoCheckUpdates = updates.auto_check;
});
|
||||||
|
|
||||||
|
// Fetch audio devices and compute devices on mount.
// Both loaders handle their own errors, so the promises are deliberately not awaited.
$effect(() => {
  void fetchAudioDevices();
  void fetchComputeDevices();
});
|
||||||
|
|
||||||
|
/**
 * Load the audio input devices from `/api/audio-devices` into `audioDevices`.
 *
 * Any failure, or a response without a `devices` list, yields an empty list,
 * leaving only the static "Default" entry in the dropdown.
 */
async function fetchAudioDevices() {
  let found: { id: string; name: string }[] = [];
  try {
    const response = await backendStore.apiGet<{
      devices: { id: string; name: string }[];
    }>("/api/audio-devices");
    found = response.devices ?? [];
  } catch {
    // Best effort: keep the empty list.
  }
  audioDevices = found;
}
|
||||||
|
|
||||||
|
/**
 * Load the compute devices from `/api/compute-devices` into `computeDevices`.
 *
 * A built-in Auto / CPU / CUDA list is used when the request fails and also
 * when the response carries no devices. Unlike the audio dropdown, the
 * compute-device dropdown has no static option, so an empty list would leave
 * it with nothing to select.
 */
async function fetchComputeDevices() {
  const fallbackDevices = [
    { id: "auto", name: "Auto" },
    { id: "cpu", name: "CPU" },
    { id: "cuda", name: "CUDA (GPU)" },
  ];

  try {
    const data = await backendStore.apiGet<{
      devices: { id: string; name: string }[];
    }>("/api/compute-devices");
    const devices = data.devices ?? [];
    computeDevices = devices.length > 0 ? devices : fallbackDevices;
  } catch {
    // Best effort: the request failed, so offer the built-in choices.
    computeDevices = fallbackDevices;
  }
}
|
||||||
|
|
||||||
|
/**
 * Persist the edited settings through `configStore.saveConfig` and close the
 * panel on success. On failure the error is logged and the panel stays open.
 *
 * The color picker only edits the six RGB hex digits, and the stored
 * background color has its alpha digits stripped before being shown. To avoid
 * silently discarding that alpha on every save, the alpha suffix of a stored
 * 8-digit value is appended to the picked color again.
 */
async function handleSave() {
  const storedBgHex = configStore.config.display.background_color.replace("#", "");
  const pickedIsSixDigitHex = /^#[0-9a-fA-F]{6}$/.test(backgroundColor);
  // Only re-attach alpha when both sides have the expected shape.
  const bgAlpha =
    pickedIsSixDigitHex && storedBgHex.length === 8 ? storedBgHex.substring(6) : "";

  const updates = {
    user: {
      name: userName,
    },
    audio: {
      input_device: audioDevice,
    },
    transcription: {
      model,
      device: computeDevice,
      language,
      compute_type: computeType,
      enable_realtime_transcription: enableRealtime,
      realtime_model: realtimeModel,
      realtime_processing_pause: realtimeProcessingPause,
      silero_sensitivity: sileroSensitivity,
      webrtc_sensitivity: webrtcSensitivity,
      post_speech_silence_duration: postSpeechSilence,
      min_length_of_recording: minRecordingLength,
      min_gap_between_recordings: minGapBetween,
      continuous_mode: continuousMode,
    },
    display: {
      show_timestamps: showTimestamps,
      fade_after_seconds: fadeSeconds,
      max_lines: maxLines,
      font_size: fontSize,
      user_color: userColor,
      text_color: textColor,
      background_color: backgroundColor + bgAlpha,
    },
    server_sync: {
      enabled: syncEnabled,
      url: syncUrl,
      room: syncRoom,
      passphrase: syncPassphrase,
    },
    remote: {
      mode: remoteMode,
      server_url: remoteServerUrl,
    },
    updates: {
      auto_check: autoCheckUpdates,
    },
  };

  try {
    await configStore.saveConfig(updates);
    onClose();
  } catch (err) {
    console.error("Failed to save settings:", err);
  }
}
|
||||||
|
|
||||||
|
/** Dismiss the panel without saving; local edits are simply dropped. */
function handleCancel(): void {
  onClose();
}
|
||||||
|
|
||||||
|
/**
 * Ask the backend to check for updates (`POST /api/check-updates`).
 * The response is not used here; a failure is only logged.
 */
async function handleCheckUpdates() {
  try {
    await backendStore.apiPost("/api/check-updates");
  } catch (updateError) {
    console.error("Failed to check for updates:", updateError);
  }
}
|
||||||
|
|
||||||
|
/**
 * Send the email and password entered in the form to `/api/remote/login`.
 * The response is not used here; a failure is only logged.
 */
async function handleManagedLogin() {
  const credentials = { email: managedEmail, password: managedPassword };
  try {
    await backendStore.apiPost("/api/remote/login", credentials);
  } catch (loginError) {
    console.error("Login failed:", loginError);
  }
}
|
||||||
|
|
||||||
|
/**
 * Send the email and password entered in the form to `/api/remote/register`.
 * The response is not used here; a failure is only logged.
 */
async function handleManagedRegister() {
  const credentials = { email: managedEmail, password: managedPassword };
  try {
    await backendStore.apiPost("/api/remote/register", credentials);
  } catch (registerError) {
    console.error("Register failed:", registerError);
  }
}
|
||||||
|
|
||||||
|
/**
 * Close the panel when the click landed on the backdrop itself.
 * Clicks on elements without the `settings-overlay` class are ignored.
 */
function handleOverlayClick(e: MouseEvent) {
  const clicked = e.target as HTMLElement;
  if (!clicked.classList.contains("settings-overlay")) {
    return;
  }
  handleCancel();
}
|
||||||
|
|
||||||
|
/** Window-level key handler: Escape dismisses the panel, other keys are ignored. */
function handleKeydown(e: KeyboardEvent) {
  if (e.key !== "Escape") {
    return;
  }
  handleCancel();
}
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<svelte:window onkeydown={handleKeydown} />
|
||||||
|
|
||||||
|
<!-- svelte-ignore a11y_click_events_have_key_events a11y_no_static_element_interactions -->
|
||||||
|
<div class="settings-overlay" role="presentation" onclick={handleOverlayClick}>
|
||||||
|
<div class="settings-panel">
|
||||||
|
<div class="settings-header">
|
||||||
|
<h2>Settings</h2>
|
||||||
|
<button class="close-btn" aria-label="Close settings" onclick={handleCancel}>
|
||||||
|
<svg
|
||||||
|
width="18"
|
||||||
|
height="18"
|
||||||
|
viewBox="0 0 24 24"
|
||||||
|
fill="none"
|
||||||
|
stroke="currentColor"
|
||||||
|
stroke-width="2"
|
||||||
|
stroke-linecap="round"
|
||||||
|
stroke-linejoin="round"
|
||||||
|
>
|
||||||
|
<line x1="18" y1="6" x2="6" y2="18"></line>
|
||||||
|
<line x1="6" y1="6" x2="18" y2="18"></line>
|
||||||
|
</svg>
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="settings-content">
|
||||||
|
<!-- User Settings -->
|
||||||
|
<section class="settings-section">
|
||||||
|
<h3>User Settings</h3>
|
||||||
|
<div class="field">
|
||||||
|
<label for="user-name">Display Name</label>
|
||||||
|
<input id="user-name" type="text" bind:value={userName} />
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<!-- Audio Settings -->
|
||||||
|
<section class="settings-section">
|
||||||
|
<h3>Audio Settings</h3>
|
||||||
|
<div class="field">
|
||||||
|
<label for="audio-device">Audio Device</label>
|
||||||
|
<select id="audio-device" bind:value={audioDevice}>
|
||||||
|
<option value="default">Default</option>
|
||||||
|
{#each audioDevices as device}
|
||||||
|
<option value={device.id}>{device.name}</option>
|
||||||
|
{/each}
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<!-- Transcription Settings -->
|
||||||
|
<section class="settings-section">
|
||||||
|
<h3>Transcription Settings</h3>
|
||||||
|
<div class="field">
|
||||||
|
<label for="model">Model</label>
|
||||||
|
<select id="model" bind:value={model}>
|
||||||
|
{#each modelOptions as opt}
|
||||||
|
<option value={opt}>{opt}</option>
|
||||||
|
{/each}
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
<div class="field">
|
||||||
|
<label for="language">Language</label>
|
||||||
|
<input id="language" type="text" bind:value={language} placeholder="en" />
|
||||||
|
</div>
|
||||||
|
<div class="field">
|
||||||
|
<label for="compute-device">Compute Device</label>
|
||||||
|
<select id="compute-device" bind:value={computeDevice}>
|
||||||
|
{#each computeDevices as dev}
|
||||||
|
<option value={dev.id}>{dev.name}</option>
|
||||||
|
{/each}
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
<div class="field">
|
||||||
|
<label for="compute-type">Compute Type</label>
|
||||||
|
<select id="compute-type" bind:value={computeType}>
|
||||||
|
{#each computeTypeOptions as opt}
|
||||||
|
<option value={opt.value}>{opt.label}</option>
|
||||||
|
{/each}
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<!-- Realtime Preview -->
|
||||||
|
<section class="settings-section">
|
||||||
|
<h3>Realtime Preview</h3>
|
||||||
|
<div class="field-row">
|
||||||
|
<label for="enable-realtime">Enable Realtime Preview</label>
|
||||||
|
<input
|
||||||
|
id="enable-realtime"
|
||||||
|
type="checkbox"
|
||||||
|
bind:checked={enableRealtime}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
{#if enableRealtime}
|
||||||
|
<div class="field">
|
||||||
|
<label for="realtime-model">Realtime Model</label>
|
||||||
|
<select id="realtime-model" bind:value={realtimeModel}>
|
||||||
|
{#each modelOptions as opt}
|
||||||
|
<option value={opt}>{opt}</option>
|
||||||
|
{/each}
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
<div class="field">
|
||||||
|
<label for="realtime-pause"
|
||||||
|
>Processing Pause: {realtimeProcessingPause.toFixed(2)}s</label
|
||||||
|
>
|
||||||
|
<input
|
||||||
|
id="realtime-pause"
|
||||||
|
type="range"
|
||||||
|
min="0.01"
|
||||||
|
max="1.0"
|
||||||
|
step="0.01"
|
||||||
|
bind:value={realtimeProcessingPause}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
{/if}
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<!-- VAD Settings -->
|
||||||
|
<section class="settings-section">
|
||||||
|
<h3>VAD Settings</h3>
|
||||||
|
<div class="field">
|
||||||
|
<label for="silero-sensitivity"
|
||||||
|
>Silero Sensitivity: {sileroSensitivity.toFixed(2)}</label
|
||||||
|
>
|
||||||
|
<input
|
||||||
|
id="silero-sensitivity"
|
||||||
|
type="range"
|
||||||
|
min="0.0"
|
||||||
|
max="1.0"
|
||||||
|
step="0.05"
|
||||||
|
bind:value={sileroSensitivity}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
<div class="field">
|
||||||
|
<label for="webrtc-sensitivity">WebRTC Sensitivity</label>
|
||||||
|
<select id="webrtc-sensitivity" bind:value={webrtcSensitivity}>
|
||||||
|
{#each webrtcOptions as opt}
|
||||||
|
<option value={opt.value}>{opt.label}</option>
|
||||||
|
{/each}
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<!-- Timing -->
|
||||||
|
<section class="settings-section">
|
||||||
|
<h3>Timing</h3>
|
||||||
|
<div class="field">
|
||||||
|
<label for="post-speech-silence"
|
||||||
|
>Post-Speech Silence: {postSpeechSilence.toFixed(2)}s</label
|
||||||
|
>
|
||||||
|
<input
|
||||||
|
id="post-speech-silence"
|
||||||
|
type="range"
|
||||||
|
min="0.1"
|
||||||
|
max="3.0"
|
||||||
|
step="0.1"
|
||||||
|
bind:value={postSpeechSilence}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
<div class="field">
|
||||||
|
<label for="min-recording"
|
||||||
|
>Min Recording Length: {minRecordingLength.toFixed(2)}s</label
|
||||||
|
>
|
||||||
|
<input
|
||||||
|
id="min-recording"
|
||||||
|
type="range"
|
||||||
|
min="0.1"
|
||||||
|
max="5.0"
|
||||||
|
step="0.1"
|
||||||
|
bind:value={minRecordingLength}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
<div class="field">
|
||||||
|
<label for="min-gap"
|
||||||
|
>Min Gap Between Recordings: {minGapBetween.toFixed(2)}s</label
|
||||||
|
>
|
||||||
|
<input
|
||||||
|
id="min-gap"
|
||||||
|
type="range"
|
||||||
|
min="0"
|
||||||
|
max="3.0"
|
||||||
|
step="0.1"
|
||||||
|
bind:value={minGapBetween}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
<div class="field-row">
|
||||||
|
<label for="continuous-mode">Continuous Mode</label>
|
||||||
|
<input
|
||||||
|
id="continuous-mode"
|
||||||
|
type="checkbox"
|
||||||
|
bind:checked={continuousMode}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<!-- Display Settings -->
|
||||||
|
<section class="settings-section">
|
||||||
|
<h3>Display Settings</h3>
|
||||||
|
<div class="field-row">
|
||||||
|
<label for="show-timestamps">Show Timestamps</label>
|
||||||
|
<input
|
||||||
|
id="show-timestamps"
|
||||||
|
type="checkbox"
|
||||||
|
bind:checked={showTimestamps}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
<div class="field">
|
||||||
|
<label for="fade-seconds"
|
||||||
|
>Fade After Seconds: {fadeSeconds} (0 = never)</label
|
||||||
|
>
|
||||||
|
<input
|
||||||
|
id="fade-seconds"
|
||||||
|
type="range"
|
||||||
|
min="0"
|
||||||
|
max="60"
|
||||||
|
step="1"
|
||||||
|
bind:value={fadeSeconds}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
<div class="field">
|
||||||
|
<label for="max-lines">Max Lines: {maxLines}</label>
|
||||||
|
<input
|
||||||
|
id="max-lines"
|
||||||
|
type="range"
|
||||||
|
min="10"
|
||||||
|
max="500"
|
||||||
|
step="10"
|
||||||
|
bind:value={maxLines}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
<div class="field">
|
||||||
|
<label for="font-size">Font Size: {fontSize}px</label>
|
||||||
|
<input
|
||||||
|
id="font-size"
|
||||||
|
type="range"
|
||||||
|
min="8"
|
||||||
|
max="32"
|
||||||
|
step="1"
|
||||||
|
bind:value={fontSize}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<!-- Color Settings -->
|
||||||
|
<section class="settings-section">
|
||||||
|
<h3>Color Settings</h3>
|
||||||
|
<div class="field-row">
|
||||||
|
<label for="user-color">User Color</label>
|
||||||
|
<input id="user-color" type="color" bind:value={userColor} />
|
||||||
|
</div>
|
||||||
|
<div class="field-row">
|
||||||
|
<label for="text-color">Text Color</label>
|
||||||
|
<input id="text-color" type="color" bind:value={textColor} />
|
||||||
|
</div>
|
||||||
|
<div class="field-row">
|
||||||
|
<label for="bg-color">Background Color</label>
|
||||||
|
<input id="bg-color" type="color" bind:value={backgroundColor} />
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<!-- Server Sync -->
|
||||||
|
<section class="settings-section">
|
||||||
|
<h3>Server Sync</h3>
|
||||||
|
<div class="field-row">
|
||||||
|
<label for="sync-enabled">Enable Server Sync</label>
|
||||||
|
<input
|
||||||
|
id="sync-enabled"
|
||||||
|
type="checkbox"
|
||||||
|
bind:checked={syncEnabled}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
{#if syncEnabled}
|
||||||
|
<div class="field">
|
||||||
|
<label for="sync-url">Server URL</label>
|
||||||
|
<input
|
||||||
|
id="sync-url"
|
||||||
|
type="url"
|
||||||
|
bind:value={syncUrl}
|
||||||
|
placeholder="http://localhost:3000/api/send"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
<div class="field">
|
||||||
|
<label for="sync-room">Room</label>
|
||||||
|
<input id="sync-room" type="text" bind:value={syncRoom} />
|
||||||
|
</div>
|
||||||
|
<div class="field">
|
||||||
|
<label for="sync-passphrase">Passphrase</label>
|
||||||
|
<input
|
||||||
|
id="sync-passphrase"
|
||||||
|
type="password"
|
||||||
|
bind:value={syncPassphrase}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
{/if}
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<!-- Remote Transcription -->
|
||||||
|
<section class="settings-section">
|
||||||
|
<h3>Remote Transcription</h3>
|
||||||
|
<div class="radio-group">
|
||||||
|
<label>
|
||||||
|
<input
|
||||||
|
type="radio"
|
||||||
|
name="remote-mode"
|
||||||
|
value="local"
|
||||||
|
bind:group={remoteMode}
|
||||||
|
/>
|
||||||
|
Local
|
||||||
|
</label>
|
||||||
|
<label>
|
||||||
|
<input
|
||||||
|
type="radio"
|
||||||
|
name="remote-mode"
|
||||||
|
value="managed"
|
||||||
|
bind:group={remoteMode}
|
||||||
|
/>
|
||||||
|
Managed
|
||||||
|
</label>
|
||||||
|
<label>
|
||||||
|
<input
|
||||||
|
type="radio"
|
||||||
|
name="remote-mode"
|
||||||
|
value="byok"
|
||||||
|
bind:group={remoteMode}
|
||||||
|
/>
|
||||||
|
BYOK (Bring Your Own Key)
|
||||||
|
</label>
|
||||||
|
</div>
|
||||||
|
{#if remoteMode !== "local"}
|
||||||
|
<div class="field">
|
||||||
|
<label for="remote-url">Server URL</label>
|
||||||
|
<input
|
||||||
|
id="remote-url"
|
||||||
|
type="url"
|
||||||
|
bind:value={remoteServerUrl}
|
||||||
|
placeholder="wss://your-proxy.com"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
{/if}
|
||||||
|
{#if remoteMode === "managed"}
|
||||||
|
<div class="managed-auth">
|
||||||
|
<div class="field">
|
||||||
|
<label for="managed-email">Email</label>
|
||||||
|
<input
|
||||||
|
id="managed-email"
|
||||||
|
type="email"
|
||||||
|
bind:value={managedEmail}
|
||||||
|
placeholder="email@example.com"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
<div class="field">
|
||||||
|
<label for="managed-password">Password</label>
|
||||||
|
<input
|
||||||
|
id="managed-password"
|
||||||
|
type="password"
|
||||||
|
bind:value={managedPassword}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
<div class="auth-buttons">
|
||||||
|
<button onclick={handleManagedLogin}>Login</button>
|
||||||
|
<button onclick={handleManagedRegister}>Register</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{/if}
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<!-- Updates -->
|
||||||
|
<section class="settings-section">
|
||||||
|
<h3>Updates</h3>
|
||||||
|
<div class="field-row">
|
||||||
|
<label for="auto-check-updates">Auto-Check for Updates</label>
|
||||||
|
<input
|
||||||
|
id="auto-check-updates"
|
||||||
|
type="checkbox"
|
||||||
|
bind:checked={autoCheckUpdates}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
<button onclick={handleCheckUpdates}>Check Now</button>
|
||||||
|
</section>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="settings-footer">
|
||||||
|
<button onclick={handleCancel}>Cancel</button>
|
||||||
|
<button class="primary" onclick={handleSave}>Save</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<style>
|
||||||
|
.settings-overlay {
|
||||||
|
position: fixed;
|
||||||
|
top: 0;
|
||||||
|
left: 0;
|
||||||
|
right: 0;
|
||||||
|
bottom: 0;
|
||||||
|
background-color: rgba(0, 0, 0, 0.6);
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
justify-content: center;
|
||||||
|
z-index: 1000;
|
||||||
|
}
|
||||||
|
|
||||||
|
.settings-panel {
|
||||||
|
background-color: var(--bg-primary);
|
||||||
|
border: 1px solid var(--border-color);
|
||||||
|
border-radius: 12px;
|
||||||
|
width: 560px;
|
||||||
|
max-width: 95vw;
|
||||||
|
max-height: 85vh;
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
box-shadow: 0 8px 32px rgba(0, 0, 0, 0.5);
|
||||||
|
}
|
||||||
|
|
||||||
|
.settings-header {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
justify-content: space-between;
|
||||||
|
padding: 16px 20px;
|
||||||
|
border-bottom: 1px solid var(--border-color);
|
||||||
|
flex-shrink: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.settings-header h2 {
|
||||||
|
font-size: 18px;
|
||||||
|
font-weight: 600;
|
||||||
|
color: var(--text-primary);
|
||||||
|
}
|
||||||
|
|
||||||
|
.close-btn {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
justify-content: center;
|
||||||
|
width: 32px;
|
||||||
|
height: 32px;
|
||||||
|
padding: 0;
|
||||||
|
border: none;
|
||||||
|
border-radius: 6px;
|
||||||
|
background-color: transparent;
|
||||||
|
color: var(--text-secondary);
|
||||||
|
cursor: pointer;
|
||||||
|
}
|
||||||
|
|
||||||
|
.close-btn:hover {
|
||||||
|
background-color: var(--bg-tertiary);
|
||||||
|
color: var(--text-primary);
|
||||||
|
}
|
||||||
|
|
||||||
|
.settings-content {
|
||||||
|
flex: 1;
|
||||||
|
overflow-y: auto;
|
||||||
|
padding: 16px 20px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.settings-section {
|
||||||
|
margin-bottom: 24px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.settings-section:last-child {
|
||||||
|
margin-bottom: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.settings-section h3 {
|
||||||
|
font-size: 14px;
|
||||||
|
font-weight: 600;
|
||||||
|
color: var(--accent-blue);
|
||||||
|
text-transform: uppercase;
|
||||||
|
letter-spacing: 0.5px;
|
||||||
|
margin-bottom: 12px;
|
||||||
|
padding-bottom: 6px;
|
||||||
|
border-bottom: 1px solid var(--border-color);
|
||||||
|
}
|
||||||
|
|
||||||
|
.field {
|
||||||
|
margin-bottom: 12px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.field label {
|
||||||
|
display: block;
|
||||||
|
margin-bottom: 4px;
|
||||||
|
font-size: 12px;
|
||||||
|
color: var(--text-secondary);
|
||||||
|
}
|
||||||
|
|
||||||
|
.field-row {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
justify-content: space-between;
|
||||||
|
margin-bottom: 12px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.field-row label {
|
||||||
|
font-size: 13px;
|
||||||
|
color: var(--text-primary);
|
||||||
|
}
|
||||||
|
|
||||||
|
.radio-group {
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
gap: 8px;
|
||||||
|
margin-bottom: 12px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.radio-group label {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 8px;
|
||||||
|
font-size: 13px;
|
||||||
|
color: var(--text-primary);
|
||||||
|
cursor: pointer;
|
||||||
|
}
|
||||||
|
|
||||||
|
.managed-auth {
|
||||||
|
margin-top: 8px;
|
||||||
|
padding: 12px;
|
||||||
|
background-color: var(--bg-secondary);
|
||||||
|
border-radius: 8px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.auth-buttons {
|
||||||
|
display: flex;
|
||||||
|
gap: 8px;
|
||||||
|
margin-top: 8px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.settings-footer {
|
||||||
|
display: flex;
|
||||||
|
justify-content: flex-end;
|
||||||
|
gap: 8px;
|
||||||
|
padding: 16px 20px;
|
||||||
|
border-top: 1px solid var(--border-color);
|
||||||
|
flex-shrink: 0;
|
||||||
|
}
|
||||||
|
</style>
|
||||||
106
src/lib/components/StatusBar.svelte
Normal file
106
src/lib/components/StatusBar.svelte
Normal file
@@ -0,0 +1,106 @@
|
|||||||
|
<script lang="ts">
|
||||||
|
import { backendStore } from "$lib/stores/backend";
|
||||||
|
import { configStore } from "$lib/stores/config";
|
||||||
|
|
||||||
|
// Colour of the status dot for the current backend app state.
// "transcribing" and "error" share the same red; any state that is not
// listed here falls back to grey.
let statusColor = $derived.by(() => {
  const current = backendStore.appState;
  if (current === "initializing") return "#ff9800";
  if (current === "ready") return "#4caf50";
  if (current === "transcribing" || current === "error") return "#f44336";
  return "#888";
});

// The dot only pulses while the backend reports "transcribing".
let isPulsing = $derived(backendStore.appState === "transcribing");

// Name shown on the right-hand side, read from the loaded configuration.
let userName = $derived(configStore.config.user.name);
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<div class="status-bar">
|
||||||
|
<div class="status-left">
|
||||||
|
<span
|
||||||
|
class="status-indicator"
|
||||||
|
class:pulsing={isPulsing}
|
||||||
|
style="background-color: {statusColor}"
|
||||||
|
></span>
|
||||||
|
<span class="state-message">{backendStore.stateMessage}</span>
|
||||||
|
</div>
|
||||||
|
<div class="status-right">
|
||||||
|
{#if backendStore.deviceInfo}
|
||||||
|
<span class="device-info">{backendStore.deviceInfo}</span>
|
||||||
|
<span class="separator">|</span>
|
||||||
|
{/if}
|
||||||
|
<span class="user-name">{userName}</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<style>
|
||||||
|
.status-bar {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
justify-content: space-between;
|
||||||
|
padding: 6px 20px;
|
||||||
|
background-color: var(--bg-secondary);
|
||||||
|
border-bottom: 1px solid var(--border-color);
|
||||||
|
font-size: 12px;
|
||||||
|
flex-shrink: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.status-left {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 8px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.status-right {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 8px;
|
||||||
|
color: var(--text-secondary);
|
||||||
|
}
|
||||||
|
|
||||||
|
.status-indicator {
|
||||||
|
width: 10px;
|
||||||
|
height: 10px;
|
||||||
|
border-radius: 50%;
|
||||||
|
flex-shrink: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.status-indicator.pulsing {
|
||||||
|
animation: pulse 1.5s ease-in-out infinite;
|
||||||
|
}
|
||||||
|
|
||||||
|
@keyframes pulse {
|
||||||
|
0%,
|
||||||
|
100% {
|
||||||
|
opacity: 1;
|
||||||
|
box-shadow: 0 0 0 0 rgba(244, 67, 54, 0.4);
|
||||||
|
}
|
||||||
|
50% {
|
||||||
|
opacity: 0.7;
|
||||||
|
box-shadow: 0 0 0 6px rgba(244, 67, 54, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
.state-message {
|
||||||
|
color: var(--text-primary);
|
||||||
|
}
|
||||||
|
|
||||||
|
.device-info {
|
||||||
|
color: var(--text-secondary);
|
||||||
|
}
|
||||||
|
|
||||||
|
.separator {
|
||||||
|
color: var(--text-muted);
|
||||||
|
}
|
||||||
|
|
||||||
|
.user-name {
|
||||||
|
color: var(--accent-green);
|
||||||
|
font-weight: 500;
|
||||||
|
}
|
||||||
|
</style>
|
||||||
110
src/lib/components/TranscriptionDisplay.svelte
Normal file
110
src/lib/components/TranscriptionDisplay.svelte
Normal file
@@ -0,0 +1,110 @@
|
|||||||
|
<script lang="ts">
|
||||||
|
import { transcriptionStore } from "$lib/stores/transcriptions";
|
||||||
|
import { configStore } from "$lib/stores/config";
|
||||||
|
|
||||||
|
// Scroll container element, bound from the markup via bind:this.
let container: HTMLDivElement | undefined = $state();
let showTimestamps = $derived(configStore.config.display.show_timestamps);
let items = $derived(transcriptionStore.items);

// Auto-scroll: keep the newest line visible.
//
// The effect re-runs when the number of items changes AND when the text of
// the last item changes. The second dependency matters because a live
// preview is replaced in place (same list length), so tracking the length
// alone would leave a growing preview below the visible area.
$effect(() => {
  const count = items.length;
  // Read purely to register the dependency; the value itself is not needed.
  const latestText = count > 0 ? items[count - 1].text : "";
  void latestText;

  const el = container;
  if (!el) return;

  // Wait one frame so the DOM reflects the new content before measuring.
  const frame = requestAnimationFrame(() => {
    el.scrollTop = el.scrollHeight;
  });

  // Teardown: drop a still-pending frame when the effect re-runs or the
  // component is destroyed, so callbacks never pile up.
  return () => cancelAnimationFrame(frame);
});
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<div class="transcription-display" bind:this={container}>
|
||||||
|
{#each items as item (item.id)}
|
||||||
|
<div class="transcription-item" class:preview={item.isPreview}>
|
||||||
|
{#if showTimestamps && item.timestamp}
|
||||||
|
<span class="timestamp">[{item.timestamp}]</span>
|
||||||
|
{/if}
|
||||||
|
{#if item.userName}
|
||||||
|
<span class="user-name">{item.userName}:</span>
|
||||||
|
{/if}
|
||||||
|
{#if item.isPreview}
|
||||||
|
<span class="preview-indicator">[...]</span>
|
||||||
|
{/if}
|
||||||
|
<span class="text">{item.text}</span>
|
||||||
|
</div>
|
||||||
|
{:else}
|
||||||
|
<div class="empty-state">
|
||||||
|
Transcriptions will appear here...
|
||||||
|
</div>
|
||||||
|
{/each}
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<style>
|
||||||
|
.transcription-display {
|
||||||
|
flex: 1;
|
||||||
|
overflow-y: auto;
|
||||||
|
padding: 12px 20px;
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
gap: 6px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.transcription-item {
|
||||||
|
padding: 6px 10px;
|
||||||
|
border-radius: 4px;
|
||||||
|
background-color: rgba(255, 255, 255, 0.03);
|
||||||
|
animation: fadeIn 0.2s ease-out;
|
||||||
|
line-height: 1.6;
|
||||||
|
word-wrap: break-word;
|
||||||
|
}
|
||||||
|
|
||||||
|
.transcription-item.preview {
|
||||||
|
font-style: italic;
|
||||||
|
opacity: 0.7;
|
||||||
|
}
|
||||||
|
|
||||||
|
.timestamp {
|
||||||
|
color: #888;
|
||||||
|
font-size: 0.85em;
|
||||||
|
margin-right: 8px;
|
||||||
|
font-family: monospace;
|
||||||
|
}
|
||||||
|
|
||||||
|
.user-name {
|
||||||
|
color: #4caf50;
|
||||||
|
font-weight: 700;
|
||||||
|
margin-right: 6px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.preview-indicator {
|
||||||
|
color: #888;
|
||||||
|
font-size: 0.85em;
|
||||||
|
margin-right: 4px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.text {
|
||||||
|
color: #ffffff;
|
||||||
|
}
|
||||||
|
|
||||||
|
.empty-state {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
justify-content: center;
|
||||||
|
height: 100%;
|
||||||
|
color: var(--text-muted);
|
||||||
|
font-size: 15px;
|
||||||
|
font-style: italic;
|
||||||
|
}
|
||||||
|
|
||||||
|
@keyframes fadeIn {
|
||||||
|
from {
|
||||||
|
opacity: 0;
|
||||||
|
transform: translateY(4px);
|
||||||
|
}
|
||||||
|
to {
|
||||||
|
opacity: 1;
|
||||||
|
transform: translateY(0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
</style>
|
||||||
266
src/lib/stores/backend.ts
Normal file
266
src/lib/stores/backend.ts
Normal file
@@ -0,0 +1,266 @@
|
|||||||
|
/**
|
||||||
|
* Backend store - manages WebSocket connection and REST API communication
|
||||||
|
* with the Python backend server running on localhost.
|
||||||
|
*
|
||||||
|
* The backend port defaults to 8081 but can be updated at runtime via
|
||||||
|
* `setPort()`. The WebSocket connects to /ws/control for real-time push
|
||||||
|
* of transcriptions, previews, and state changes.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/** Lifecycle of the control WebSocket as tracked by this store. */
export type ConnectionState =
  | "connecting"
  | "connected"
  | "disconnected"
  | "error";

/** Application state values stored from `state_changed` messages. */
export type AppState =
  | "initializing"
  | "ready"
  | "transcribing"
  | "reloading"
  | "error";

/** Shape of the reactive state object owned by the backend store. */
interface BackendState {
  port: number;
  connectionState: ConnectionState;
  appState: AppState;
  stateMessage: string;
  deviceInfo: string;
  wsConnection: WebSocket | null;
  version: string;
  lastError: string;
}

// Reactive store state; nothing is connected until connect() is called.
let state = $state<BackendState>({
  port: 8081,
  connectionState: "disconnected",
  appState: "initializing",
  stateMessage: "Connecting to backend...",
  deviceInfo: "",
  wsConnection: null,
  version: "1.4.0",
  lastError: "",
});

// Reconnect bookkeeping. These are plain (non-reactive) module variables.
let reconnectTimer: ReturnType<typeof setTimeout> | null = null;
let reconnectAttempts = 0;
const MAX_RECONNECT_DELAY_MS = 30_000;
const BASE_RECONNECT_DELAY_MS = 1_000;
|
||||||
|
|
||||||
|
// ── URL helpers ──────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * Build an absolute REST URL on the local backend for `path`.
 * A leading "/" is added when `path` does not already start with one.
 */
function apiUrl(path: string): string {
  const origin = `http://localhost:${state.port}`;
  if (path.startsWith("/")) {
    return `${origin}${path}`;
  }
  return `${origin}/${path}`;
}
|
||||||
|
|
||||||
|
/**
 * `fetch` wrapper that targets the local backend.
 *
 * For every method other than GET a JSON `Content-Type` header is added,
 * unless the caller already supplied a `Content-Type` of their own.
 */
async function apiFetch(path: string, options?: RequestInit): Promise<Response> {
  const target = apiUrl(path);
  const verb = (options?.method ?? "GET").toUpperCase();
  const headers = new Headers(options?.headers);

  const needsJsonHeader = verb !== "GET" && !headers.has("Content-Type");
  if (needsJsonHeader) {
    headers.set("Content-Type", "application/json");
  }

  return fetch(target, { ...options, headers });
}
|
||||||
|
|
||||||
|
// ── WebSocket management ─────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * Start a fresh WebSocket session: drop whatever connection or pending
 * reconnect exists, reset the retry counter, and open a new socket.
 */
function connectWebSocket() {
  disconnect();

  reconnectAttempts = 0;
  state.connectionState = "connecting";

  _openSocket();
}
|
||||||
|
|
||||||
|
/**
 * Open the control WebSocket on the current port and wire its handlers.
 *
 * - open:    marks the store connected and clears retry bookkeeping.
 * - message: parses the JSON payload and forwards it to
 *            handleWebSocketMessage(). Malformed or non-object payloads are
 *            dropped with a warning; an exception thrown by the handler is
 *            logged instead of being swallowed silently.
 * - close:   unless the store was deliberately disconnected, flags an error
 *            and schedules a reconnect.
 * - error:   only records the error; the close event that follows drives
 *            the reconnect.
 *
 * If constructing the socket throws, the store is flagged as errored and a
 * reconnect is scheduled.
 */
function _openSocket() {
  const wsUrl = `ws://localhost:${state.port}/ws/control`;

  try {
    const ws = new WebSocket(wsUrl);

    ws.onopen = () => {
      state.connectionState = "connected";
      state.lastError = "";
      reconnectAttempts = 0;
      if (reconnectTimer) {
        clearTimeout(reconnectTimer);
        reconnectTimer = null;
      }
    };

    ws.onmessage = (event) => {
      let payload: unknown;
      try {
        payload = JSON.parse(event.data);
      } catch (err) {
        // Not JSON: nothing we can act on, but leave a trace for debugging.
        console.warn("[backend] Ignoring non-JSON WebSocket message:", err);
        return;
      }

      // The handler reads properties off the payload, so it must be an object.
      if (payload === null || typeof payload !== "object" || Array.isArray(payload)) {
        console.warn("[backend] Ignoring unexpected WebSocket payload:", payload);
        return;
      }

      try {
        handleWebSocketMessage(payload as Record<string, unknown>);
      } catch (err) {
        // Keep the socket alive, but make handler bugs visible.
        console.error("[backend] WebSocket message handler failed:", err);
      }
    };

    ws.onclose = () => {
      state.wsConnection = null;
      if (state.connectionState !== "disconnected") {
        state.connectionState = "error";
        state.stateMessage = "Disconnected from backend";
        _scheduleReconnect();
      }
    };

    ws.onerror = () => {
      state.lastError = "WebSocket error";
      // onclose fires after this, which handles reconnect
    };

    state.wsConnection = ws;
  } catch {
    state.connectionState = "error";
    state.stateMessage = "Failed to connect";
    _scheduleReconnect();
  }
}
|
||||||
|
|
||||||
|
/**
 * Arm a single reconnect timer using exponential backoff.
 *
 * The delay doubles with every attempt, starting at
 * BASE_RECONNECT_DELAY_MS and capped at MAX_RECONNECT_DELAY_MS. Does
 * nothing when a timer is already pending. When the timer fires the socket
 * is reopened unless the store has been disconnected in the meantime.
 */
function _scheduleReconnect() {
  if (reconnectTimer) return;

  const backoff = BASE_RECONNECT_DELAY_MS * 2 ** reconnectAttempts;
  const delay = backoff > MAX_RECONNECT_DELAY_MS ? MAX_RECONNECT_DELAY_MS : backoff;
  reconnectAttempts += 1;

  reconnectTimer = setTimeout(() => {
    reconnectTimer = null;
    if (state.connectionState === "disconnected") return;
    state.connectionState = "connecting";
    _openSocket();
  }, delay);
}
|
||||||
|
|
||||||
|
/**
 * Deliberately shut the connection down: cancel any pending reconnect,
 * mark the store as disconnected and close the current socket, if any.
 */
function disconnect() {
  if (reconnectTimer) {
    clearTimeout(reconnectTimer);
    reconnectTimer = null;
  }

  state.connectionState = "disconnected";

  const socket = state.wsConnection;
  if (!socket) return;

  // Detach the close/error handlers first so this intentional close does
  // not run the reconnect logic.
  socket.onclose = null;
  socket.onerror = null;
  socket.close();
  state.wsConnection = null;
}
|
||||||
|
|
||||||
|
// ── WebSocket message handling ───────────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * Route one decoded WebSocket message by its `type` field.
 *
 * - "state_changed": copies `state` / `message` into the store when present.
 * - "error":         stores `message` as the last error.
 * - "transcription", "preview", "credits_low": re-emitted on `window` as
 *   `backend:<type>` CustomEvents carrying the whole message as `detail`,
 *   so other stores can subscribe.
 *
 * Any other type is ignored.
 */
function handleWebSocketMessage(data: Record<string, unknown>) {
  const forward = (eventName: string) => {
    window.dispatchEvent(new CustomEvent(eventName, { detail: data }));
  };

  switch (data.type) {
    case "state_changed":
      if (data.state) {
        state.appState = data.state as AppState;
      }
      if (data.message) {
        state.stateMessage = data.message as string;
      }
      break;

    case "error":
      state.lastError = (data.message as string) ?? "Unknown error";
      break;

    case "transcription":
      forward("backend:transcription");
      break;

    case "preview":
      forward("backend:preview");
      break;

    case "credits_low":
      forward("backend:credits_low");
      break;
  }
}
|
||||||
|
|
||||||
|
// ── Port management ──────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * Point the store at a different backend port.
 * No-op when the port is unchanged. If the store was not in the
 * "disconnected" state, the WebSocket is reopened against the new port.
 */
function setPort(newPort: number) {
  if (state.port === newPort) return;

  const wasActive = state.connectionState !== "disconnected";
  state.port = newPort;

  if (wasActive) {
    connectWebSocket();
  }
}
|
||||||
|
|
||||||
|
// ── Typed REST helpers ───────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * GET `path` from the backend and return the parsed JSON body.
 * @throws Error when the response status is not OK.
 */
async function apiGet<T = unknown>(path: string): Promise<T> {
  const response = await apiFetch(path);
  if (response.ok) {
    return response.json();
  }
  throw new Error(`GET ${path} failed: ${response.status}`);
}
|
||||||
|
|
||||||
|
/**
 * POST to `path` and return the parsed JSON response.
 * `body` is JSON-encoded; when it is `undefined` no request body is sent.
 * @throws Error when the response status is not OK.
 */
async function apiPost<T = unknown>(
  path: string,
  body?: unknown
): Promise<T> {
  const payload = body === undefined ? undefined : JSON.stringify(body);
  const response = await apiFetch(path, { method: "POST", body: payload });
  if (response.ok) {
    return response.json();
  }
  throw new Error(`POST ${path} failed: ${response.status}`);
}
|
||||||
|
|
||||||
|
/**
 * PUT to `path` and return the parsed JSON response.
 * `body` is JSON-encoded; when it is `undefined` no request body is sent.
 * @throws Error when the response status is not OK.
 */
async function apiPut<T = unknown>(
  path: string,
  body?: unknown
): Promise<T> {
  const payload = body === undefined ? undefined : JSON.stringify(body);
  const response = await apiFetch(path, { method: "PUT", body: payload });
  if (response.ok) {
    return response.json();
  }
  throw new Error(`PUT ${path} failed: ${response.status}`);
}
|
||||||
|
|
||||||
|
// ── Public API ───────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * Public facade of the backend store.
 *
 * State is exposed through getters rather than plain properties so that
 * every read goes through the reactive `state` object.
 */
export const backendStore = {
  // Connection
  get port() { return state.port; },
  get connectionState() { return state.connectionState; },
  get connected() { return state.connectionState === "connected"; },

  // Backend status
  get appState() { return state.appState; },
  get stateMessage() { return state.stateMessage; },
  get deviceInfo() { return state.deviceInfo; },
  get version() { return state.version; },
  get lastError() { return state.lastError; },

  // Endpoints derived from the current port
  get apiBaseUrl() { return `http://localhost:${state.port}`; },
  get wsUrl() { return `ws://localhost:${state.port}/ws/control`; },

  // Actions
  setPort,
  connect: connectWebSocket,
  disconnect,

  // REST helpers
  apiUrl,
  apiFetch,
  apiGet,
  apiPost,
  apiPut,
};
|
||||||
243
src/lib/stores/config.ts
Normal file
243
src/lib/stores/config.ts
Normal file
@@ -0,0 +1,243 @@
|
|||||||
|
/**
|
||||||
|
* Config store - manages application configuration loaded from
|
||||||
|
* and saved to the Python backend via the backend store's API helpers.
|
||||||
|
*
|
||||||
|
* The backend accepts PUT /api/config with `{ settings: { "dot.key": value } }`.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { backendStore } from "$lib/stores/backend";
|
||||||
|
|
||||||
|
/**
 * Typed view of the configuration tree used by the UI.
 *
 * Individual settings are addressed with dot-notation keys built from these
 * section and field names (e.g. "transcription.model") when they are sent
 * back to the backend.
 */
export interface AppConfig {
  user: {
    name: string;
    id: string;
  };

  audio: {
    input_device: string;
    sample_rate: number;
  };

  transcription: {
    model: string;
    device: string;
    language: string;
    compute_type: string;
    enable_realtime_transcription: boolean;
    realtime_model: string;
    realtime_processing_pause: number;
    silero_sensitivity: number;
    silero_use_onnx: boolean;
    webrtc_sensitivity: number;
    post_speech_silence_duration: number;
    min_length_of_recording: number;
    min_gap_between_recordings: number;
    pre_recording_buffer_duration: number;
    beam_size: number;
    initial_prompt: string;
    no_log_file: boolean;
    continuous_mode: boolean;
  };

  server_sync: {
    enabled: boolean;
    url: string;
    room: string;
    passphrase: string;
  };

  display: {
    show_timestamps: boolean;
    max_lines: number;
    font_source: string;
    font_family: string;
    websafe_font: string;
    google_font: string;
    custom_font_file: string;
    font_size: number;
    theme: string;
    fade_after_seconds: number;
    user_color: string;
    text_color: string;
    background_color: string;
  };

  web_server: {
    port: number;
    host: string;
  };

  remote: {
    mode: string;
    server_url: string;
    auth_token: string;
    byok_api_key: string;
    deepgram_model: string;
    language: string;
    fallback_to_local: boolean;
  };

  updates: {
    auto_check: boolean;
    gitea_url: string;
    owner: string;
    repo: string;
    skipped_versions: string[];
    last_check: string;
    check_interval_hours: number;
  };
}
|
||||||
|
|
||||||
|
/**
 * Build a brand-new configuration object populated with default values.
 *
 * A fresh object is created on every call, so callers may mutate or merge
 * into the result without affecting later calls.
 */
function getDefaultConfig(): AppConfig {
  const user: AppConfig["user"] = { name: "User", id: "" };

  const audio: AppConfig["audio"] = {
    input_device: "default",
    sample_rate: 16000,
  };

  const transcription: AppConfig["transcription"] = {
    model: "base.en",
    device: "auto",
    language: "en",
    compute_type: "default",
    enable_realtime_transcription: false,
    realtime_model: "tiny.en",
    realtime_processing_pause: 0.1,
    silero_sensitivity: 0.4,
    silero_use_onnx: true,
    webrtc_sensitivity: 3,
    post_speech_silence_duration: 0.3,
    min_length_of_recording: 0.5,
    min_gap_between_recordings: 0,
    pre_recording_buffer_duration: 0.2,
    beam_size: 5,
    initial_prompt: "",
    no_log_file: true,
    continuous_mode: false,
  };

  const server_sync: AppConfig["server_sync"] = {
    enabled: false,
    url: "http://localhost:3000/api/send",
    room: "default",
    passphrase: "",
  };

  const display: AppConfig["display"] = {
    show_timestamps: true,
    max_lines: 100,
    font_source: "System Font",
    font_family: "Courier",
    websafe_font: "Arial",
    google_font: "Roboto",
    custom_font_file: "",
    font_size: 12,
    theme: "dark",
    fade_after_seconds: 10,
    user_color: "#4CAF50",
    text_color: "#FFFFFF",
    background_color: "#000000B3",
  };

  const web_server: AppConfig["web_server"] = {
    port: 8080,
    host: "127.0.0.1",
  };

  const remote: AppConfig["remote"] = {
    mode: "local",
    server_url: "",
    auth_token: "",
    byok_api_key: "",
    deepgram_model: "nova-2",
    language: "en-US",
    fallback_to_local: true,
  };

  const updates: AppConfig["updates"] = {
    auto_check: true,
    gitea_url: "https://repo.anhonesthost.net",
    owner: "streamer-tools",
    repo: "local-transcription",
    skipped_versions: [],
    last_check: "",
    check_interval_hours: 24,
  };

  return {
    user,
    audio,
    transcription,
    server_sync,
    display,
    web_server,
    remote,
    updates,
  };
}
|
||||||
|
|
||||||
|
// Current configuration tree; holds the defaults until a fetch succeeds.
let config = $state<AppConfig>(getDefaultConfig());

// True while a fetch or an update request is in flight.
let loading = $state(false);

// Message of the most recent failed request, or "" when there is none.
let error = $state("");
|
||||||
|
|
||||||
|
/**
 * Load the complete configuration from the backend (GET /api/config) and
 * store it, layered over the defaults so that every key is present.
 *
 * Failures do not throw: the message is stored in `error` and logged, and
 * the previously held configuration is left in place.
 */
async function fetchConfig(): Promise<void> {
  error = "";
  loading = true;

  try {
    const remote = await backendStore.apiGet<Record<string, unknown>>("/api/config");
    const merged = deepMerge(getDefaultConfig(), remote);
    config = merged as AppConfig;
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    error = message;
    console.error("[config] fetchConfig failed:", message);
  } finally {
    loading = false;
  }
}
|
||||||
|
|
||||||
|
/**
 * Recursively merge `source` over `target` and return a new object.
 *
 * - When both sides hold a plain (non-null, non-array) object under the
 *   same key, the two are merged recursively.
 * - Otherwise the value from `source` wins; this includes arrays, `null`
 *   and primitives.
 * - Keys that exist only in `target` are kept as they are.
 *
 * Neither input is mutated. Only own properties of `target` are treated as
 * existing values, and a `__proto__` key in `source` is skipped: assigning
 * it would go through the prototype setter and replace the prototype of
 * the merged object instead of storing a setting.
 *
 * @param target Base object (typically the defaults).
 * @param source Object whose values take precedence.
 * @returns The merged copy.
 */
function deepMerge(target: Record<string, unknown>, source: Record<string, unknown>): Record<string, unknown> {
  const isPlainObject = (value: unknown): value is Record<string, unknown> =>
    typeof value === "object" && value !== null && !Array.isArray(value);

  const result = { ...target };

  for (const key of Object.keys(source)) {
    if (key === "__proto__") continue;

    const incoming = source[key];
    // Ignore inherited members such as Object.prototype methods.
    const existing = Object.prototype.hasOwnProperty.call(target, key)
      ? target[key]
      : undefined;

    result[key] =
      isPlainObject(incoming) && isPlainObject(existing)
        ? deepMerge(existing, incoming)
        : incoming;
  }

  return result;
}
|
||||||
|
|
||||||
|
/**
 * Push a batch of setting changes to the backend.
 *
 * Sends PUT /api/config with `{ settings }`, where the keys of `settings`
 * are dot-notation paths such as `"transcription.model"`. After a
 * successful update the configuration is fetched again so the local copy
 * matches what the backend now holds.
 *
 * @returns The backend's response payload.
 * @throws Re-throws the request error after recording it in `error`.
 */
async function updateConfig(
  settings: Record<string, unknown>,
): Promise<{ status: string; message: string; engine_reloaded: boolean }> {
  error = "";
  loading = true;

  try {
    const response = await backendStore.apiPut<{
      status: string;
      message: string;
      engine_reloaded: boolean;
    }>("/api/config", { settings });

    // Re-read the tree so the UI reflects the saved values.
    await fetchConfig();

    return response;
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    error = message;
    console.error("[config] updateConfig failed:", message);
    throw err;
  } finally {
    loading = false;
  }
}
|
||||||
|
|
||||||
|
/**
 * Public facade of the config store. The three values are exposed through
 * getters so that every read sees the current value of the reactive
 * module variables.
 */
export const configStore = {
  get config() { return config; },
  get loading() { return loading; },
  get error() { return error; },

  fetchConfig,
  updateConfig,
};
|
||||||
109
src/lib/stores/transcriptions.ts
Normal file
109
src/lib/stores/transcriptions.ts
Normal file
@@ -0,0 +1,109 @@
|
|||||||
|
/**
|
||||||
|
* Transcriptions store - manages the list of transcription items
|
||||||
|
* received from the backend via WebSocket.
|
||||||
|
*/
|
||||||
|
|
||||||
|
export interface TranscriptionItem {
|
||||||
|
id: string;
|
||||||
|
text: string;
|
||||||
|
userName: string;
|
||||||
|
timestamp: string;
|
||||||
|
isPreview: boolean;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Backing list for the store: final transcriptions plus, at most, one preview.
// NOTE(review): `$state` is a Svelte 5 rune; runes are only compiled in
// `.svelte` and `.svelte.ts/.js` modules — confirm this file is named/processed
// accordingly.
let items = $state<TranscriptionItem[]>([]);

// Monotonic counter that keeps ids distinct within the same millisecond.
let nextId = 0;

/** Build an id of the form `t-<epoch ms>-<counter>` and advance the counter. */
function generateId(): string {
  const stamp = Date.now();
  const sequence = nextId++;
  return `t-${stamp}-${sequence}`;
}
|
||||||
|
|
||||||
|
/**
 * Append a final transcription to the list.
 *
 * Any preview entry currently in the list is removed first, and the list is
 * trimmed from the front so it never holds more than 500 entries.
 * Missing (null/undefined) payload fields are stored as empty strings.
 */
function addTranscription(data: {
  text?: string;
  user_name?: string;
  timestamp?: string;
}) {
  // The final result supersedes the preview, so drop the preview entry.
  const stalePreview = items.findIndex((entry) => entry.isPreview);
  if (stalePreview >= 0) {
    items.splice(stalePreview, 1);
  }

  const finalItem: TranscriptionItem = {
    id: generateId(),
    text: data.text ?? "",
    userName: data.user_name ?? "",
    timestamp: data.timestamp ?? "",
    isPreview: false,
  };
  items.push(finalItem);

  // Cap the history: discard the oldest entries beyond 500.
  const overflow = items.length - 500;
  if (overflow > 0) {
    items.splice(0, overflow);
  }
}
|
||||||
|
|
||||||
|
/**
 * Create or refresh the single preview entry.
 *
 * If a preview already exists it is replaced in place and keeps its id;
 * otherwise a new preview entry with a fresh id is appended.
 * Missing (null/undefined) payload fields are stored as empty strings.
 */
function setPreview(data: {
  text?: string;
  user_name?: string;
  timestamp?: string;
}) {
  const slot = items.findIndex((entry) => entry.isPreview);
  const hasPreview = slot !== -1;

  const nextPreview: TranscriptionItem = {
    // Reuse the existing id so the entry keeps its identity across updates.
    id: hasPreview ? items[slot].id : generateId(),
    text: data.text ?? "",
    userName: data.user_name ?? "",
    timestamp: data.timestamp ?? "",
    isPreview: true,
  };

  if (!hasPreview) {
    items.push(nextPreview);
    return;
  }

  items[slot] = nextPreview;
}
|
||||||
|
|
||||||
|
/** Remove every entry (final and preview) from the list, in place. */
function clearAll() {
  items.splice(0, items.length);
}
|
||||||
|
|
||||||
|
/**
 * Render all final transcriptions as plain text, one per line.
 *
 * Each line is `[timestamp] userName: text`; the timestamp and user-name
 * parts are omitted when empty. Preview entries are skipped.
 */
function getPlainText(): string {
  const lines: string[] = [];

  for (const entry of items) {
    if (entry.isPreview) continue;

    const stamp = entry.timestamp ? `[${entry.timestamp}] ` : "";
    const speaker = entry.userName ? `${entry.userName}: ` : "";
    lines.push(`${stamp}${speaker}${entry.text}`);
  }

  return lines.join("\n");
}
|
||||||
|
|
||||||
|
// Listen for backend events
|
||||||
|
// Only register listeners when a `window` global exists.
if (typeof window !== "undefined") {
  // Final transcription: the event's `detail` is passed on as the payload.
  const onTranscription = ((event: CustomEvent) => {
    addTranscription(event.detail);
  }) as EventListener;

  // Live preview: the event's `detail` is passed on as the payload.
  const onPreview = ((event: CustomEvent) => {
    setPreview(event.detail);
  }) as EventListener;

  window.addEventListener("backend:transcription", onTranscription);
  window.addEventListener("backend:preview", onPreview);
}
|
||||||
|
|
||||||
|
/**
 * Public facade of the transcription store.
 *
 * The list views are getters so each read reflects the current contents of
 * the module-level `items` array.
 */
export const transcriptionStore = {
  /** The full list, including the preview entry if one exists. */
  get items() {
    return items;
  },

  /** The preview entry, or `null` when there is none. */
  get currentPreview(): TranscriptionItem | null {
    const preview = items.find((entry) => entry.isPreview);
    return preview ?? null;
  },

  /** A new array holding only the final (non-preview) entries. */
  get transcriptions(): TranscriptionItem[] {
    return items.filter((entry) => !entry.isPreview);
  },

  addTranscription,
  setPreview,
  clearAll,
  getPlainText,
};
|
||||||
6
src/main.ts
Normal file
6
src/main.ts
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
import App from "./App.svelte";
|
||||||
|
import { mount } from "svelte";
|
||||||
|
import "./app.css";
|
||||||
|
|
||||||
|
// Mount the root component into the element with id "app".
// The non-null assertion means a missing element is not checked here.
const target = document.getElementById("app")!;
const app = mount(App, { target });

export default app;
|
||||||
5
svelte.config.js
Normal file
5
svelte.config.js
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
import { vitePreprocess } from "@sveltejs/vite-plugin-svelte";
|
||||||
|
|
||||||
|
// Svelte configuration: use the Vite-based preprocessor from
// @sveltejs/vite-plugin-svelte.
const config = {
  preprocess: vitePreprocess(),
};

export default config;
|
||||||
15
tsconfig.json
Normal file
15
tsconfig.json
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
{
|
||||||
|
"extends": "@tsconfig/svelte/tsconfig.json",
|
||||||
|
"compilerOptions": {
|
||||||
|
"target": "ESNext",
|
||||||
|
"useDefineForClassFields": true,
|
||||||
|
"module": "ESNext",
|
||||||
|
"resolveJsonModule": true,
|
||||||
|
"allowJs": true,
|
||||||
|
"checkJs": true,
|
||||||
|
"isolatedModules": true,
|
||||||
|
"moduleDetection": "force",
|
||||||
|
"strict": true
|
||||||
|
},
|
||||||
|
"include": ["src/**/*.ts", "src/**/*.svelte"]
|
||||||
|
}
|
||||||
21
vite.config.ts
Normal file
21
vite.config.ts
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
import { defineConfig } from "vite";
|
||||||
|
import { svelte } from "@sveltejs/vite-plugin-svelte";
|
||||||
|
import path from "path";
|
||||||
|
|
||||||
|
// https://vitejs.dev/config/
|
||||||
|
// Directories the dev server's file watcher should not react to.
const ignoredWatchGlobs = [
  "**/src-tauri/**",
  "**/client/**",
  "**/server/**",
  "**/backend/**",
  "**/gui/**",
];

// https://vitejs.dev/config/
export default defineConfig({
  plugins: [svelte()],
  clearScreen: false,
  resolve: {
    // `$lib/...` imports resolve to ./src/lib (relative to the working directory).
    alias: { $lib: path.resolve("./src/lib") },
  },
  server: {
    // Fixed dev-server port; with strictPort, Vite exits instead of picking
    // another port when 1420 is taken.
    port: 1420,
    strictPort: true,
    watch: { ignored: ignoredWatchGlobs },
  },
});
|
||||||
Reference in New Issue
Block a user