Compare commits
58 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
36b4f7dad5 | ||
|
|
1ecb23b83f | ||
|
|
4b88871a9b | ||
|
|
0ae48a67d5 | ||
|
|
924cae6c75 | ||
|
|
5139936e18 | ||
|
|
47724f1ac0 | ||
|
|
3b204be37e | ||
|
|
4c02a48135 | ||
|
|
997e97c19a | ||
|
|
6ca8fc41b2 | ||
|
|
d9d90563cc | ||
|
|
5a674ed199 | ||
|
|
9d78fce3f0 | ||
|
|
a8de39de84 | ||
|
|
bc82584dff | ||
|
|
4d0b4ee1c5 | ||
|
|
c73e9de0ac | ||
|
|
288c6ad6a3 | ||
|
|
af8046f9b1 | ||
|
|
6003885519 | ||
|
|
8829846b53 | ||
|
|
cf449d9338 | ||
|
|
5a6910834c | ||
|
|
a6c7eb5d5e | ||
|
|
135d5d534b | ||
|
|
76f34fe17d | ||
|
|
68ad31b6a7 | ||
|
|
fcbe405e23 | ||
|
|
4adfd2adc6 | ||
|
|
f3843d59f1 | ||
|
|
ad68251e04 | ||
|
|
9468d01a88 | ||
|
|
a3151ad55e | ||
|
|
5bff40e9b4 | ||
|
|
0ccb02ba27 | ||
|
|
aa4033b412 | ||
|
|
b4b9435317 | ||
|
|
ee1d4f8643 | ||
|
|
4a186d1de6 | ||
|
|
fff37992b1 | ||
|
|
8afe3230d3 | ||
|
|
04e7fb1a99 | ||
|
|
9a282215c9 | ||
|
|
cc2d17a627 | ||
|
|
61c5ffa4fa | ||
|
|
289b9dabe1 | ||
|
|
9522f28c57 | ||
|
|
a8e2e7dca8 | ||
|
|
3bcf4f09a3 | ||
|
|
ef5734ef15 | ||
| c9db43d56c | |||
|
|
4c519a109a | ||
|
|
47ca74e75d | ||
|
|
25d2a55efb | ||
|
|
af534bf768 | ||
|
|
9ff883e2e3 | ||
| bb8a8c251d |
9
.claude/settings.local.json
Normal file
9
.claude/settings.local.json
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{
|
||||||
|
"permissions": {
|
||||||
|
"allow": [
|
||||||
|
"Bash(python3:*)",
|
||||||
|
"Bash(node --check:*)",
|
||||||
|
"Bash(ls:*)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
103
.gitea/workflows/build-app-linux.yml
Normal file
103
.gitea/workflows/build-app-linux.yml
Normal file
@@ -0,0 +1,103 @@
|
|||||||
|
name: Build App (Linux)
|
||||||
|
|
||||||
|
on:
|
||||||
|
workflow_dispatch:
|
||||||
|
inputs:
|
||||||
|
tag:
|
||||||
|
description: 'Release tag to build (e.g. v1.4.5)'
|
||||||
|
required: true
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
build-linux:
|
||||||
|
name: Build App (Linux)
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
env:
|
||||||
|
NODE_VERSION: "20"
|
||||||
|
steps:
|
||||||
|
- name: Determine tag
  id: tag
  env:
    # The expressions are passed in as environment variables instead of being
    # spliced into the script text, so a tag value can never be parsed as shell code.
    TAG_FROM_INPUTS: ${{ inputs.tag }}
    TAG_FROM_EVENT: ${{ github.event.inputs.tag }}
  run: |
    # Resolve the tag to build and publish it as the step output "tag".
    # Order of preference: inputs.tag, github.event.inputs.tag, newest v* tag on the server.
    TAG="${TAG_FROM_INPUTS:-${TAG_FROM_EVENT:-}}"
    if [ -z "$TAG" ]; then
      # This step runs before actions/checkout, so an "origin" remote may not
      # exist yet; the repository URL is queried directly instead.
      # --refs drops peeled "^{}" entries so annotated tags cannot win head -1.
      TAG=$(git ls-remote --tags --refs --sort=-v:refname \
        "${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}.git" 'refs/tags/v*' \
        | head -1 | sed 's|.*refs/tags/||')
    fi
    if [ -z "$TAG" ]; then
      echo "ERROR: No tag was supplied and none could be discovered." >&2
      exit 1
    fi
    echo "Building for tag: ${TAG}"
    echo "tag=${TAG}" >> "$GITHUB_OUTPUT"
|
||||||
|
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
ref: ${{ steps.tag.outputs.tag }}
|
||||||
|
|
||||||
|
- name: Set up Node.js
|
||||||
|
uses: actions/setup-node@v4
|
||||||
|
with:
|
||||||
|
node-version: ${{ env.NODE_VERSION }}
|
||||||
|
|
||||||
|
- name: Install Rust stable
|
||||||
|
run: |
|
||||||
|
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable
|
||||||
|
echo "$HOME/.cargo/bin" >> $GITHUB_PATH
|
||||||
|
|
||||||
|
- name: Install system dependencies
|
||||||
|
run: |
|
||||||
|
sudo apt-get update
|
||||||
|
sudo apt-get install -y libgtk-3-dev libwebkit2gtk-4.1-dev libappindicator3-dev librsvg2-dev patchelf xdg-utils rpm
|
||||||
|
|
||||||
|
- name: Install npm dependencies
|
||||||
|
run: npm ci
|
||||||
|
|
||||||
|
- name: Build Tauri app
|
||||||
|
run: npm run tauri build
|
||||||
|
|
||||||
|
- name: Upload to release
|
||||||
|
env:
|
||||||
|
BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
|
||||||
|
run: |
|
||||||
|
# Attach the Linux bundles (.deb, .rpm, .AppImage) to the Gitea release for TAG.
# Environment: BUILD_TOKEN (API token), GITHUB_SERVER_URL, GITHUB_REPOSITORY.
# Exits non-zero when the release never appears, when no bundle is found,
# or when an upload is rejected.
sudo apt-get install -y jq
REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
TAG="${{ steps.tag.outputs.tag }}"
echo "Release tag: ${TAG}"

echo "Waiting for release ${TAG} to be available..."
RELEASE_ID=""
for i in $(seq 1 30); do
  # A transport error or a non-JSON reply counts as "not ready" and is retried
  # rather than aborting the script.
  RELEASE_JSON=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
    "${REPO_API}/releases/tags/${TAG}") || RELEASE_JSON=""
  RELEASE_ID=$(printf '%s' "$RELEASE_JSON" | jq -r '.id // empty' 2>/dev/null) || RELEASE_ID=""

  if [ -n "${RELEASE_ID}" ] && [ "${RELEASE_ID}" != "null" ]; then
    echo "Found release: ${TAG} (ID: ${RELEASE_ID})"
    break
  fi

  echo "Attempt ${i}/30: Release not ready yet, retrying in 10s..."
  sleep 10
done

if [ -z "${RELEASE_ID}" ] || [ "${RELEASE_ID}" = "null" ]; then
  echo "ERROR: Failed to find release for tag ${TAG} after 30 attempts."
  exit 1
fi

uploaded=0
# The loop reads from process substitution (not a pipe) so that "exit" and the
# "uploaded" counter act on this shell instead of a pipeline subshell.
while IFS= read -r file; do
  filename=$(basename "$file")
  # Percent-encode every reserved character, not only spaces.
  encoded_name=$(jq -rn --arg v "$filename" '$v | @uri')
  echo "Uploading ${filename} ($(du -h "$file" | cut -f1))..."

  # Remove any asset that already carries this name so the upload replaces it.
  # The name is handed to jq as data (--arg) instead of being pasted into the filter.
  ASSET_IDS=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
    "${REPO_API}/releases/${RELEASE_ID}/assets" \
    | jq -r --arg name "$filename" '.[] | select(.name == $name) | .id') || ASSET_IDS=""
  # Intentionally unquoted: one numeric ID per word.
  for asset_id in ${ASSET_IDS}; do
    curl -s -X DELETE -H "Authorization: token ${BUILD_TOKEN}" \
      "${REPO_API}/releases/${RELEASE_ID}/assets/${asset_id}"
  done

  HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X POST \
    -H "Authorization: token ${BUILD_TOKEN}" \
    -H "Content-Type: application/octet-stream" \
    -T "$file" \
    "${REPO_API}/releases/${RELEASE_ID}/assets?name=${encoded_name}") || true
  echo "Upload response: HTTP ${HTTP_CODE}"

  case "${HTTP_CODE}" in
    2??) uploaded=$((uploaded + 1)) ;;
    *)
      echo "ERROR: Upload of ${filename} failed (HTTP ${HTTP_CODE})." >&2
      exit 1
      ;;
  esac
done < <(find src-tauri/target/release/bundle -type f \( -name "*.deb" -o -name "*.rpm" -o -name "*.AppImage" \))

if [ "${uploaded}" -eq 0 ]; then
  echo "ERROR: No bundles found under src-tauri/target/release/bundle." >&2
  exit 1
fi
|
||||||
101
.gitea/workflows/build-app-macos.yml
Normal file
101
.gitea/workflows/build-app-macos.yml
Normal file
@@ -0,0 +1,101 @@
|
|||||||
|
name: Build App (macOS)
|
||||||
|
|
||||||
|
on:
|
||||||
|
workflow_dispatch:
|
||||||
|
inputs:
|
||||||
|
tag:
|
||||||
|
description: 'Release tag to build (e.g. v1.4.5)'
|
||||||
|
required: true
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
build-macos:
|
||||||
|
name: Build App (macOS)
|
||||||
|
runs-on: macos-latest
|
||||||
|
env:
|
||||||
|
NODE_VERSION: "20"
|
||||||
|
steps:
|
||||||
|
- name: Determine tag
  id: tag
  env:
    # The expressions are passed in as environment variables instead of being
    # spliced into the script text, so a tag value can never be parsed as shell code.
    TAG_FROM_INPUTS: ${{ inputs.tag }}
    TAG_FROM_EVENT: ${{ github.event.inputs.tag }}
  run: |
    # Resolve the tag to build and publish it as the step output "tag".
    # Order of preference: inputs.tag, github.event.inputs.tag, newest v* tag on the server.
    TAG="${TAG_FROM_INPUTS:-${TAG_FROM_EVENT:-}}"
    if [ -z "$TAG" ]; then
      # This step runs before actions/checkout, so an "origin" remote may not
      # exist yet; the repository URL is queried directly instead.
      # --refs drops peeled "^{}" entries so annotated tags cannot win head -1.
      TAG=$(git ls-remote --tags --refs --sort=-v:refname \
        "${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}.git" 'refs/tags/v*' \
        | head -1 | sed 's|.*refs/tags/||')
    fi
    if [ -z "$TAG" ]; then
      echo "ERROR: No tag was supplied and none could be discovered." >&2
      exit 1
    fi
    echo "Building for tag: ${TAG}"
    echo "tag=${TAG}" >> "$GITHUB_OUTPUT"
|
||||||
|
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
ref: ${{ steps.tag.outputs.tag }}
|
||||||
|
|
||||||
|
- name: Set up Node.js
|
||||||
|
uses: actions/setup-node@v4
|
||||||
|
with:
|
||||||
|
node-version: ${{ env.NODE_VERSION }}
|
||||||
|
|
||||||
|
- name: Install Rust stable
|
||||||
|
run: |
|
||||||
|
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable
|
||||||
|
echo "$HOME/.cargo/bin" >> $GITHUB_PATH
|
||||||
|
|
||||||
|
- name: Install system dependencies
|
||||||
|
run: brew install --quiet create-dmg || true
|
||||||
|
|
||||||
|
- name: Install npm dependencies
|
||||||
|
run: npm ci
|
||||||
|
|
||||||
|
- name: Build Tauri app
|
||||||
|
run: npm run tauri build
|
||||||
|
|
||||||
|
- name: Upload to release
|
||||||
|
env:
|
||||||
|
BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
|
||||||
|
run: |
|
||||||
|
# Attach the macOS disk images (.dmg) to the Gitea release for TAG.
# Environment: BUILD_TOKEN (API token), GITHUB_SERVER_URL, GITHUB_REPOSITORY.
# Exits non-zero when the release never appears, when no image is found,
# or when an upload is rejected.
command -v jq >/dev/null || brew install jq
REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
TAG="${{ steps.tag.outputs.tag }}"
echo "Release tag: ${TAG}"

echo "Waiting for release ${TAG} to be available..."
RELEASE_ID=""
for i in $(seq 1 30); do
  # A transport error or a non-JSON reply counts as "not ready" and is retried
  # rather than aborting the script.
  RELEASE_JSON=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
    "${REPO_API}/releases/tags/${TAG}") || RELEASE_JSON=""
  RELEASE_ID=$(printf '%s' "$RELEASE_JSON" | jq -r '.id // empty' 2>/dev/null) || RELEASE_ID=""

  if [ -n "${RELEASE_ID}" ] && [ "${RELEASE_ID}" != "null" ]; then
    echo "Found release: ${TAG} (ID: ${RELEASE_ID})"
    break
  fi

  echo "Attempt ${i}/30: Release not ready yet, retrying in 10s..."
  sleep 10
done

if [ -z "${RELEASE_ID}" ] || [ "${RELEASE_ID}" = "null" ]; then
  echo "ERROR: Failed to find release for tag ${TAG} after 30 attempts."
  exit 1
fi

uploaded=0
# The loop reads from process substitution (not a pipe) so that "exit" and the
# "uploaded" counter act on this shell instead of a pipeline subshell.
while IFS= read -r file; do
  filename=$(basename "$file")
  # Percent-encode every reserved character, not only spaces.
  encoded_name=$(jq -rn --arg v "$filename" '$v | @uri')
  echo "Uploading ${filename} ($(du -h "$file" | cut -f1))..."

  # Remove any asset that already carries this name so the upload replaces it.
  # The name is handed to jq as data (--arg) instead of being pasted into the filter.
  ASSET_IDS=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
    "${REPO_API}/releases/${RELEASE_ID}/assets" \
    | jq -r --arg name "$filename" '.[] | select(.name == $name) | .id') || ASSET_IDS=""
  # Intentionally unquoted: one numeric ID per word.
  for asset_id in ${ASSET_IDS}; do
    curl -s -X DELETE -H "Authorization: token ${BUILD_TOKEN}" \
      "${REPO_API}/releases/${RELEASE_ID}/assets/${asset_id}"
  done

  HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X POST \
    -H "Authorization: token ${BUILD_TOKEN}" \
    -H "Content-Type: application/octet-stream" \
    -T "$file" \
    "${REPO_API}/releases/${RELEASE_ID}/assets?name=${encoded_name}") || true
  echo "Upload response: HTTP ${HTTP_CODE}"

  case "${HTTP_CODE}" in
    2??) uploaded=$((uploaded + 1)) ;;
    *)
      echo "ERROR: Upload of ${filename} failed (HTTP ${HTTP_CODE})." >&2
      exit 1
      ;;
  esac
done < <(find src-tauri/target/release/bundle -type f -name "*.dmg")

if [ "${uploaded}" -eq 0 ]; then
  echo "ERROR: No .dmg found under src-tauri/target/release/bundle." >&2
  exit 1
fi
|
||||||
117
.gitea/workflows/build-app-windows.yml
Normal file
117
.gitea/workflows/build-app-windows.yml
Normal file
@@ -0,0 +1,117 @@
|
|||||||
|
name: Build App (Windows)
|
||||||
|
|
||||||
|
on:
|
||||||
|
workflow_dispatch:
|
||||||
|
inputs:
|
||||||
|
tag:
|
||||||
|
description: 'Release tag to build (e.g. v1.4.5)'
|
||||||
|
required: true
|
||||||
|
|
||||||
|
env:
|
||||||
|
NODE_VERSION: "20"
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
build-windows:
|
||||||
|
name: Build App (Windows)
|
||||||
|
runs-on: windows-latest
|
||||||
|
env:
|
||||||
|
RELEASE_TAG: ${{ inputs.tag }}
|
||||||
|
steps:
|
||||||
|
- name: Show tag
|
||||||
|
shell: powershell
|
||||||
|
run: |
|
||||||
|
Write-Host "Building for tag: $env:RELEASE_TAG"
|
||||||
|
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
ref: ${{ inputs.tag }}
|
||||||
|
|
||||||
|
- name: Set up Node.js
|
||||||
|
uses: actions/setup-node@v4
|
||||||
|
with:
|
||||||
|
node-version: ${{ env.NODE_VERSION }}
|
||||||
|
|
||||||
|
- name: Install Rust stable
|
||||||
|
shell: powershell
|
||||||
|
run: |
|
||||||
|
if (Get-Command rustup -ErrorAction SilentlyContinue) {
|
||||||
|
rustup default stable
|
||||||
|
} else {
|
||||||
|
Invoke-WebRequest -Uri https://win.rustup.rs/x86_64 -OutFile rustup-init.exe
|
||||||
|
.\rustup-init.exe -y --default-toolchain stable
|
||||||
|
echo "$env:USERPROFILE\.cargo\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
|
||||||
|
}
|
||||||
|
|
||||||
|
- name: Install npm dependencies
|
||||||
|
shell: powershell
|
||||||
|
run: npm ci
|
||||||
|
|
||||||
|
- name: Build Tauri app
|
||||||
|
shell: powershell
|
||||||
|
run: npm run tauri build
|
||||||
|
|
||||||
|
- name: Upload to release
|
||||||
|
shell: powershell
|
||||||
|
env:
|
||||||
|
BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
|
||||||
|
run: |
|
||||||
|
# Attach the Windows installers (*.msi, *-setup.exe) to the Gitea release named by RELEASE_TAG.
# Environment: BUILD_TOKEN (API token), RELEASE_TAG (tag to publish to).
# Exits 1 when the tag is empty, the release never appears, no installer is
# found, or any upload fails.
$REPO_API = "${{ github.server_url }}/api/v1/repos/${{ github.repository }}"
$Headers = @{ "Authorization" = "token $env:BUILD_TOKEN" }
$TAG = $env:RELEASE_TAG
Write-Host "Release tag: $TAG"

if (-not $TAG) {
  Write-Host "ERROR: RELEASE_TAG is empty"
  exit 1
}

Write-Host "Waiting for release $TAG to be available..."
$RELEASE_ID = $null

for ($i = 1; $i -le 30; $i++) {
  try {
    $release = Invoke-RestMethod -Uri "$REPO_API/releases/tags/$TAG" -Headers $Headers -ErrorAction Stop
    $RELEASE_ID = $release.id

    if ($RELEASE_ID) {
      Write-Host "Found release: $TAG (ID: $RELEASE_ID)"
      break
    }
  } catch {
    # A failed lookup is treated as "not ready yet"; the loop retries below.
  }

  Write-Host "Attempt ${i}/30: Release not ready yet, retrying in 10s..."
  Start-Sleep -Seconds 10
}

if (-not $RELEASE_ID) {
  Write-Host "ERROR: Failed to find release for tag $TAG after 30 attempts."
  exit 1
}

$artifacts = @(Get-ChildItem -Path src-tauri\target\release\bundle -Recurse -Include *.msi,*-setup.exe)
if ($artifacts.Count -eq 0) {
  Write-Host "ERROR: No installers found under src-tauri\target\release\bundle."
  exit 1
}

# Names of artifacts whose upload failed; checked after the loop so that every
# artifact is attempted before the step is failed.
$failedUploads = @()

foreach ($artifact in $artifacts) {
  $filename = $artifact.Name
  $encodedName = [System.Uri]::EscapeDataString($filename)
  $size = [math]::Round($artifact.Length / 1MB, 1)
  Write-Host "Uploading $filename ($size MB)..."

  # Best effort: delete every asset that already uses this name so the upload
  # replaces it. A failure here is reported but does not stop the upload attempt.
  try {
    $assets = Invoke-RestMethod -Uri "$REPO_API/releases/$RELEASE_ID/assets" -Headers $Headers
    foreach ($existing in @($assets | Where-Object { $_.name -eq $filename })) {
      Invoke-RestMethod -Uri "$REPO_API/releases/$RELEASE_ID/assets/$($existing.id)" -Method Delete -Headers $Headers
    }
  } catch {
    Write-Host "WARNING: Could not remove existing asset ${filename}: $($_.Exception.Message)"
  }

  $uploadUrl = "$REPO_API/releases/$RELEASE_ID/assets?name=$encodedName"
  $result = curl.exe --fail --silent --show-error `
    -X POST `
    -H "Authorization: token $env:BUILD_TOKEN" `
    -H "Content-Type: application/octet-stream" `
    -T "$($artifact.FullName)" `
    "$uploadUrl" 2>&1
  if ($LASTEXITCODE -eq 0) {
    Write-Host "Upload successful: $filename"
  } else {
    Write-Host "WARNING: Upload failed for ${filename}: $result"
    $failedUploads += $filename
  }
}

if ($failedUploads.Count -gt 0) {
  Write-Host "ERROR: $($failedUploads.Count) upload(s) failed: $($failedUploads -join ', ')"
  exit 1
}
|
||||||
118
.gitea/workflows/build-sidecar-linux.yml
Normal file
118
.gitea/workflows/build-sidecar-linux.yml
Normal file
@@ -0,0 +1,118 @@
|
|||||||
|
name: Build Sidecar (Linux)
|
||||||
|
|
||||||
|
on:
|
||||||
|
workflow_dispatch:
|
||||||
|
inputs:
|
||||||
|
tag:
|
||||||
|
description: 'Sidecar release tag to build (e.g. sidecar-v1.0.3)'
|
||||||
|
required: true
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
build-sidecar-linux:
|
||||||
|
name: Build Sidecar (Linux)
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
env:
|
||||||
|
PYTHON_VERSION: "3.11"
|
||||||
|
steps:
|
||||||
|
- name: Determine tag
  id: tag
  env:
    # The expressions are passed in as environment variables instead of being
    # spliced into the script text, so a tag value can never be parsed as shell code.
    TAG_FROM_INPUTS: ${{ inputs.tag }}
    TAG_FROM_EVENT: ${{ github.event.inputs.tag }}
  run: |
    # Resolve the sidecar tag to build and publish it as the step output "tag".
    # Order of preference: inputs.tag, github.event.inputs.tag, newest sidecar-v* tag on the server.
    TAG="${TAG_FROM_INPUTS:-${TAG_FROM_EVENT:-}}"
    if [ -z "$TAG" ]; then
      # This step runs before actions/checkout, so an "origin" remote may not
      # exist yet; the repository URL is queried directly instead.
      # --refs drops peeled "^{}" entries so annotated tags cannot win head -1.
      TAG=$(git ls-remote --tags --refs --sort=-v:refname \
        "${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}.git" 'refs/tags/sidecar-v*' \
        | head -1 | sed 's|.*refs/tags/||')
    fi
    if [ -z "$TAG" ]; then
      echo "ERROR: No tag was supplied and none could be discovered." >&2
      exit 1
    fi
    echo "Building for tag: ${TAG}"
    echo "tag=${TAG}" >> "$GITHUB_OUTPUT"
|
||||||
|
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
ref: ${{ steps.tag.outputs.tag }}
|
||||||
|
|
||||||
|
- name: Install uv
|
||||||
|
run: |
|
||||||
|
if command -v uv &> /dev/null; then
|
||||||
|
echo "uv already installed: $(uv --version)"
|
||||||
|
else
|
||||||
|
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||||
|
echo "$HOME/.local/bin" >> $GITHUB_PATH
|
||||||
|
fi
|
||||||
|
|
||||||
|
- name: Set up Python
|
||||||
|
run: uv python install ${{ env.PYTHON_VERSION }}
|
||||||
|
|
||||||
|
- name: Install system dependencies
|
||||||
|
run: |
|
||||||
|
sudo apt-get update
|
||||||
|
sudo apt-get install -y portaudio19-dev
|
||||||
|
|
||||||
|
- name: Build sidecar (CUDA)
|
||||||
|
run: |
|
||||||
|
uv sync --frozen || uv sync
|
||||||
|
uv run pyinstaller local-transcription-headless.spec
|
||||||
|
|
||||||
|
- name: Package sidecar (CUDA)
|
||||||
|
run: |
|
||||||
|
cd dist/local-transcription-backend && zip -r ../../sidecar-linux-x86_64-cuda.zip .
|
||||||
|
|
||||||
|
- name: Build sidecar (CPU)
|
||||||
|
run: |
|
||||||
|
rm -rf dist/local-transcription-backend build/
|
||||||
|
uv pip install torch torchaudio --index-url https://download.pytorch.org/whl/cpu --force-reinstall
|
||||||
|
# Run pyinstaller directly from venv to prevent uv run from
|
||||||
|
# re-resolving torch back to the CUDA version via pyproject.toml sources
|
||||||
|
.venv/bin/pyinstaller local-transcription-headless.spec
|
||||||
|
|
||||||
|
- name: Package sidecar (CPU)
|
||||||
|
run: |
|
||||||
|
cd dist/local-transcription-backend && zip -r ../../sidecar-linux-x86_64-cpu.zip .
|
||||||
|
|
||||||
|
- name: Upload to sidecar release
|
||||||
|
env:
|
||||||
|
BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
|
||||||
|
run: |
|
||||||
|
# Attach the packaged sidecar archives (sidecar-*.zip in the working directory)
# to the Gitea release for TAG.
# Environment: BUILD_TOKEN (API token), GITHUB_SERVER_URL, GITHUB_REPOSITORY.
# Exits non-zero when the release never appears, when no archive is found,
# or when an upload is rejected.
sudo apt-get install -y jq
REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
TAG="${{ steps.tag.outputs.tag }}"

echo "Waiting for sidecar release ${TAG} to be available..."
RELEASE_ID=""
for i in $(seq 1 30); do
  # A transport error or a non-JSON reply counts as "not ready" and is retried
  # rather than aborting the script.
  RELEASE_JSON=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
    "${REPO_API}/releases/tags/${TAG}") || RELEASE_JSON=""
  RELEASE_ID=$(printf '%s' "$RELEASE_JSON" | jq -r '.id // empty' 2>/dev/null) || RELEASE_ID=""

  if [ -n "${RELEASE_ID}" ] && [ "${RELEASE_ID}" != "null" ]; then
    echo "Found sidecar release: ${TAG} (ID: ${RELEASE_ID})"
    break
  fi

  echo "Attempt ${i}/30: Release not ready yet, retrying in 10s..."
  sleep 10
done

if [ -z "${RELEASE_ID}" ] || [ "${RELEASE_ID}" = "null" ]; then
  echo "ERROR: Failed to find sidecar release for tag ${TAG} after 30 attempts."
  exit 1
fi

uploaded=0
for file in sidecar-*.zip; do
  # An unmatched glob stays as the literal pattern; skip it instead of uploading it.
  [ -e "$file" ] || continue

  filename=$(basename "$file")
  # Percent-encode every reserved character, not only spaces.
  encoded_name=$(jq -rn --arg v "$filename" '$v | @uri')
  echo "Uploading ${filename} ($(du -h "$file" | cut -f1))..."

  # Remove any asset that already carries this name so the upload replaces it.
  # The name is handed to jq as data (--arg) instead of being pasted into the filter.
  ASSET_IDS=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
    "${REPO_API}/releases/${RELEASE_ID}/assets" \
    | jq -r --arg name "$filename" '.[] | select(.name == $name) | .id') || ASSET_IDS=""
  # Intentionally unquoted: one numeric ID per word.
  for asset_id in ${ASSET_IDS}; do
    curl -s -X DELETE -H "Authorization: token ${BUILD_TOKEN}" \
      "${REPO_API}/releases/${RELEASE_ID}/assets/${asset_id}"
  done

  HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X POST \
    -H "Authorization: token ${BUILD_TOKEN}" \
    -H "Content-Type: application/octet-stream" \
    -T "$file" \
    "${REPO_API}/releases/${RELEASE_ID}/assets?name=${encoded_name}") || true
  echo "Upload response: HTTP ${HTTP_CODE}"

  case "${HTTP_CODE}" in
    2??) uploaded=$((uploaded + 1)) ;;
    *)
      echo "ERROR: Upload of ${filename} failed (HTTP ${HTTP_CODE})." >&2
      exit 1
      ;;
  esac
done

if [ "${uploaded}" -eq 0 ]; then
  echo "ERROR: No sidecar-*.zip archives found to upload." >&2
  exit 1
fi
|
||||||
109
.gitea/workflows/build-sidecar-macos.yml
Normal file
109
.gitea/workflows/build-sidecar-macos.yml
Normal file
@@ -0,0 +1,109 @@
|
|||||||
|
name: Build Sidecar (macOS)
|
||||||
|
|
||||||
|
on:
|
||||||
|
workflow_dispatch:
|
||||||
|
inputs:
|
||||||
|
tag:
|
||||||
|
description: 'Sidecar release tag to build (e.g. sidecar-v1.0.3)'
|
||||||
|
required: true
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
build-sidecar-macos:
|
||||||
|
name: Build Sidecar (macOS)
|
||||||
|
runs-on: macos-latest
|
||||||
|
env:
|
||||||
|
PYTHON_VERSION: "3.11"
|
||||||
|
steps:
|
||||||
|
- name: Determine tag
  id: tag
  env:
    # The expressions are passed in as environment variables instead of being
    # spliced into the script text, so a tag value can never be parsed as shell code.
    TAG_FROM_INPUTS: ${{ inputs.tag }}
    TAG_FROM_EVENT: ${{ github.event.inputs.tag }}
  run: |
    # Resolve the sidecar tag to build and publish it as the step output "tag".
    # Order of preference: inputs.tag, github.event.inputs.tag, newest sidecar-v* tag on the server.
    TAG="${TAG_FROM_INPUTS:-${TAG_FROM_EVENT:-}}"
    if [ -z "$TAG" ]; then
      # This step runs before actions/checkout, so an "origin" remote may not
      # exist yet; the repository URL is queried directly instead.
      # --refs drops peeled "^{}" entries so annotated tags cannot win head -1.
      TAG=$(git ls-remote --tags --refs --sort=-v:refname \
        "${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}.git" 'refs/tags/sidecar-v*' \
        | head -1 | sed 's|.*refs/tags/||')
    fi
    if [ -z "$TAG" ]; then
      echo "ERROR: No tag was supplied and none could be discovered." >&2
      exit 1
    fi
    echo "Building for tag: ${TAG}"
    echo "tag=${TAG}" >> "$GITHUB_OUTPUT"
|
||||||
|
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
ref: ${{ steps.tag.outputs.tag }}
|
||||||
|
|
||||||
|
- name: Install uv
|
||||||
|
run: |
|
||||||
|
if command -v uv &> /dev/null; then
|
||||||
|
echo "uv already installed: $(uv --version)"
|
||||||
|
else
|
||||||
|
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||||
|
echo "$HOME/.local/bin" >> $GITHUB_PATH
|
||||||
|
fi
|
||||||
|
|
||||||
|
- name: Set up Python
|
||||||
|
run: uv python install ${{ env.PYTHON_VERSION }}
|
||||||
|
|
||||||
|
- name: Install system dependencies
|
||||||
|
run: brew install portaudio
|
||||||
|
|
||||||
|
- name: Build sidecar (CPU)
|
||||||
|
env:
|
||||||
|
UV_NO_SOURCES: "1"
|
||||||
|
run: |
|
||||||
|
# UV_NO_SOURCES bypasses pyproject.toml's [tool.uv.sources] which forces
|
||||||
|
# torch from the CUDA index (no macOS ARM wheels there).
|
||||||
|
# Default PyPI torch includes MPS (Apple Silicon GPU) support.
|
||||||
|
uv sync
|
||||||
|
.venv/bin/pyinstaller local-transcription-headless.spec
|
||||||
|
|
||||||
|
- name: Package sidecar (CPU)
|
||||||
|
run: |
|
||||||
|
cd dist/local-transcription-backend && zip -r ../../sidecar-macos-aarch64-cpu.zip .
|
||||||
|
|
||||||
|
- name: Upload to sidecar release
|
||||||
|
env:
|
||||||
|
BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
|
||||||
|
run: |
|
||||||
|
# Attach the packaged sidecar archives (sidecar-*.zip in the working directory)
# to the Gitea release for TAG.
# Environment: BUILD_TOKEN (API token), GITHUB_SERVER_URL, GITHUB_REPOSITORY.
# Exits non-zero when the release never appears, when no archive is found,
# or when an upload is rejected.
command -v jq >/dev/null || brew install jq
REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
TAG="${{ steps.tag.outputs.tag }}"

echo "Waiting for sidecar release ${TAG} to be available..."
RELEASE_ID=""
for i in $(seq 1 30); do
  # A transport error or a non-JSON reply counts as "not ready" and is retried
  # rather than aborting the script.
  RELEASE_JSON=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
    "${REPO_API}/releases/tags/${TAG}") || RELEASE_JSON=""
  RELEASE_ID=$(printf '%s' "$RELEASE_JSON" | jq -r '.id // empty' 2>/dev/null) || RELEASE_ID=""

  if [ -n "${RELEASE_ID}" ] && [ "${RELEASE_ID}" != "null" ]; then
    echo "Found sidecar release: ${TAG} (ID: ${RELEASE_ID})"
    break
  fi

  echo "Attempt ${i}/30: Release not ready yet, retrying in 10s..."
  sleep 10
done

if [ -z "${RELEASE_ID}" ] || [ "${RELEASE_ID}" = "null" ]; then
  echo "ERROR: Failed to find sidecar release for tag ${TAG} after 30 attempts."
  exit 1
fi

uploaded=0
for file in sidecar-*.zip; do
  # An unmatched glob stays as the literal pattern; skip it instead of uploading it.
  [ -e "$file" ] || continue

  filename=$(basename "$file")
  # Percent-encode every reserved character, not only spaces.
  encoded_name=$(jq -rn --arg v "$filename" '$v | @uri')
  echo "Uploading ${filename} ($(du -h "$file" | cut -f1))..."

  # Remove any asset that already carries this name so the upload replaces it.
  # The name is handed to jq as data (--arg) instead of being pasted into the filter.
  ASSET_IDS=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
    "${REPO_API}/releases/${RELEASE_ID}/assets" \
    | jq -r --arg name "$filename" '.[] | select(.name == $name) | .id') || ASSET_IDS=""
  # Intentionally unquoted: one numeric ID per word.
  for asset_id in ${ASSET_IDS}; do
    curl -s -X DELETE -H "Authorization: token ${BUILD_TOKEN}" \
      "${REPO_API}/releases/${RELEASE_ID}/assets/${asset_id}"
  done

  HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X POST \
    -H "Authorization: token ${BUILD_TOKEN}" \
    -H "Content-Type: application/octet-stream" \
    -T "$file" \
    "${REPO_API}/releases/${RELEASE_ID}/assets?name=${encoded_name}") || true
  echo "Upload response: HTTP ${HTTP_CODE}"

  case "${HTTP_CODE}" in
    2??) uploaded=$((uploaded + 1)) ;;
    *)
      echo "ERROR: Upload of ${filename} failed (HTTP ${HTTP_CODE})." >&2
      exit 1
      ;;
  esac
done

if [ "${uploaded}" -eq 0 ]; then
  echo "ERROR: No sidecar-*.zip archives found to upload." >&2
  exit 1
fi
|
||||||
145
.gitea/workflows/build-sidecar-windows.yml
Normal file
145
.gitea/workflows/build-sidecar-windows.yml
Normal file
@@ -0,0 +1,145 @@
|
|||||||
|
name: Build Sidecar (Windows)
|
||||||
|
|
||||||
|
on:
|
||||||
|
workflow_dispatch:
|
||||||
|
inputs:
|
||||||
|
tag:
|
||||||
|
description: 'Sidecar release tag to build (e.g. sidecar-v1.0.3)'
|
||||||
|
required: true
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
build-sidecar-windows:
|
||||||
|
name: Build Sidecar (Windows)
|
||||||
|
runs-on: windows-latest
|
||||||
|
env:
|
||||||
|
PYTHON_VERSION: "3.11"
|
||||||
|
RELEASE_TAG: ${{ inputs.tag }}
|
||||||
|
steps:
|
||||||
|
- name: Show tag
|
||||||
|
shell: powershell
|
||||||
|
run: |
|
||||||
|
Write-Host "Building for tag: $env:RELEASE_TAG"
|
||||||
|
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
ref: ${{ inputs.tag }}
|
||||||
|
|
||||||
|
- name: Install uv
|
||||||
|
shell: powershell
|
||||||
|
run: |
|
||||||
|
if (Get-Command uv -ErrorAction SilentlyContinue) {
|
||||||
|
Write-Host "uv already installed: $(uv --version)"
|
||||||
|
} else {
|
||||||
|
irm https://astral.sh/uv/install.ps1 | iex
|
||||||
|
$uvPaths = @(
|
||||||
|
"$env:USERPROFILE\.local\bin",
|
||||||
|
"$env:USERPROFILE\.cargo\bin",
|
||||||
|
"$env:LOCALAPPDATA\uv\bin"
|
||||||
|
)
|
||||||
|
foreach ($p in $uvPaths) {
|
||||||
|
if (Test-Path $p) {
|
||||||
|
echo $p | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
- name: Set up Python
|
||||||
|
shell: powershell
|
||||||
|
run: uv python install ${{ env.PYTHON_VERSION }}
|
||||||
|
|
||||||
|
- name: Install 7-Zip
|
||||||
|
shell: powershell
|
||||||
|
run: |
|
||||||
|
if (-not (Get-Command 7z -ErrorAction SilentlyContinue)) {
|
||||||
|
choco install 7zip -y
|
||||||
|
}
|
||||||
|
|
||||||
|
- name: Build sidecar (CUDA)
|
||||||
|
shell: powershell
|
||||||
|
run: |
|
||||||
|
uv sync --frozen
|
||||||
|
if ($LASTEXITCODE -ne 0) { uv sync }
|
||||||
|
uv run pyinstaller local-transcription-headless.spec
|
||||||
|
|
||||||
|
- name: Package sidecar (CUDA)
|
||||||
|
shell: powershell
|
||||||
|
run: |
|
||||||
|
7z a -tzip -mx=5 sidecar-windows-x86_64-cuda.zip .\dist\local-transcription-backend\*
|
||||||
|
|
||||||
|
- name: Build sidecar (CPU)
|
||||||
|
shell: powershell
|
||||||
|
run: |
|
||||||
|
Remove-Item -Recurse -Force dist\local-transcription-backend, build -ErrorAction SilentlyContinue
|
||||||
|
uv pip install torch torchaudio --index-url https://download.pytorch.org/whl/cpu --force-reinstall
|
||||||
|
.venv\Scripts\pyinstaller.exe local-transcription-headless.spec
|
||||||
|
|
||||||
|
- name: Package sidecar (CPU)
|
||||||
|
shell: powershell
|
||||||
|
run: |
|
||||||
|
7z a -tzip -mx=5 sidecar-windows-x86_64-cpu.zip .\dist\local-transcription-backend\*
|
||||||
|
|
||||||
|
- name: Upload to sidecar release
|
||||||
|
shell: powershell
|
||||||
|
env:
|
||||||
|
BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
|
||||||
|
run: |
|
||||||
|
# Attach the packaged sidecar archives (sidecar-*.zip in the working directory)
# to the Gitea release named by RELEASE_TAG.
# Environment: BUILD_TOKEN (API token), RELEASE_TAG (tag to publish to).
# Exits 1 when the tag is empty, the release never appears, no archive is
# found, or any upload fails.
$REPO_API = "${{ github.server_url }}/api/v1/repos/${{ github.repository }}"
$Headers = @{ "Authorization" = "token $env:BUILD_TOKEN" }
$TAG = $env:RELEASE_TAG
Write-Host "Release tag: $TAG"

if (-not $TAG) {
  Write-Host "ERROR: RELEASE_TAG is empty"
  exit 1
}

Write-Host "Waiting for sidecar release $TAG to be available..."
$RELEASE_ID = $null

for ($i = 1; $i -le 30; $i++) {
  try {
    $release = Invoke-RestMethod -Uri "$REPO_API/releases/tags/$TAG" -Headers $Headers -ErrorAction Stop
    $RELEASE_ID = $release.id

    if ($RELEASE_ID) {
      Write-Host "Found sidecar release: $TAG (ID: $RELEASE_ID)"
      break
    }
  } catch {
    # A failed lookup is treated as "not ready yet"; the loop retries below.
  }

  Write-Host "Attempt ${i}/30: Release not ready yet, retrying in 10s..."
  Start-Sleep -Seconds 10
}

if (-not $RELEASE_ID) {
  Write-Host "ERROR: Failed to find sidecar release for tag $TAG after 30 attempts."
  exit 1
}

$artifacts = @(Get-ChildItem -Path . -Filter "sidecar-*.zip")
if ($artifacts.Count -eq 0) {
  Write-Host "ERROR: No sidecar-*.zip archives found to upload."
  exit 1
}

# Names of archives whose upload failed; checked after the loop so that every
# archive is attempted before the step is failed.
$failedUploads = @()

foreach ($artifact in $artifacts) {
  $filename = $artifact.Name
  $encodedName = [System.Uri]::EscapeDataString($filename)
  $size = [math]::Round($artifact.Length / 1MB, 1)
  Write-Host "Uploading $filename ($size MB)..."

  # Best effort: delete every asset that already uses this name so the upload
  # replaces it. A failure here is reported but does not stop the upload attempt.
  try {
    $assets = Invoke-RestMethod -Uri "$REPO_API/releases/$RELEASE_ID/assets" -Headers $Headers
    foreach ($existing in @($assets | Where-Object { $_.name -eq $filename })) {
      Invoke-RestMethod -Uri "$REPO_API/releases/$RELEASE_ID/assets/$($existing.id)" -Method Delete -Headers $Headers
    }
  } catch {
    Write-Host "WARNING: Could not remove existing asset ${filename}: $($_.Exception.Message)"
  }

  $uploadUrl = "$REPO_API/releases/$RELEASE_ID/assets?name=$encodedName"
  $result = curl.exe --fail --silent --show-error `
    -X POST `
    -H "Authorization: token $env:BUILD_TOKEN" `
    -H "Content-Type: application/octet-stream" `
    -T "$($artifact.FullName)" `
    "$uploadUrl" 2>&1
  if ($LASTEXITCODE -eq 0) {
    Write-Host "Upload successful: $filename"
  } else {
    Write-Host "WARNING: Upload failed for ${filename}: $result"
    $failedUploads += $filename
  }
}

if ($failedUploads.Count -gt 0) {
  Write-Host "ERROR: $($failedUploads.Count) upload(s) failed: $($failedUploads -join ', ')"
  exit 1
}
|
||||||
167
.gitea/workflows/release.yml
Normal file
167
.gitea/workflows/release.yml
Normal file
@@ -0,0 +1,167 @@
|
|||||||
|
name: Release
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches: [main]
|
||||||
|
paths:
|
||||||
|
- 'src/**'
|
||||||
|
- 'src-tauri/**'
|
||||||
|
- 'package.json'
|
||||||
|
- 'vite.config.ts'
|
||||||
|
- 'index.html'
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
test:
|
||||||
|
name: Run Tests
|
||||||
|
if: "!contains(github.event.head_commit.message, '[skip ci]')"
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- uses: actions/setup-node@v4
|
||||||
|
with:
|
||||||
|
node-version: 20
|
||||||
|
|
||||||
|
- name: Install npm deps
|
||||||
|
run: npm ci
|
||||||
|
|
||||||
|
- name: Frontend tests
|
||||||
|
run: npx vitest run
|
||||||
|
|
||||||
|
- name: Install uv
|
||||||
|
run: |
|
||||||
|
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||||
|
echo "$HOME/.local/bin" >> $GITHUB_PATH
|
||||||
|
|
||||||
|
- name: Python tests
|
||||||
|
run: |
|
||||||
|
uv venv .testvenv
|
||||||
|
VIRTUAL_ENV=.testvenv uv pip install pytest httpx pytest-asyncio anyio fastapi pydantic pyyaml uvicorn requests
|
||||||
|
.testvenv/bin/python -m pytest backend/tests/ client/tests/ -v --tb=short
|
||||||
|
|
||||||
|
bump-version:
|
||||||
|
name: Bump version and tag
|
||||||
|
needs: test
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
outputs:
|
||||||
|
new_version: ${{ steps.bump.outputs.new_version }}
|
||||||
|
tag: ${{ steps.bump.outputs.tag }}
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
fetch-depth: 0
|
||||||
|
|
||||||
|
- name: Configure git
|
||||||
|
run: |
|
||||||
|
git config user.name "Gitea Actions"
|
||||||
|
git config user.email "actions@gitea.local"
|
||||||
|
|
||||||
|
- name: Bump patch version
|
||||||
|
id: bump
|
||||||
|
run: |
|
||||||
|
# Bump the patch component of the version found in package.json and write the
# new version into package.json, src-tauri/tauri.conf.json, src-tauri/Cargo.toml
# and version.py. Publishes step outputs "new_version" and "tag" (v-prefixed).
CURRENT=$(grep '"version"' package.json | head -1 | sed 's/.*"version": *"\([^"]*\)".*/\1/')
echo "Current version: ${CURRENT}"

# Only plain MAJOR.MINOR.PATCH is supported; anything else (empty, pre-release
# suffix, missing component) would produce a garbage version below.
if ! printf '%s\n' "${CURRENT}" | grep -Eq '^[0-9]+\.[0-9]+\.[0-9]+$'; then
  echo "ERROR: Unsupported version '${CURRENT}' in package.json (expected MAJOR.MINOR.PATCH)." >&2
  exit 1
fi

MAJOR=$(echo "${CURRENT}" | cut -d. -f1)
MINOR=$(echo "${CURRENT}" | cut -d. -f2)
PATCH=$(echo "${CURRENT}" | cut -d. -f3)
# 10# forces base 10 so a zero-padded patch such as "08" is not read as octal.
NEW_PATCH=$((10#${PATCH} + 1))
NEW_VERSION="${MAJOR}.${MINOR}.${NEW_PATCH}"
echo "New version: ${NEW_VERSION}"

# Escape the dots so the old version is matched literally in the sed patterns.
CURRENT_RE=$(printf '%s' "${CURRENT}" | sed 's/\./\\./g')

sed -i "s/\"version\": \"${CURRENT_RE}\"/\"version\": \"${NEW_VERSION}\"/" package.json
sed -i "s/\"version\": \"${CURRENT_RE}\"/\"version\": \"${NEW_VERSION}\"/" src-tauri/tauri.conf.json
sed -i "s/^version = \"${CURRENT_RE}\"/version = \"${NEW_VERSION}\"/" src-tauri/Cargo.toml
sed -i "s/__version__ = \"${CURRENT_RE}\"/__version__ = \"${NEW_VERSION}\"/" version.py
sed -i "s/__version_info__ = .*/__version_info__ = (${MAJOR}, ${MINOR}, ${NEW_PATCH})/" version.py

echo "new_version=${NEW_VERSION}" >> "$GITHUB_OUTPUT"
echo "tag=v${NEW_VERSION}" >> "$GITHUB_OUTPUT"
|
||||||
|
|
||||||
|
- name: Commit and tag
|
||||||
|
env:
|
||||||
|
BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
|
||||||
|
run: |
|
||||||
|
# Commit the version bump, tag it as v<NEW_VERSION>, and push both to main.
# Environment: BUILD_TOKEN (token injected into the push URL).
NEW_VERSION="${{ steps.bump.outputs.new_version }}"
git add package.json src-tauri/tauri.conf.json src-tauri/Cargo.toml version.py
git commit -m "chore: bump version to ${NEW_VERSION} [skip ci]"

REMOTE_URL=$(git remote get-url origin | sed "s|://|://gitea-actions:${BUILD_TOKEN}@|")

# Rebase BEFORE tagging: a rebase rewrites the bump commit, so a tag created
# earlier would keep pointing at the pre-rebase commit that never reaches main.
if ! git pull --rebase "${REMOTE_URL}" main; then
  echo "WARNING: git pull --rebase failed; continuing with the local commit." >&2
  # Leave no half-finished rebase behind; a no-op when none is in progress.
  git rebase --abort 2>/dev/null || true
fi

git tag "v${NEW_VERSION}"
git push "${REMOTE_URL}" HEAD:main
git push "${REMOTE_URL}" "v${NEW_VERSION}"
|
||||||
|
|
||||||
|
- name: Create Gitea release
|
||||||
|
env:
|
||||||
|
BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
|
||||||
|
run: |
|
||||||
|
# Create the release for the freshly pushed tag through the Gitea API.
# Environment: BUILD_TOKEN, GITHUB_SERVER_URL, GITHUB_REPOSITORY.
# Exits 1 when the API does not answer with a 2xx status.
REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
TAG="${{ steps.bump.outputs.tag }}"
RELEASE_NAME="Local Transcription ${TAG}"

# The response body goes to a temp file so it can still be printed while the
# status code is captured separately.
RESPONSE_FILE=$(mktemp)
HTTP_CODE=$(curl -s -o "${RESPONSE_FILE}" -w "%{http_code}" -X POST \
  -H "Authorization: token ${BUILD_TOKEN}" \
  -H "Content-Type: application/json" \
  -d "{\"tag_name\": \"${TAG}\", \"name\": \"${RELEASE_NAME}\", \"body\": \"Automated build.\", \"draft\": false, \"prerelease\": false}" \
  "${REPO_API}/releases") || true
cat "${RESPONSE_FILE}"
rm -f -- "${RESPONSE_FILE}"

case "${HTTP_CODE}" in
  2??)
    echo "Created release: ${RELEASE_NAME}"
    ;;
  *)
    echo "ERROR: Creating release ${RELEASE_NAME} failed (HTTP ${HTTP_CODE})." >&2
    exit 1
    ;;
esac
|
||||||
|
|
||||||
|
- name: Trigger per-OS app builds
|
||||||
|
env:
|
||||||
|
BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
|
||||||
|
run: |
|
||||||
|
# Dispatch the three per-OS build workflows for the new tag.
# Environment: BUILD_TOKEN, GITHUB_SERVER_URL, GITHUB_REPOSITORY.
# Every workflow is attempted; the step exits 1 afterwards if any dispatch
# was not answered with a 2xx status.
REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
TAG="${{ steps.bump.outputs.tag }}"

failed=""
for workflow in build-app-linux.yml build-app-windows.yml build-app-macos.yml; do
  echo "Dispatching ${workflow} for ${TAG}..."
  HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X POST \
    -H "Authorization: token ${BUILD_TOKEN}" \
    -H "Content-Type: application/json" \
    -d "{\"ref\": \"main\", \"inputs\": {\"tag\": \"${TAG}\"}}" \
    "${REPO_API}/actions/workflows/${workflow}/dispatches") || true
  echo "  -> HTTP ${HTTP_CODE}"

  case "${HTTP_CODE}" in
    2??) ;;
    *) failed="${failed} ${workflow}" ;;
  esac
done

if [ -n "${failed}" ]; then
  echo "ERROR: Dispatch failed for:${failed}" >&2
  exit 1
fi
|
||||||
|
|
||||||
|
- name: Clean up old app releases
|
||||||
|
env:
|
||||||
|
BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
|
||||||
|
run: |
|
||||||
|
# Delete app releases (and their tags) beyond the first KEEP entries of the
# release list, never touching PROTECT_TAG.
# Environment: BUILD_TOKEN, GITHUB_SERVER_URL, GITHUB_REPOSITORY. Uses jq.
REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
KEEP=3
PROTECT_TAG="v1.4.0"

echo "Cleaning up old app releases (keeping latest ${KEEP} + ${PROTECT_TAG})..."

# Get all app releases (v* tags, not sidecar-v*).
# An error reply (anything that is not a JSON array) makes jq fail, which is
# turned into an empty RELEASES and reported below instead of being iterated.
RELEASES=$(curl -s -H "Authorization: token ${BUILD_TOKEN}" \
  "${REPO_API}/releases?limit=50" \
  | jq -c 'if type == "array" then [.[] | select(.tag_name | startswith("v")) | select(.tag_name | startswith("sidecar") | not)] else error("release list is not an array") end') || RELEASES=""

if [ -z "${RELEASES}" ]; then
  echo "ERROR: Could not list releases; nothing was deleted." >&2
  exit 1
fi

TOTAL=$(echo "$RELEASES" | jq 'length')
echo "Found ${TOTAL} app releases"

if [ "$TOTAL" -le "$KEEP" ]; then
  echo "Nothing to clean up"
  exit 0
fi

# Skip the first KEEP releases, delete the rest (except protected).
# NOTE(review): this assumes the API lists the newest releases first - confirm.
# The loop reads from process substitution so it runs in this shell, not in a
# pipeline subshell.
while read -r release; do
  ID=$(echo "$release" | jq -r '.id')
  TAG=$(echo "$release" | jq -r '.tag_name')

  if [ "$TAG" = "$PROTECT_TAG" ]; then
    echo "  Protecting ${TAG}"
    continue
  fi

  echo "  Deleting release ${TAG} (ID: ${ID})..."
  curl -s -X DELETE -H "Authorization: token ${BUILD_TOKEN}" \
    "${REPO_API}/releases/${ID}"

  # Also delete the tag
  curl -s -X DELETE -H "Authorization: token ${BUILD_TOKEN}" \
    "${REPO_API}/tags/${TAG}"
done < <(echo "$RELEASES" | jq -c ".[$KEEP:][]")

echo "Cleanup complete"
|
||||||
174
.gitea/workflows/sidecar-release.yml
Normal file
174
.gitea/workflows/sidecar-release.yml
Normal file
@@ -0,0 +1,174 @@
|
|||||||
|
name: Sidecar Release
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches: [main]
|
||||||
|
paths:
|
||||||
|
- 'client/**'
|
||||||
|
- 'server/**'
|
||||||
|
- 'backend/**'
|
||||||
|
- 'pyproject.toml'
|
||||||
|
- 'local-transcription-headless.spec'
|
||||||
|
workflow_dispatch:
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
test:
  name: Run Tests
  # Skipped when the head commit message contains "[skip ci]"; the
  # version-bump commit created by this workflow carries that marker.
  if: "!contains(github.event.head_commit.message, '[skip ci]')"
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4

    - name: Install uv
      run: |
        # Only download uv when the runner does not already provide it.
        if command -v uv > /dev/null 2>&1; then
          echo "uv already installed: $(uv --version)"
        else
          curl -LsSf https://astral.sh/uv/install.sh | sh
          echo "$HOME/.local/bin" >> "$GITHUB_PATH"
        fi

    - name: Python tests
      run: |
        # Tests run in a throwaway virtualenv with only the packages listed here.
        uv venv .testvenv
        VIRTUAL_ENV=.testvenv uv pip install pytest httpx pytest-asyncio anyio fastapi pydantic pyyaml uvicorn requests
        .testvenv/bin/python -m pytest backend/tests/ client/tests/ -v --tb=short
|
||||||
|
|
||||||
|
bump-sidecar-version:
|
||||||
|
name: Bump sidecar version and tag
|
||||||
|
needs: test
|
||||||
|
if: "!contains(github.event.head_commit.message, '[skip ci]')"
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
outputs:
|
||||||
|
version: ${{ steps.bump.outputs.version }}
|
||||||
|
tag: ${{ steps.bump.outputs.tag }}
|
||||||
|
has_changes: ${{ steps.check_changes.outputs.has_changes }}
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
  with:
    # Two commits are fetched because the next step diffs HEAD~1 against HEAD.
    fetch-depth: 2

- name: Check for backend changes
  id: check_changes
  run: |
    # Sets the `has_changes` step output:
    #   - always "true" for manual (workflow_dispatch) runs
    #   - "true" when the last commit touched any sidecar-relevant path
    #   - "false" otherwise
    if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
      echo "has_changes=true" >> "$GITHUB_OUTPUT"
      exit 0
    fi

    # A failing diff is still treated as "no changes", but it is reported
    # on stderr instead of being hidden.
    if ! CHANGED=$(git diff --name-only HEAD~1 HEAD -- client/ server/ backend/ pyproject.toml local-transcription-headless.spec); then
      echo "git diff against HEAD~1 failed; treating as no backend changes" >&2
      CHANGED=""
    fi

    if [ -n "$CHANGED" ]; then
      echo "has_changes=true" >> "$GITHUB_OUTPUT"
      echo "Backend changes detected: $CHANGED"
    else
      echo "has_changes=false" >> "$GITHUB_OUTPUT"
      echo "No backend changes detected, skipping sidecar build"
    fi
|
||||||
|
|
||||||
|
- name: Configure git
  if: steps.check_changes.outputs.has_changes == 'true'
  run: |
    # Author identity for the commit created later in this job.
    git config user.email "actions@gitea.local"
    git config user.name "Gitea Actions"
|
||||||
|
|
||||||
|
- name: Bump sidecar patch version
  if: steps.check_changes.outputs.has_changes == 'true'
  id: bump
  run: |
    # Reads the first `version = "X.Y.Z"` line of pyproject.toml, increments
    # the patch component, rewrites that line in place, and publishes the
    # step outputs `version` (X.Y.Z+1) and `tag` (sidecar-vX.Y.Z+1).

    # `|| true` keeps a missing version line from aborting here, so the
    # format check below can report it with a clear message.
    CURRENT=$(grep '^version = ' pyproject.toml | head -1 | sed 's/version = "\(.*\)"/\1/' || true)
    echo "Current sidecar version: ${CURRENT}"

    # Only plain MAJOR.MINOR.PATCH is supported; anything else would make
    # the arithmetic below fail or produce a bogus version.
    if ! printf '%s\n' "${CURRENT}" | grep -Eq '^[0-9]+\.[0-9]+\.[0-9]+$'; then
      echo "Unsupported version format in pyproject.toml: '${CURRENT}'" >&2
      exit 1
    fi

    MAJOR=$(echo "${CURRENT}" | cut -d. -f1)
    MINOR=$(echo "${CURRENT}" | cut -d. -f2)
    PATCH=$(echo "${CURRENT}" | cut -d. -f3)
    # 10# forces base 10 so a patch such as "08" is not rejected as octal.
    NEW_PATCH=$((10#${PATCH} + 1))
    NEW_VERSION="${MAJOR}.${MINOR}.${NEW_PATCH}"
    echo "New sidecar version: ${NEW_VERSION}"

    # Escape the dots so they match literally in the sed pattern.
    CURRENT_RE=$(printf '%s' "${CURRENT}" | sed 's/\./\\./g')
    sed -i "s/^version = \"${CURRENT_RE}\"/version = \"${NEW_VERSION}\"/" pyproject.toml

    echo "version=${NEW_VERSION}" >> "$GITHUB_OUTPUT"
    echo "tag=sidecar-v${NEW_VERSION}" >> "$GITHUB_OUTPUT"
|
||||||
|
|
||||||
|
- name: Commit and tag
  if: steps.check_changes.outputs.has_changes == 'true'
  env:
    BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
  run: |
    # Commits the bumped pyproject.toml, rebases onto the latest main, then
    # tags the resulting commit and pushes both the branch and the tag.
    set -e

    NEW_VERSION="${{ steps.bump.outputs.version }}"
    TAG="${{ steps.bump.outputs.tag }}"
    git add pyproject.toml
    git commit -m "chore: bump sidecar version to ${NEW_VERSION} [skip ci]"

    # Authenticated push/pull URL: credentials are injected into the origin URL.
    REMOTE_URL=$(git remote get-url origin | sed "s|://|://gitea-actions:${BUILD_TOKEN}@|")

    # Rebase BEFORE tagging: a rebase rewrites the bump commit, so a tag
    # created earlier would keep pointing at the discarded pre-rebase commit.
    if ! git pull --rebase "${REMOTE_URL}" main; then
      echo "warning: could not rebase onto main; continuing with the local commit" >&2
      # Never continue from a half-finished rebase.
      git rebase --abort 2>/dev/null || true
    fi

    git tag "${TAG}"

    # Push the branch first: if main has moved and the rebase failed, this
    # push is rejected and (with set -e) the tag is never published.
    git push "${REMOTE_URL}" HEAD:main
    git push "${REMOTE_URL}" "${TAG}"
|
||||||
|
|
||||||
|
- name: Create Gitea release
  if: steps.check_changes.outputs.has_changes == 'true'
  env:
    BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
  run: |
    # Creates a published (non-draft, non-prerelease) release for the new
    # sidecar tag and fails the step when the API does not return a 2xx.
    REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
    TAG="${{ steps.bump.outputs.tag }}"
    VERSION="${{ steps.bump.outputs.version }}"
    RELEASE_NAME="Sidecar v${VERSION}"

    # Build the payload with jq so the values are JSON-escaped.
    PAYLOAD=$(jq -n --arg tag "${TAG}" --arg name "${RELEASE_NAME}" \
      '{tag_name: $tag, name: $name, body: "Automated sidecar build.", draft: false, prerelease: false}')

    RESPONSE_FILE=$(mktemp)
    HTTP_CODE=$(curl -s -o "${RESPONSE_FILE}" -w "%{http_code}" -X POST \
      -H "Authorization: token ${BUILD_TOKEN}" \
      -H "Content-Type: application/json" \
      -d "${PAYLOAD}" \
      "${REPO_API}/releases") || true

    case "${HTTP_CODE}" in
      2??)
        rm -f -- "${RESPONSE_FILE}"
        echo "Created release: ${RELEASE_NAME}"
        ;;
      *)
        echo "Failed to create release ${RELEASE_NAME} (HTTP ${HTTP_CODE})" >&2
        cat "${RESPONSE_FILE}" >&2 || true
        rm -f -- "${RESPONSE_FILE}"
        exit 1
        ;;
    esac
|
||||||
|
|
||||||
|
- name: Trigger per-OS sidecar builds
  if: steps.check_changes.outputs.has_changes == 'true'
  env:
    BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
  run: |
    # Dispatches each per-OS sidecar build workflow on main, passing the new
    # tag as the `tag` input. Every workflow is attempted; the step fails at
    # the end if any dispatch did not return a 2xx status.
    REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
    TAG="${{ steps.bump.outputs.tag }}"
    FAILED=0

    for workflow in build-sidecar-linux.yml build-sidecar-windows.yml build-sidecar-macos.yml; do
      echo "Dispatching ${workflow} for ${TAG}..."
      # `|| true`: a transport error must not stop the remaining dispatches;
      # it is caught by the status check below instead.
      HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X POST \
        -H "Authorization: token ${BUILD_TOKEN}" \
        -H "Content-Type: application/json" \
        -d "{\"ref\": \"main\", \"inputs\": {\"tag\": \"${TAG}\"}}" \
        "${REPO_API}/actions/workflows/${workflow}/dispatches") || true
      echo "  -> HTTP ${HTTP_CODE}"

      case "${HTTP_CODE}" in
        2??) ;;
        *)
          echo "  Dispatch of ${workflow} failed" >&2
          FAILED=1
          ;;
      esac
    done

    if [ "${FAILED}" -ne 0 ]; then
      exit 1
    fi
|
||||||
|
|
||||||
|
- name: Clean up old sidecar releases
  if: steps.check_changes.outputs.has_changes == 'true'
  env:
    BUILD_TOKEN: ${{ secrets.BUILD_TOKEN }}
  run: |
    # Prune old sidecar releases: keep the newest KEEP releases and delete
    # every other sidecar release together with its git tag.
    REPO_API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}"
    KEEP=2

    echo "Cleaning up old sidecar releases (keeping latest ${KEEP})..."

    # -f makes curl fail on HTTP errors, so an error payload is never parsed
    # as if it were a release list.
    if ! RESPONSE=$(curl -fsS -H "Authorization: token ${BUILD_TOKEN}" \
      "${REPO_API}/releases?limit=50"); then
      echo "Failed to list releases; aborting cleanup" >&2
      exit 1
    fi

    # Get all sidecar releases (sidecar-v* tags)
    RELEASES=$(printf '%s' "$RESPONSE" \
      | jq -c '[.[] | select(.tag_name | startswith("sidecar-v"))]')

    TOTAL=$(printf '%s' "$RELEASES" | jq 'length')
    case "$TOTAL" in
      ''|*[!0-9]*)
        echo "Unexpected release list from API; aborting cleanup" >&2
        exit 1
        ;;
    esac
    echo "Found ${TOTAL} sidecar releases"

    if [ "$TOTAL" -le "$KEEP" ]; then
      echo "Nothing to clean up"
      exit 0
    fi

    # Skip the newest KEEP releases, delete the rest.
    # KEEP is passed as a jq variable instead of being spliced into the filter.
    printf '%s' "$RELEASES" | jq -c --argjson keep "$KEEP" '.[$keep:][]' | while IFS= read -r release; do
      ID=$(printf '%s' "$release" | jq -r '.id')
      TAG=$(printf '%s' "$release" | jq -r '.tag_name')

      echo "  Deleting sidecar release ${TAG} (ID: ${ID})..."
      HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X DELETE \
        -H "Authorization: token ${BUILD_TOKEN}" \
        "${REPO_API}/releases/${ID}") || true
      echo "    release -> HTTP ${HTTP_CODE}"

      # Also delete the tag
      HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X DELETE \
        -H "Authorization: token ${BUILD_TOKEN}" \
        "${REPO_API}/tags/${TAG}") || true
      echo "    tag     -> HTTP ${HTTP_CODE}"
    done

    echo "Cleanup complete"
|
||||||
66
.gitea/workflows/test.yml
Normal file
66
.gitea/workflows/test.yml
Normal file
@@ -0,0 +1,66 @@
|
|||||||
|
name: Tests

# Runs the Python, frontend and Rust test suites on pushes and pull requests
# targeting main, and on manual dispatch.
on:
  push:
    branches: [main]
  pull_request:
    branches: [main]
  workflow_dispatch:

jobs:
  python-tests:
    name: Python Backend Tests
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Install uv
        run: |
          # Only download uv when the runner does not already provide it.
          if command -v uv > /dev/null 2>&1; then
            echo "uv already installed: $(uv --version)"
          else
            curl -LsSf https://astral.sh/uv/install.sh | sh
            echo "$HOME/.local/bin" >> "$GITHUB_PATH"
          fi

      - name: Run pytest
        run: |
          # Tests run in a throwaway virtualenv with only the packages listed here.
          uv venv .testvenv
          VIRTUAL_ENV=.testvenv uv pip install pytest httpx pytest-asyncio anyio fastapi pydantic pyyaml uvicorn requests
          .testvenv/bin/python -m pytest backend/tests/ client/tests/ -v --tb=short

  frontend-tests:
    name: Frontend Tests
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-node@v4
        with:
          node-version: 20

      - name: Install dependencies
        run: npm ci

      - name: Run Vitest
        run: npx vitest run

  rust-tests:
    name: Rust Sidecar Tests
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Install Rust
        run: |
          # Non-interactive rustup install of the stable toolchain.
          curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable
          echo "$HOME/.cargo/bin" >> "$GITHUB_PATH"

      - name: Install Tauri system dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y libgtk-3-dev libwebkit2gtk-4.1-dev libappindicator3-dev librsvg2-dev patchelf

      - name: Run cargo test
        working-directory: src-tauri
        run: cargo test
|
||||||
16
.gitignore
vendored
16
.gitignore
vendored
@@ -10,8 +10,8 @@ dist/
|
|||||||
downloads/
|
downloads/
|
||||||
eggs/
|
eggs/
|
||||||
.eggs/
|
.eggs/
|
||||||
lib/
|
/lib/
|
||||||
lib64/
|
/lib64/
|
||||||
parts/
|
parts/
|
||||||
sdist/
|
sdist/
|
||||||
var/
|
var/
|
||||||
@@ -54,3 +54,15 @@ models/
|
|||||||
|
|
||||||
# PyInstaller
|
# PyInstaller
|
||||||
*.spec.lock
|
*.spec.lock
|
||||||
|
|
||||||
|
# Node.js
|
||||||
|
node_modules/
|
||||||
|
|
||||||
|
# Vite / Svelte build output
|
||||||
|
dist/
|
||||||
|
|
||||||
|
# Tauri
|
||||||
|
src-tauri/target/
|
||||||
|
|
||||||
|
# Windows NTFS alternate data streams
|
||||||
|
*:Zone.Identifier
|
||||||
|
|||||||
Binary file not shown.
426
CLAUDE.md
426
CLAUDE.md
@@ -4,52 +4,114 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
|
|||||||
|
|
||||||
## Project Overview
|
## Project Overview
|
||||||
|
|
||||||
Local Transcription is a desktop application for real-time speech-to-text transcription designed for streamers. It uses Whisper models (via faster-whisper) to transcribe audio locally with optional multi-user server synchronization.
|
Local Transcription is a cross-platform desktop application for real-time speech-to-text transcription designed for streamers. It supports local Whisper models and cloud-based Deepgram transcription, with OBS browser source integration and optional multi-user sync.
|
||||||
|
|
||||||
|
**Architecture:** Two-process model — a Tauri v2 shell (Svelte 5 frontend) communicates with a headless Python backend (sidecar) via REST API and WebSocket.
|
||||||
|
|
||||||
**Key Features:**
|
**Key Features:**
|
||||||
- Standalone desktop GUI (PySide6/Qt)
|
- Cross-platform desktop app (Windows, macOS, Linux) via Tauri v2 + Svelte 5
|
||||||
- Local transcription with CPU/GPU support
|
- Headless Python backend with FastAPI control API
|
||||||
- Built-in web server for OBS browser source integration
|
- Dual transcription modes: local Whisper or cloud Deepgram (managed/BYOK)
|
||||||
- Optional Node.js-based multi-user server for syncing transcriptions across users
|
- Built-in web server for OBS browser source at `http://localhost:8080`
|
||||||
- Noise suppression and Voice Activity Detection (VAD)
|
- Optional multi-user sync via Node.js server
|
||||||
- Cross-platform builds (Linux/Windows) with PyInstaller
|
- CUDA, MPS (Apple Silicon), and CPU support
|
||||||
|
- Auto-updates, custom fonts, configurable colors
|
||||||
|
|
||||||
|
> **Legacy GUI:** The original PySide6/Qt GUI (`main.py`, `gui/`) still works during the transition. New features should target the Tauri frontend and headless backend.
|
||||||
|
|
||||||
## Project Structure
|
## Project Structure
|
||||||
|
|
||||||
```
|
```
|
||||||
local-transcription/
|
local-transcription/
|
||||||
├── client/ # Core transcription logic
|
├── src/ # Svelte 5 frontend (Tauri UI)
|
||||||
│ ├── audio_capture.py # Audio input and buffering
|
│ ├── App.svelte # Main app shell
|
||||||
│ ├── transcription_engine.py # Whisper model integration
|
│ ├── app.css # Global dark theme styles
|
||||||
│ ├── noise_suppression.py # VAD and noise reduction
|
│ ├── main.ts # Svelte mount point
|
||||||
│ ├── device_utils.py # CPU/GPU device management
|
│ ├── lib/components/ # UI components
|
||||||
│ ├── config.py # Configuration management
|
│ │ ├── Header.svelte # Title bar + settings button
|
||||||
│ └── server_sync.py # Multi-user server sync client
|
│ │ ├── StatusBar.svelte # State indicator, device, user info
|
||||||
├── gui/ # Desktop application UI
|
│ │ ├── Controls.svelte # Start/Stop, Clear, Save buttons
|
||||||
│ ├── main_window_qt.py # Main application window (PySide6)
|
│ │ ├── TranscriptionDisplay.svelte # Scrolling transcript view
|
||||||
│ ├── settings_dialog_qt.py # Settings dialog (PySide6)
|
│ │ └── Settings.svelte # Full settings modal (all sections)
|
||||||
│ └── transcription_display_qt.py # Display widget
|
│ └── lib/stores/ # Svelte 5 reactive stores ($state/$derived)
|
||||||
├── server/ # Web display servers
|
│ ├── backend.ts # WebSocket + REST API client
|
||||||
│ ├── web_display.py # FastAPI server for OBS browser source (local)
|
│ ├── config.ts # App configuration fetch/update
|
||||||
│ └── nodejs/ # Optional multi-user Node.js server
|
│ └── transcriptions.ts # Transcript data management
|
||||||
│ ├── server.js # Multi-user sync server with WebSocket
|
├── src-tauri/ # Tauri v2 Rust shell
|
||||||
│ ├── package.json # Node.js dependencies
|
│ ├── src/lib.rs # Plugin registration (shell, dialog, process)
|
||||||
│ └── README.md # Server deployment documentation
|
│ ├── src/main.rs # Entry point
|
||||||
├── config/ # Example configuration files
|
│ ├── tauri.conf.json # Window, bundle, plugin config
|
||||||
│ └── default_config.yaml # Default settings template
|
│ └── Cargo.toml # Rust dependencies
|
||||||
├── main.py # GUI application entry point
|
├── backend/ # Headless Python backend (the sidecar)
|
||||||
├── main_cli.py # CLI version for testing
|
│ ├── app_controller.py # Core orchestration (engine, sync, config)
|
||||||
└── pyproject.toml # Dependencies and build config
|
│ ├── api_server.py # FastAPI REST endpoints + /ws/control
|
||||||
|
│ └── main_headless.py # Headless entry point (prints JSON to stdout)
|
||||||
|
├── client/ # Core transcription modules (used by backend)
|
||||||
|
│ ├── audio_capture.py # Audio input handling
|
||||||
|
│ ├── transcription_engine_realtime.py # RealtimeSTT / Whisper engine
|
||||||
|
│ ├── deepgram_transcription.py # Deepgram WebSocket cloud transcription
|
||||||
|
│ ├── noise_suppression.py # VAD and noise reduction
|
||||||
|
│ ├── device_utils.py # CPU/GPU/MPS detection
|
||||||
|
│ ├── config.py # YAML config management (~/.local-transcription/)
|
||||||
|
│ ├── server_sync.py # Multi-user server sync client
|
||||||
|
│ ├── instance_lock.py # Single-instance PID lock
|
||||||
|
│ └── update_checker.py # Gitea release update checker
|
||||||
|
├── gui/ # Legacy PySide6/Qt GUI (still functional)
|
||||||
|
│ ├── main_window_qt.py # Main window (orchestration lives here in legacy)
|
||||||
|
│ ├── settings_dialog_qt.py # Settings dialog
|
||||||
|
│ └── transcription_display_qt.py # Display widget
|
||||||
|
├── server/
|
||||||
|
│ ├── web_display.py # FastAPI OBS display server (WebSocket + HTML)
|
||||||
|
│ └── nodejs/ # Optional multi-user sync server
|
||||||
|
├── .gitea/workflows/ # CI/CD
|
||||||
|
│ ├── release.yml # Coordinator: version bump, tag, release creation
|
||||||
|
│ ├── build-app-linux.yml # Linux Tauri app build (triggered by v* tag)
|
||||||
|
│ ├── build-app-windows.yml # Windows Tauri app build (triggered by v* tag)
|
||||||
|
│ ├── build-app-macos.yml # macOS Tauri app build (triggered by v* tag)
|
||||||
|
│ ├── sidecar-release.yml # Sidecar coordinator: version bump, tag, release
|
||||||
|
│ ├── build-sidecar-linux.yml # Linux sidecar build (triggered by sidecar-v* tag)
|
||||||
|
│ ├── build-sidecar-windows.yml # Windows sidecar build (triggered by sidecar-v* tag)
|
||||||
|
│ └── build-sidecar-macos.yml # macOS sidecar build (triggered by sidecar-v* tag)
|
||||||
|
├── config/default_config.yaml # Default settings template
|
||||||
|
├── main.py # Legacy PySide6 GUI entry point
|
||||||
|
├── main_cli.py # CLI version for testing
|
||||||
|
├── version.py # Version string (__version__)
|
||||||
|
├── local-transcription.spec # PyInstaller config (legacy, includes PySide6)
|
||||||
|
├── local-transcription-headless.spec # PyInstaller config (headless sidecar, no Qt)
|
||||||
|
├── pyproject.toml # Python deps (uv, CUDA PyTorch index)
|
||||||
|
├── package.json # Node/Tauri deps
|
||||||
|
└── vite.config.ts # Vite build config ($lib alias)
|
||||||
```
|
```
|
||||||
|
|
||||||
## Development Commands
|
## Development Commands
|
||||||
|
|
||||||
### Installation and Setup
|
### Frontend (Tauri + Svelte)
|
||||||
```bash
|
```bash
|
||||||
# Install dependencies (creates .venv automatically)
|
# Install npm dependencies
|
||||||
|
npm install
|
||||||
|
|
||||||
|
# Run Tauri in development mode (hot-reload)
|
||||||
|
npm run tauri dev
|
||||||
|
|
||||||
|
# Build frontend only (for testing)
|
||||||
|
npx vite build
|
||||||
|
|
||||||
|
# Type-check Svelte
|
||||||
|
npx svelte-check
|
||||||
|
|
||||||
|
# Check Rust compiles
|
||||||
|
cd src-tauri && cargo check
|
||||||
|
```
|
||||||
|
|
||||||
|
### Backend (Python)
|
||||||
|
```bash
|
||||||
|
# Install Python dependencies
|
||||||
uv sync
|
uv sync
|
||||||
|
|
||||||
# Run the GUI application
|
# Run the headless backend standalone (for development)
|
||||||
|
uv run python -m backend.main_headless --port 8080
|
||||||
|
|
||||||
|
# Run the legacy PySide6 GUI
|
||||||
uv run python main.py
|
uv run python main.py
|
||||||
|
|
||||||
# Run CLI version (headless, for testing)
|
# Run CLI version (headless, for testing)
|
||||||
@@ -57,257 +119,163 @@ uv run python main_cli.py
|
|||||||
|
|
||||||
# List available audio devices
|
# List available audio devices
|
||||||
uv run python main_cli.py --list-devices
|
uv run python main_cli.py --list-devices
|
||||||
|
|
||||||
# Install with CUDA support (if needed)
|
|
||||||
uv pip install torch --index-url https://download.pytorch.org/whl/cu121
|
|
||||||
```
|
```
|
||||||
|
|
||||||
### Building Executables
|
### Building
|
||||||
```bash
|
```bash
|
||||||
# Linux (includes CUDA support - works on both GPU and CPU systems)
|
# Build Tauri app (produces platform installer)
|
||||||
./build.sh
|
npm run tauri build
|
||||||
|
|
||||||
# Windows (includes CUDA support - works on both GPU and CPU systems)
|
# Build headless Python sidecar (no PySide6)
|
||||||
build.bat
|
uv run pyinstaller local-transcription-headless.spec
|
||||||
|
# Output: dist/local-transcription-backend/
|
||||||
|
|
||||||
# Manual build with PyInstaller
|
# Build legacy PySide6 app
|
||||||
uv sync # Install dependencies (includes CUDA PyTorch)
|
|
||||||
uv pip uninstall -q enum34 # Remove incompatible enum34 package
|
|
||||||
uv run pyinstaller local-transcription.spec
|
uv run pyinstaller local-transcription.spec
|
||||||
|
# Or use: ./build.sh (Linux) / build.bat (Windows)
|
||||||
```
|
```
|
||||||
|
|
||||||
**Important:** All builds include CUDA support via `pyproject.toml` configuration. CUDA builds can be created on systems without NVIDIA GPUs. The PyTorch CUDA runtime is bundled, and the app automatically falls back to CPU if no GPU is available.
|
|
||||||
|
|
||||||
### Testing
|
### Testing
|
||||||
```bash
|
```bash
|
||||||
# Run component tests
|
|
||||||
uv run python test_components.py
|
uv run python test_components.py
|
||||||
|
|
||||||
# Check CUDA availability
|
|
||||||
uv run python check_cuda.py
|
uv run python check_cuda.py
|
||||||
|
|
||||||
# Test web server manually
|
|
||||||
uv run python -m uvicorn server.web_display:app --reload
|
|
||||||
```
|
```
|
||||||
|
|
||||||
## Architecture
|
## Architecture Details
|
||||||
|
|
||||||
### Audio Processing Pipeline
|
### Communication: Tauri <-> Python Backend
|
||||||
|
|
||||||
1. **Audio Capture** ([client/audio_capture.py](client/audio_capture.py))
|
The Svelte frontend connects to the Python backend via two channels:
|
||||||
- Captures audio from microphone/system using sounddevice
|
|
||||||
- Handles automatic sample rate detection and resampling
|
|
||||||
- Uses chunking with overlap for better transcription quality
|
|
||||||
- Default: 3-second chunks with 0.5s overlap
|
|
||||||
|
|
||||||
2. **Noise Suppression** ([client/noise_suppression.py](client/noise_suppression.py))
|
**REST API** (on port 8081 by default):
|
||||||
- Applies noisereduce for background noise reduction
|
- `GET /api/status` — app state, device info, version
|
||||||
- Voice Activity Detection (VAD) using webrtcvad
|
- `POST /api/start` / `POST /api/stop` — transcription control
|
||||||
- Skips silent segments to improve performance
|
- `GET /api/config` / `PUT /api/config` — read/write settings (dot-notation keys)
|
||||||
|
- `GET /api/audio-devices` / `GET /api/compute-devices` — device enumeration
|
||||||
|
- `POST /api/reload-engine` — reload with new model/device
|
||||||
|
- `GET /api/transcriptions` / `POST /api/clear` — transcript management
|
||||||
|
- `POST /api/save-file` — write text to a file path
|
||||||
|
- `GET /api/check-update` / `POST /api/skip-version` — update management
|
||||||
|
- `POST /api/login` / `POST /api/register` / `GET /api/balance` — managed mode proxy
|
||||||
|
|
||||||
3. **Transcription** ([client/transcription_engine.py](client/transcription_engine.py))
|
**WebSocket** `/ws/control`:
|
||||||
- Uses faster-whisper for efficient inference
|
- Pushes real-time events: `state_changed`, `transcription`, `preview`, `error`, `credits_low`
|
||||||
- Supports CPU, CUDA, and Apple MPS (Mac)
|
- Client sends keepalive pings
|
||||||
- Models: tiny, base, small, medium, large
|
|
||||||
- Thread-safe model loading with locks
|
|
||||||
|
|
||||||
4. **Display** ([gui/main_window_qt.py](gui/main_window_qt.py))
|
The OBS display server runs separately on port 8080 (`GET /` for HTML, `WebSocket /ws` for transcriptions).
|
||||||
- PySide6/Qt-based desktop GUI
|
|
||||||
- Real-time transcription display with scrolling
|
|
||||||
- Settings panel with live updates (no restart needed)
|
|
||||||
|
|
||||||
### Web Server Architecture
|
### Backend Process Lifecycle
|
||||||
|
|
||||||
**Local Web Server** ([server/web_display.py](server/web_display.py))
|
1. `main_headless.py` starts, acquires instance lock, creates `AppController`
|
||||||
- Always runs when GUI starts (port 8080 by default)
|
2. `AppController.initialize()` starts the OBS web server (port 8080) and engine init thread
|
||||||
- FastAPI with WebSocket for real-time updates
|
3. `APIServer` wraps the controller with FastAPI routes, runs on port 8081
|
||||||
- Used for OBS browser source integration
|
4. Backend prints `{"event": "ready", "port": 8080}` to stdout for Tauri to discover
|
||||||
- Single-user (displays only local transcriptions)
|
5. On shutdown: engine stopped, web server stopped, lock released
|
||||||
|
|
||||||
**Multi-User Server** (Optional - for syncing across multiple users)
|
### Headless Backend vs Legacy GUI
|
||||||
|
|
||||||
**Node.js WebSocket Server** ([server/nodejs/](server/nodejs/)) - **RECOMMENDED**
|
The `AppController` class (`backend/app_controller.py`) extracts all orchestration logic from `gui/main_window_qt.py` into a Qt-free class. The mapping:
|
||||||
- Real-time WebSocket support (< 100ms latency)
|
|
||||||
- Handles 100+ concurrent users
|
|
||||||
- Easy deployment to VPS/cloud hosting (Railway, Heroku, DigitalOcean, or any VPS)
|
|
||||||
- Configurable display options via URL parameters:
|
|
||||||
- `timestamps=true/false` - Show/hide timestamps
|
|
||||||
- `maxlines=50` - Maximum visible lines (prevents scroll bars in OBS)
|
|
||||||
- `fontsize=16` - Font size in pixels
|
|
||||||
- `fontfamily=Arial` - Font family
|
|
||||||
- `fade=10` - Seconds before text fades (0 = never)
|
|
||||||
|
|
||||||
See [server/nodejs/README.md](server/nodejs/README.md) for deployment instructions
|
| Legacy (MainWindow) | Headless (AppController) |
|
||||||
|
|---------------------|--------------------------|
|
||||||
|
| `_initialize_components()` | `_initialize_engine()` |
|
||||||
|
| `_start_transcription()` | `start_transcription()` |
|
||||||
|
| `_stop_transcription()` | `stop_transcription()` |
|
||||||
|
| `_on_settings_saved()` | `apply_settings()` |
|
||||||
|
| `_reload_engine()` | `reload_engine()` |
|
||||||
|
| `_start_web_server_if_enabled()` | `_start_web_server()` |
|
||||||
|
| `_start_server_sync()` | `_start_server_sync()` |
|
||||||
|
| Qt signals | Callbacks (`on_state_changed`, `on_transcription`, etc.) |
|
||||||
|
|
||||||
### Configuration System
|
### Threading Model (Headless)
|
||||||
|
|
||||||
- Config stored at `~/.local-transcription/config.yaml`
|
- Main thread: Uvicorn (FastAPI) event loop
|
||||||
- Managed by [client/config.py](client/config.py)
|
- Engine init thread: Downloads models, initializes VAD
|
||||||
- Settings apply immediately without restart (except model changes)
|
- Web server thread: Separate asyncio loop for OBS display
|
||||||
- YAML format with nested keys (e.g., `transcription.model`)
|
- Audio capture: Runs in engine callback threads
|
||||||
|
- All results flow through `AppController` callbacks -> `APIServer` WebSocket broadcast
|
||||||
|
|
||||||
### Device Management
|
### Svelte Frontend
|
||||||
|
|
||||||
- [client/device_utils.py](client/device_utils.py) handles CPU/GPU detection
|
Uses Svelte 5 runes throughout (`$state`, `$derived`, `$effect`, `$props`). No Svelte 4 patterns.
|
||||||
- Auto-detects CUDA, MPS (Mac), or falls back to CPU
|
|
||||||
- Compute types: float32 (best quality), float16 (GPU), int8 (fastest)
|
|
||||||
- Thread-safe device selection
|
|
||||||
|
|
||||||
## Key Implementation Details
|
**Stores** (`src/lib/stores/`):
|
||||||
|
- `backend.ts` — WebSocket connection + REST helpers (`apiGet`, `apiPost`, `apiPut`), auto-reconnect
|
||||||
|
- `config.ts` — fetches/updates config from backend API
|
||||||
|
- `transcriptions.ts` — manages transcript list, listens for `CustomEvent`s from backend store
|
||||||
|
|
||||||
### PyInstaller Build Configuration
|
**Key patterns:**
|
||||||
|
- Backend store dispatches `CustomEvent`s on `window` for cross-store communication
|
||||||
|
- Settings component collects all changed values into a `Record<string, any>` with dot-notation keys, sends via `PUT /api/config`
|
||||||
|
- Controls use Tauri dialog plugin for native file save, falls back to blob download
|
||||||
|
|
||||||
- [local-transcription.spec](local-transcription.spec) controls build
|
## CI/CD
|
||||||
- UPX compression enabled for smaller executables
|
|
||||||
- Hidden imports required for PySide6, faster-whisper, torch
|
|
||||||
- Console mode enabled by default (set `console=False` to hide)
|
|
||||||
|
|
||||||
### Threading Model
|
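The Gitea Actions workflows in `.gitea/workflows/` comprise `test.yml` (Python, frontend, and Rust tests on pushes and pull requests to `main`) and eight release workflows, split into coordinators and per-OS builders: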
|
||||||
|
|
||||||
- Main thread: Qt GUI event loop
|
**App release (Tauri):**
|
||||||
- Audio thread: Captures and processes audio chunks
|
- **`release.yml`**: Coordinator. Triggers on push to `main`. Auto-bumps version in package.json/tauri.conf.json/Cargo.toml/version.py, commits, tags `v{VERSION}`, creates Gitea release.
|
||||||
- Web server thread: Runs FastAPI server
|
- **`build-app-linux.yml`**: Runs via `workflow_dispatch` with a required `tag` input (the release tag to build, e.g. `v1.4.5`). Builds Tauri app, uploads `.deb`/`.rpm`/`.AppImage`.
|
||||||
- Transcription: Runs in callback thread from audio capture
|
- **`build-app-windows.yml`**: Triggers on `v*` tag push or `workflow_dispatch`. Builds Tauri app, uploads `.msi`/`*-setup.exe`.
|
||||||
- All transcription results communicated via Qt signals
|
- **`build-app-macos.yml`**: Triggers on `v*` tag push or `workflow_dispatch`. Builds Tauri app, uploads `.dmg`.
|
||||||
|
|
||||||
### Server Sync (Optional Multi-User Feature)
|
**Sidecar release (Python backend):**
|
||||||
|
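- **`sidecar-release.yml`**: Coordinator. Triggers on push to `main` with changes in `client/`, `server/`, `backend/`, `pyproject.toml`, or `local-transcription-headless.spec`, or manually via `workflow_dispatch`. Runs the Python tests, bumps the patch version in `pyproject.toml`, commits, tags `sidecar-v{VERSION}`, creates the Gitea release, dispatches the per-OS sidecar builds, and prunes old sidecar releases (keeping the latest two).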
|
||||||
|
- **`build-sidecar-linux.yml`**: Triggers on `sidecar-v*` tag push or `workflow_dispatch`. Builds CUDA + CPU sidecars via PyInstaller.
|
||||||
|
- **`build-sidecar-windows.yml`**: Triggers on `sidecar-v*` tag push or `workflow_dispatch`. Builds CUDA + CPU sidecars via PyInstaller.
|
||||||
|
- **`build-sidecar-macos.yml`**: Triggers on `sidecar-v*` tag push or `workflow_dispatch`. Builds CPU-only sidecar via PyInstaller.
|
||||||
|
|
||||||
- [client/server_sync.py](client/server_sync.py) handles server communication
|
All per-OS build workflows can be re-run independently via `workflow_dispatch` with an optional `tag` input. All require a `BUILD_TOKEN` secret (Gitea API token with release write access).
|
||||||
- Toggle in Settings: "Enable Server Sync"
|
|
||||||
- Sends transcriptions to Node.js server via HTTP POST
|
|
||||||
- Real-time updates via WebSocket to display page
|
|
||||||
- Per-speaker font support (Web-Safe, Google Fonts, Custom uploads)
|
|
||||||
- Falls back gracefully if server unavailable
|
|
||||||
|
|
||||||
## Common Patterns
|
## Common Patterns
|
||||||
|
|
||||||
### Adding a New Setting
|
### Adding a New Setting
|
||||||
|
|
||||||
1. Add to [config/default_config.yaml](config/default_config.yaml)
|
1. Add default to [config/default_config.yaml](config/default_config.yaml)
|
||||||
2. Update [client/config.py](client/config.py) if validation needed
|
2. Add UI control in [src/lib/components/Settings.svelte](src/lib/components/Settings.svelte)
|
||||||
3. Add UI control in [gui/settings_dialog_qt.py](gui/settings_dialog_qt.py)
|
3. Ensure the setting is included in the save handler's config update
|
||||||
4. Apply setting in relevant component (no restart if possible)
|
4. Apply in `AppController.apply_settings()` or the relevant component
|
||||||
5. Emit signal to update display if needed
|
5. For legacy GUI: also update [gui/settings_dialog_qt.py](gui/settings_dialog_qt.py)
|
||||||
|
|
||||||
|
### Adding a New API Endpoint
|
||||||
|
|
||||||
|
1. Add route in [backend/api_server.py](backend/api_server.py) `_setup_routes()`
|
||||||
|
2. Add supporting logic in [backend/app_controller.py](backend/app_controller.py) if needed
|
||||||
|
3. Call from Svelte via `backendStore.apiGet/apiPost/apiPut`
|
||||||
|
|
||||||
### Modifying Transcription Display
|
### Modifying Transcription Display
|
||||||
|
|
||||||
- Local GUI: [gui/transcription_display_qt.py](gui/transcription_display_qt.py)
|
- Tauri UI: [src/lib/components/TranscriptionDisplay.svelte](src/lib/components/TranscriptionDisplay.svelte)
|
||||||
- Local web display (OBS): [server/web_display.py](server/web_display.py) (HTML in `_get_html()`)
|
- OBS display: [server/web_display.py](server/web_display.py) (HTML in `_get_html()`)
|
||||||
- Multi-user display: [server/nodejs/server.js](server/nodejs/server.js) (display page in `/display` route)
|
- Multi-user display: [server/nodejs/server.js](server/nodejs/server.js) (display page in `/display` route)
|
||||||
|
|
||||||
### Adding a New Model Size
|
|
||||||
|
|
||||||
- Update [client/transcription_engine.py](client/transcription_engine.py)
|
|
||||||
- Add to model selector in [gui/settings_dialog_qt.py](gui/settings_dialog_qt.py)
|
|
||||||
- Update CLI argument choices in [main_cli.py](main_cli.py)
|
|
||||||
|
|
||||||
## Dependencies
|
## Dependencies
|
||||||
|
|
||||||
**Core:**
|
**Frontend:** Tauri v2, Svelte 5, Vite, TypeScript
|
||||||
- `faster-whisper`: Optimized Whisper inference
|
**Backend:** Python 3.9+, FastAPI, Uvicorn, RealtimeSTT, faster-whisper, PyTorch (CUDA), sounddevice
|
||||||
- `torch`: ML framework (CUDA-enabled via special index)
|
**Build:** PyInstaller (sidecar), Tauri CLI (app), uv (Python packages)
|
||||||
- `PySide6`: Qt6 bindings for GUI
|
**CI:** Gitea Actions with platform-specific runners
|
||||||
- `sounddevice`: Cross-platform audio I/O
|
|
||||||
- `noisereduce`, `webrtcvad`: Audio preprocessing
|
|
||||||
|
|
||||||
**Web Server:**
|
|
||||||
- `fastapi`, `uvicorn`: Web server and ASGI
|
|
||||||
- `websockets`: Real-time communication
|
|
||||||
|
|
||||||
**Build:**
|
|
||||||
- `pyinstaller`: Create standalone executables
|
|
||||||
- `uv`: Fast package manager
|
|
||||||
|
|
||||||
**PyTorch CUDA Index:**
|
|
||||||
- Configured in [pyproject.toml](pyproject.toml) under `[[tool.uv.index]]`
|
|
||||||
- Uses PyTorch's custom wheel repository for CUDA builds
|
|
||||||
- Automatically installed with `uv sync` when using CUDA build scripts
|
|
||||||
|
|
||||||
## Platform-Specific Notes
|
## Platform-Specific Notes
|
||||||
|
|
||||||
### Linux
|
### Linux
|
||||||
- Uses PulseAudio/ALSA for audio
|
- Tauri needs: `libgtk-3-dev`, `libwebkit2gtk-4.1-dev`, `libappindicator3-dev`, `librsvg2-dev`, `patchelf`
|
||||||
- Build scripts use bash (`.sh` files)
|
- Audio: PulseAudio/ALSA via sounddevice
|
||||||
- Executable: `dist/LocalTranscription/LocalTranscription`
|
|
||||||
|
|
||||||
### Windows
|
### Windows
|
||||||
- Uses Windows Audio/WASAPI
|
- Tauri needs: WebView2 (usually pre-installed on Windows 10+)
|
||||||
- Build scripts use batch (`.bat` files)
|
- Audio: WASAPI via sounddevice
|
||||||
- Executable: `dist\LocalTranscription\LocalTranscription.exe`
|
|
||||||
- Requires Visual C++ Redistributable on target systems
|
|
||||||
|
|
||||||
### Cross-Building
|
### macOS
|
||||||
- **Cannot cross-compile** - must build on target platform
|
- Tauri needs: Xcode Command Line Tools
|
||||||
- CI/CD should use platform-specific runners
|
- Audio: CoreAudio via sounddevice
|
||||||
|
- GPU: MPS (Apple Silicon) detected by `device_utils.py`
|
||||||
## Troubleshooting
|
- `Info.plist` must include `NSMicrophoneUsageDescription` for mic access
|
||||||
|
- No CUDA builds — CPU/MPS only
|
||||||
### Model Loading Issues
|
|
||||||
- Models download to `~/.cache/huggingface/`
|
|
||||||
- First run requires internet connection
|
|
||||||
- Check disk space (models: 75MB-3GB depending on size)
|
|
||||||
|
|
||||||
### Audio Device Issues
|
|
||||||
- Run `uv run python main_cli.py --list-devices`
|
|
||||||
- Check permissions (microphone access)
|
|
||||||
- Try different device indices in settings
|
|
||||||
|
|
||||||
### GPU Not Detected
|
|
||||||
- Run `uv run python check_cuda.py`
|
|
||||||
- Install CUDA drivers (not CUDA toolkit - bundled in build)
|
|
||||||
- Verify PyTorch sees GPU: `python -c "import torch; print(torch.cuda.is_available())"`
|
|
||||||
|
|
||||||
### Web Server Port Conflicts
|
|
||||||
- Default port: 8080
|
|
||||||
- Change in [gui/main_window_qt.py](gui/main_window_qt.py) or config
|
|
||||||
- Use `lsof -i :8080` (Linux) or `netstat -ano | findstr :8080` (Windows)
|
|
||||||
|
|
||||||
## OBS Integration
|
|
||||||
|
|
||||||
### Local Display (Single User)
|
|
||||||
1. Start Local Transcription app
|
|
||||||
2. In OBS: Add "Browser" source
|
|
||||||
3. URL: `http://localhost:8080`
|
|
||||||
4. Set dimensions (e.g., 1920x300)
|
|
||||||
|
|
||||||
### Multi-User Display (Node.js Server)
|
|
||||||
1. Deploy Node.js server (see [server/nodejs/README.md](server/nodejs/README.md))
|
|
||||||
2. Each user configures Server URL: `http://your-server:3000/api/send`
|
|
||||||
3. Enter same room name and passphrase
|
|
||||||
4. In OBS: Add "Browser" source
|
|
||||||
5. URL: `http://your-server:3000/display?room=ROOM&fade=10&timestamps=true&maxlines=50&fontsize=16`
|
|
||||||
6. Customize URL parameters as needed:
|
|
||||||
- `timestamps=false` - Hide timestamps
|
|
||||||
- `maxlines=30` - Show max 30 lines (prevents scroll bars)
|
|
||||||
- `fontsize=18` - Larger font
|
|
||||||
- `fontfamily=Courier` - Different font
|
|
||||||
|
|
||||||
## Performance Optimization
|
|
||||||
|
|
||||||
**For Real-Time Transcription:**
|
|
||||||
- Use `tiny` or `base` model (faster)
|
|
||||||
- Enable GPU if available (5-10x faster)
|
|
||||||
- Increase chunk_duration for better accuracy (higher latency)
|
|
||||||
- Decrease chunk_duration for lower latency (less context)
|
|
||||||
- Enable VAD to skip silent audio
|
|
||||||
|
|
||||||
**For Build Size Reduction:**
|
|
||||||
- Don't bundle models (download on demand)
|
|
||||||
- Use CPU-only build if no GPU users
|
|
||||||
- Enable UPX compression (already in spec)
|
|
||||||
|
|
||||||
## Phase Status
|
|
||||||
|
|
||||||
- ✅ **Phase 1**: Standalone desktop application (complete)
|
|
||||||
- ✅ **Web Server**: Local OBS integration (complete)
|
|
||||||
- ✅ **Builds**: PyInstaller executables (complete)
|
|
||||||
- ✅ **Phase 2**: Multi-user Node.js server (complete, optional)
|
|
||||||
- ⏸️ **Phase 3+**: Advanced features (see [NEXT_STEPS.md](NEXT_STEPS.md))
|
|
||||||
|
|
||||||
## Related Documentation
|
## Related Documentation
|
||||||
|
|
||||||
- [README.md](README.md) - User-facing documentation
|
- [README.md](README.md) — User-facing documentation
|
||||||
- [BUILD.md](BUILD.md) - Detailed build instructions
|
- [BUILD.md](BUILD.md) — Detailed build instructions
|
||||||
- [INSTALL.md](INSTALL.md) - Installation guide
|
- [INSTALL.md](INSTALL.md) — Installation guide
|
||||||
- [NEXT_STEPS.md](NEXT_STEPS.md) - Future enhancements
|
- [server/nodejs/README.md](server/nodejs/README.md) — Node.js server setup
|
||||||
- [server/nodejs/README.md](server/nodejs/README.md) - Node.js server setup and deployment
|
|
||||||
|
|||||||
574
DEEPGRAM_PROXY_PLAN.md
Normal file
574
DEEPGRAM_PROXY_PLAN.md
Normal file
@@ -0,0 +1,574 @@
|
|||||||
|
# Deepgram Proxy Service — Build Plan
|
||||||
|
|
||||||
|
## Project Overview
|
||||||
|
|
||||||
|
Build a standalone hosted service that acts as a Deepgram proxy for the Local Transcription
|
||||||
|
desktop app. Users can either provide their own Deepgram API key (BYOK) or use the managed
|
||||||
|
service with prepaid credits purchased via Stripe.
|
||||||
|
|
||||||
|
This is a **separate repository** from `local-transcription`. The desktop app will be updated
|
||||||
|
in a second phase to support both modes.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Repository Structure
|
||||||
|
|
||||||
|
```
|
||||||
|
transcription-proxy/
|
||||||
|
├── src/
|
||||||
|
│ ├── server.js # Express app entry point
|
||||||
|
│ ├── config.js # Environment config loader
|
||||||
|
│ ├── db/
|
||||||
|
│ │ ├── index.js # node-postgres pool setup
|
||||||
|
│ │ └── migrations/ # SQL migration files (numbered)
|
||||||
|
│ │ ├── 001_users.sql
|
||||||
|
│ │ ├── 002_credits.sql
|
||||||
|
│ │ ├── 003_sessions.sql
|
||||||
|
│ │ └── 004_usage_ledger.sql
|
||||||
|
│ ├── middleware/
|
||||||
|
│ │ ├── auth.js # JWT verification middleware
|
||||||
|
│ │ └── rateLimit.js # Per-user rate limiting
|
||||||
|
│ ├── routes/
|
||||||
|
│ │ ├── auth.js # POST /auth/register, /auth/login, /auth/refresh
|
||||||
|
│ │ ├── billing.js # POST /billing/checkout, GET /billing/balance
|
||||||
|
│ │ └── account.js # GET /account/me, GET /account/usage
|
||||||
|
│ ├── websocket/
|
||||||
|
│ │ └── proxy.js # WebSocket proxy handler (core feature)
|
||||||
|
│ └── webhooks/
|
||||||
|
│ └── stripe.js # POST /webhooks/stripe
|
||||||
|
├── web/ # Simple frontend dashboard
|
||||||
|
│ ├── index.html # Landing / login page
|
||||||
|
│ ├── dashboard.html # Balance, usage history, buy credits
|
||||||
|
│ └── assets/
|
||||||
|
│ ├── app.js
|
||||||
|
│ └── style.css
|
||||||
|
├── .env.example
|
||||||
|
├── package.json
|
||||||
|
├── docker-compose.yml # Postgres + app for local dev
|
||||||
|
└── CLAUDE.md # This file (after renaming)
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Technology Stack
|
||||||
|
|
||||||
|
- **Runtime**: Node.js 20+
|
||||||
|
- **Framework**: Express 4
|
||||||
|
- **WebSocket**: `ws` library (not socket.io — keep it lean)
|
||||||
|
- **Database**: PostgreSQL 15+ via `pg` (node-postgres)
|
||||||
|
- **Auth**: JWT via `jsonwebtoken`, passwords hashed with `bcrypt`
|
||||||
|
- **Payments**: Stripe Node SDK (`stripe`)
|
||||||
|
- **Environment**: `dotenv`
|
||||||
|
- **Dev tooling**: `nodemon` for dev, no TypeScript (keep it simple)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Database Schema
|
||||||
|
|
||||||
|
Run migrations in order. Use a simple `schema_migrations` table to track applied migrations.
|
||||||
|
|
||||||
|
### 001_users.sql
|
||||||
|
```sql
|
||||||
|
CREATE TABLE schema_migrations (
|
||||||
|
version INTEGER PRIMARY KEY,
|
||||||
|
applied_at TIMESTAMPTZ DEFAULT NOW()
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE users (
|
||||||
|
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||||
|
email TEXT UNIQUE NOT NULL,
|
||||||
|
password_hash TEXT NOT NULL,
|
||||||
|
stripe_customer_id TEXT UNIQUE,
|
||||||
|
created_at TIMESTAMPTZ DEFAULT NOW(),
|
||||||
|
updated_at TIMESTAMPTZ DEFAULT NOW()
|
||||||
|
);
|
||||||
|
```
|
||||||
|
|
||||||
|
### 002_credits.sql
|
||||||
|
```sql
|
||||||
|
CREATE TABLE credit_balance (
|
||||||
|
user_id UUID PRIMARY KEY REFERENCES users(id) ON DELETE CASCADE,
|
||||||
|
seconds_remaining INTEGER NOT NULL DEFAULT 0,
|
||||||
|
updated_at TIMESTAMPTZ DEFAULT NOW()
|
||||||
|
);
|
||||||
|
```
|
||||||
|
|
||||||
|
### 003_sessions.sql
|
||||||
|
```sql
|
||||||
|
CREATE TABLE transcription_sessions (
|
||||||
|
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||||
|
user_id UUID NOT NULL REFERENCES users(id),
|
||||||
|
mode TEXT NOT NULL CHECK (mode IN ('managed', 'byok')),
|
||||||
|
started_at TIMESTAMPTZ DEFAULT NOW(),
|
||||||
|
ended_at TIMESTAMPTZ,
|
||||||
|
seconds_used INTEGER NOT NULL DEFAULT 0,
|
||||||
|
deepgram_model TEXT,
|
||||||
|
status TEXT NOT NULL DEFAULT 'active' CHECK (status IN ('active', 'completed', 'terminated'))
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE INDEX idx_sessions_user_id ON transcription_sessions(user_id);
|
||||||
|
CREATE INDEX idx_sessions_started_at ON transcription_sessions(started_at);
|
||||||
|
```
|
||||||
|
|
||||||
|
### 004_usage_ledger.sql
|
||||||
|
```sql
|
||||||
|
CREATE TABLE usage_ledger (
|
||||||
|
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||||
|
user_id UUID NOT NULL REFERENCES users(id),
|
||||||
|
session_id UUID REFERENCES transcription_sessions(id),
|
||||||
|
recorded_at TIMESTAMPTZ DEFAULT NOW(),
|
||||||
|
seconds INTEGER NOT NULL,
|
||||||
|
description TEXT -- e.g. 'session_usage', 'credit_purchase', 'manual_adjustment'
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE INDEX idx_ledger_user_id ON usage_ledger(user_id);
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Environment Variables (.env.example)
|
||||||
|
|
||||||
|
```env
|
||||||
|
# Server
|
||||||
|
PORT=3000
|
||||||
|
NODE_ENV=development
|
||||||
|
|
||||||
|
# Database
|
||||||
|
DATABASE_URL=postgresql://user:password@localhost:5432/transcription_proxy
|
||||||
|
|
||||||
|
# Auth
|
||||||
|
JWT_SECRET=changeme_use_long_random_string
|
||||||
|
JWT_EXPIRY=7d
|
||||||
|
|
||||||
|
# Stripe
|
||||||
|
STRIPE_SECRET_KEY=sk_test_...
|
||||||
|
STRIPE_WEBHOOK_SECRET=whsec_...
|
||||||
|
|
||||||
|
# Deepgram
|
||||||
|
DEEPGRAM_API_KEY=your_deepgram_key_here
|
||||||
|
|
||||||
|
# Pricing (seconds per dollar — adjust for your margin)
|
||||||
|
# Default: 1000 seconds per $1 (= $0.06/min charged), which covers Deepgram's ~$0.006/min managed cost plus margin
|
||||||
|
CREDITS_PER_DOLLAR=1000
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 1 — Core Server & Auth
|
||||||
|
|
||||||
|
### Goals
|
||||||
|
- Working Express app with Postgres connection
|
||||||
|
- Migration runner
|
||||||
|
- User registration and login
|
||||||
|
- JWT middleware
|
||||||
|
|
||||||
|
### Tasks
|
||||||
|
|
||||||
|
1. **Scaffold project**
|
||||||
|
- `npm init`, install dependencies: `express ws pg jsonwebtoken bcrypt stripe dotenv`
|
||||||
|
- Dev dependencies: `nodemon`
|
||||||
|
- Add `start` and `dev` scripts to package.json
|
||||||
|
|
||||||
|
2. **Database connection** (`src/db/index.js`)
|
||||||
|
- Export a `pg.Pool` instance using `DATABASE_URL`
|
||||||
|
- Export a `migrate()` function that reads `src/db/migrations/*.sql` in order,
|
||||||
|
checks `schema_migrations` table, and applies unapplied ones
|
||||||
|
- Call `migrate()` on server startup before listening
|
||||||
|
|
||||||
|
3. **Auth routes** (`src/routes/auth.js`)
|
||||||
|
- `POST /auth/register` — validate email/password, hash password with bcrypt (cost 12),
|
||||||
|
insert user, insert empty credit_balance row, return JWT
|
||||||
|
- `POST /auth/login` — verify credentials, return JWT + refresh token
|
||||||
|
- `POST /auth/refresh` — validate refresh token, return new JWT
|
||||||
|
- Passwords: minimum 8 characters, validate email format
|
||||||
|
|
||||||
|
4. **JWT middleware** (`src/middleware/auth.js`)
|
||||||
|
- Verify `Authorization: Bearer <token>` header
|
||||||
|
- Attach `req.user = { id, email }` on success
|
||||||
|
- Return 401 on failure
|
||||||
|
- Export as `requireAuth` middleware
|
||||||
|
|
||||||
|
5. **Basic health check**
|
||||||
|
- `GET /health` returns `{ status: 'ok', db: 'connected' }`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 2 — Billing & Credits
|
||||||
|
|
||||||
|
### Goals
|
||||||
|
- Stripe Checkout session creation for credit purchases
|
||||||
|
- Webhook handler to fulfill purchases
|
||||||
|
- Balance endpoint
|
||||||
|
|
||||||
|
### Payment Methods
|
||||||
|
|
||||||
|
Use **Stripe Dynamic Payment Methods** — do NOT hardcode `payment_method_types` in the
|
||||||
|
Checkout Session. Instead, leave it unset and manage everything from the Stripe Dashboard.
|
||||||
|
|
||||||
|
Enable the following in the Stripe Dashboard under Settings → Payment Methods:
|
||||||
|
- **Cards** (Visa, Mastercard, Amex, Discover) — on by default
|
||||||
|
- **PayPal** — enable manually
|
||||||
|
- **Apple Pay** — on by default, shows automatically on Safari/iOS
|
||||||
|
- **Google Pay** — enable manually (one toggle)
|
||||||
|
- **Cash App Pay** — enable manually (popular with streaming audiences)
|
||||||
|
- **Link** — Stripe's saved payment network, on by default
|
||||||
|
|
||||||
|
Stripe will automatically show the most relevant methods to each user based on their
|
||||||
|
location and device. No code changes are needed to add or remove methods in future —
|
||||||
|
it's all dashboard config.
|
||||||
|
|
||||||
|
### Credit Packages
|
||||||
|
|
||||||
|
Define these as constants in `src/config.js`:
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
CREDIT_PACKAGES: [
|
||||||
|
{ id: 'pack_500', label: '500 minutes', seconds: 30000, price_cents: 300 },
|
||||||
|
{ id: 'pack_1200', label: '1200 minutes', seconds: 72000, price_cents: 600 },
|
||||||
|
{ id: 'pack_3000', label: '3000 minutes', seconds: 180000, price_cents: 1200 },
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
Adjust pricing to cover Deepgram costs ($0.006/min = $0.0001/sec) plus margin and
|
||||||
|
Stripe fees (~2.9% + $0.30).
|
||||||
|
|
||||||
|
### Tasks
|
||||||
|
|
||||||
|
1. **Stripe customer creation**
|
||||||
|
- On user registration, create a Stripe customer and store `stripe_customer_id`
|
||||||
|
- Do this asynchronously (don't block registration response)
|
||||||
|
|
||||||
|
2. **Billing routes** (`src/routes/billing.js`)
|
||||||
|
- `GET /billing/packages` — return credit package list (no auth required)
|
||||||
|
- `POST /billing/checkout` — requires auth, accepts `{ package_id }`,
|
||||||
|
creates Stripe Checkout Session using dynamic payment methods (do NOT pass
|
||||||
|
`payment_method_types` — omitting it enables dynamic methods automatically),
|
||||||
|
include session-level `metadata` (the object delivered in `checkout.session.completed`), mirrored in `payment_intent_data.metadata`, containing `user_id` and `package_id`,
|
||||||
|
returns `{ checkout_url }`
|
||||||
|
- `GET /billing/balance` — requires auth, returns `{ seconds_remaining, minutes_remaining }`
|
||||||
|
|
||||||
|
3. **Stripe webhook** (`src/webhooks/stripe.js`)
|
||||||
|
- Mount at `POST /webhooks/stripe` with raw body (use `express.raw()` for this route only)
|
||||||
|
- Verify signature with `stripe.webhooks.constructEvent()`
|
||||||
|
- Handle `checkout.session.completed`:
|
||||||
|
- Extract `user_id` and `package_id` from metadata
|
||||||
|
- Add seconds to `credit_balance`
|
||||||
|
- Insert row into `usage_ledger` with description `'credit_purchase'`
|
||||||
|
- Handle `payment_intent.payment_failed`: log it (no action needed for prepaid)
|
||||||
|
|
||||||
|
4. **Success/cancel pages**
|
||||||
|
- Stripe Checkout redirects to `GET /billing/success?session_id=...` and `/billing/cancel`
|
||||||
|
- These can be simple HTML responses or redirects to the web dashboard
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 3 — WebSocket Proxy (Core Feature)
|
||||||
|
|
||||||
|
This is the most critical component. The proxy sits between the desktop client and Deepgram,
|
||||||
|
forwarding audio while tracking usage in real time.
|
||||||
|
|
||||||
|
### Connection Flow
|
||||||
|
|
||||||
|
```
|
||||||
|
Client connects → validate JWT → check credit balance → open Deepgram upstream
|
||||||
|
↓
|
||||||
|
Audio chunks arrive → forward to Deepgram → record usage every 10 seconds
|
||||||
|
↓
|
||||||
|
Transcription arrives from Deepgram → forward to client
|
||||||
|
↓
|
||||||
|
Client disconnects (or credits exhausted) → close upstream → finalize session
|
||||||
|
```
|
||||||
|
|
||||||
|
### WebSocket Protocol
|
||||||
|
|
||||||
|
**Client connects to**: `wss://your-domain/ws/transcribe`
|
||||||
|
|
||||||
|
**Client sends as first message** (JSON):
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"type": "auth",
|
||||||
|
"token": "<JWT>",
|
||||||
|
"config": {
|
||||||
|
"model": "nova-2",
|
||||||
|
"language": "en-US",
|
||||||
|
"interim_results": true,
|
||||||
|
"endpointing": 300
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**After auth success, client sends**: raw audio binary frames (PCM 16kHz mono)
|
||||||
|
|
||||||
|
**Server sends to client**:
|
||||||
|
```json
|
||||||
|
{ "type": "ready" }
|
||||||
|
{ "type": "transcript", "text": "...", "is_final": true, "confidence": 0.98 }
|
||||||
|
{ "type": "error", "code": "insufficient_credits", "message": "..." }
|
||||||
|
{ "type": "credits_low", "seconds_remaining": 300 }
|
||||||
|
{ "type": "session_end", "seconds_used": 120 }
|
||||||
|
```
|
||||||
|
|
||||||
|
### Tasks (`src/websocket/proxy.js`)
|
||||||
|
|
||||||
|
1. **Upgrade handler**
|
||||||
|
- Attach to the HTTP server using `ws.Server({ noServer: true })`
|
||||||
|
- In `server.on('upgrade', ...)`, route `/ws/transcribe` to this handler
|
||||||
|
|
||||||
|
2. **Auth handshake**
|
||||||
|
- First message must be `{ type: 'auth', token: '...' }` — received within 5 seconds
|
||||||
|
or connection is terminated
|
||||||
|
- Verify JWT, load user's credit balance from DB
|
||||||
|
- If balance is 0 or negative, send `insufficient_credits` error and close
|
||||||
|
|
||||||
|
3. **Deepgram upstream connection**
|
||||||
|
- Open a WebSocket to Deepgram's streaming API:
|
||||||
|
`wss://api.deepgram.com/v1/listen?model=nova-2&language=en-US&interim_results=true`
|
||||||
|
- Auth header: `Authorization: Token <DEEPGRAM_API_KEY>`
|
||||||
|
- Use query params from client's `config` object (whitelist allowed params)
|
||||||
|
|
||||||
|
4. **Audio forwarding**
|
||||||
|
- All binary messages from client → forward directly to Deepgram upstream
|
||||||
|
- All messages from Deepgram → parse JSON, reformat, forward to client
|
||||||
|
|
||||||
|
5. **Usage tracking**
|
||||||
|
- Create a `transcription_sessions` row on connection
|
||||||
|
- Maintain an in-memory `secondsUsed` counter per connection
|
||||||
|
- Deepgram sends `{ type: 'Results', duration: X }` in responses — use this for
|
||||||
|
accurate second counting
|
||||||
|
- Every 10 seconds (or on disconnect), write current `secondsUsed` to DB:
|
||||||
|
- Update `transcription_sessions.seconds_used`
|
||||||
|
- Decrement `credit_balance.seconds_remaining`
|
||||||
|
- Insert into `usage_ledger`
|
||||||
|
- If `seconds_remaining` hits 0: send `insufficient_credits`, close connection
|
||||||
|
|
||||||
|
6. **Cleanup on disconnect**
|
||||||
|
- Mark session as `completed`, set `ended_at`
|
||||||
|
- Do final usage flush to DB
|
||||||
|
- Close Deepgram upstream if still open
|
||||||
|
|
||||||
|
7. **Error handling**
|
||||||
|
- If Deepgram upstream closes unexpectedly, notify client and close
|
||||||
|
- If client sends malformed data, log and continue (don't crash)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 4 — Account Routes & Rate Limiting
|
||||||
|
|
||||||
|
### Tasks
|
||||||
|
|
||||||
|
1. **Account routes** (`src/routes/account.js`)
|
||||||
|
- `GET /account/me` — returns `{ email, credits: { seconds_remaining, minutes_remaining }, created_at }`
|
||||||
|
- `GET /account/usage` — returns last 30 days of `usage_ledger` entries grouped by day,
|
||||||
|
plus list of last 10 sessions with duration
|
||||||
|
|
||||||
|
2. **Rate limiting** (`src/middleware/rateLimit.js`)
|
||||||
|
- Use in-memory rate limiting (no Redis needed at this scale)
|
||||||
|
- Auth endpoints: max 10 requests per minute per IP
|
||||||
|
- WebSocket connections: max 2 concurrent connections per user
|
||||||
|
(store active connections in a `Map<userId, Set<ws>>`)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 5 — Web Dashboard
|
||||||
|
|
||||||
|
A simple, functional HTML/CSS/JS dashboard. No framework — vanilla JS is fine.
|
||||||
|
This is a developer-friendly streamer tool, not a consumer SaaS, so clean and
|
||||||
|
functional beats flashy.
|
||||||
|
|
||||||
|
### Pages
|
||||||
|
|
||||||
|
**`/` (Landing / Login)**
|
||||||
|
- Brief product description (what this is, why it exists)
|
||||||
|
- Login form and link to register
|
||||||
|
- Link to GitHub/Gitea repo
|
||||||
|
|
||||||
|
**`/dashboard` (Post-login)**
|
||||||
|
- Current credit balance (minutes remaining, prominently displayed)
|
||||||
|
- "Buy Credits" section showing the three packages with Stripe Checkout buttons
|
||||||
|
- Usage chart: last 30 days bar chart (vanilla canvas or a small CDN chart lib)
|
||||||
|
- Recent sessions table: date, duration, status
|
||||||
|
|
||||||
|
**`/register`**
|
||||||
|
- Registration form
|
||||||
|
|
||||||
|
### Implementation Notes
|
||||||
|
- Store JWT in `localStorage`, attach as `Authorization` header on API calls
|
||||||
|
- Redirect to `/` if JWT missing or expired
|
||||||
|
- Keep CSS minimal but readable — this is a utility dashboard
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 6 — Desktop App Integration
|
||||||
|
|
||||||
|
Changes needed in the `local-transcription` Python repo.
|
||||||
|
|
||||||
|
### New file: `client/remote_transcription.py`
|
||||||
|
|
||||||
|
This module replaces `transcription_engine_realtime.py` when remote mode is active.
|
||||||
|
|
||||||
|
```python
|
||||||
|
# Pseudocode / spec for Claude Code to implement
|
||||||
|
|
||||||
|
class RemoteTranscriptionEngine:
|
||||||
|
"""
|
||||||
|
Connects to the transcription proxy WebSocket and streams audio.
|
||||||
|
Provides the same callback interface as the local engine so the
|
||||||
|
rest of the app doesn't need to change.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, config, on_transcript_callback):
|
||||||
|
# config contains: server_url, auth_token (or byok_api_key), model
|
||||||
|
...
|
||||||
|
|
||||||
|
def start(self):
|
||||||
|
# Open WebSocket connection
|
||||||
|
# Send auth message
|
||||||
|
# Start audio capture thread (reuse existing audio_capture.py)
|
||||||
|
...
|
||||||
|
|
||||||
|
def stop(self):
|
||||||
|
# Close WebSocket gracefully
|
||||||
|
...
|
||||||
|
|
||||||
|
def _on_audio_chunk(self, audio_data):
|
||||||
|
# Called by audio_capture.py with raw PCM data
|
||||||
|
# Send as binary WebSocket frame
|
||||||
|
...
|
||||||
|
|
||||||
|
def _on_server_message(self, message):
|
||||||
|
# Parse JSON from server
|
||||||
|
# On type='transcript': call on_transcript_callback
|
||||||
|
# On type='credits_low': trigger UI warning
|
||||||
|
# On type='error': surface to user
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
### BYOK Mode
|
||||||
|
|
||||||
|
When user provides their own Deepgram key, connect directly to Deepgram instead of the proxy:
|
||||||
|
- Endpoint: `wss://api.deepgram.com/v1/listen?...`
|
||||||
|
- Auth: `Authorization: Token <user_key>`
|
||||||
|
- No session tracking (Deepgram handles billing directly to the user)
|
||||||
|
- Same `RemoteTranscriptionEngine` class, just different URL and auth header
|
||||||
|
|
||||||
|
### Settings Changes (`gui/settings_dialog_qt.py`)
|
||||||
|
|
||||||
|
Add a new "Transcription Mode" section:
|
||||||
|
|
||||||
|
```
|
||||||
|
Transcription Mode:
|
||||||
|
○ Local (Whisper) [existing behavior]
|
||||||
|
○ Remote - Managed [requires login]
|
||||||
|
○ Remote - BYOK [requires Deepgram API key]
|
||||||
|
|
||||||
|
[If Managed selected]:
|
||||||
|
Server URL: [____________]
|
||||||
|
[Login / Register] [View Balance: 420 min remaining]
|
||||||
|
|
||||||
|
[If BYOK selected]:
|
||||||
|
Deepgram API Key: [____________]
|
||||||
|
Model: [nova-2 ▼]
|
||||||
|
```
|
||||||
|
|
||||||
|
### Config additions (`config/default_config.yaml`)
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
remote:
|
||||||
|
mode: local # local | managed | byok
|
||||||
|
server_url: "" # proxy server URL for managed mode
|
||||||
|
auth_token: "" # JWT stored after login
|
||||||
|
byok_api_key: "" # Deepgram key for BYOK mode
|
||||||
|
deepgram_model: nova-2
|
||||||
|
language: en-US
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Build & Deployment Notes
|
||||||
|
|
||||||
|
### Docker Compose (local dev)
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
version: '3.8'
|
||||||
|
services:
|
||||||
|
db:
|
||||||
|
image: postgres:15
|
||||||
|
environment:
|
||||||
|
POSTGRES_DB: transcription_proxy
|
||||||
|
POSTGRES_USER: user
|
||||||
|
POSTGRES_PASSWORD: password
|
||||||
|
ports:
|
||||||
|
- "5432:5432"
|
||||||
|
volumes:
|
||||||
|
- pgdata:/var/lib/postgresql/data
|
||||||
|
|
||||||
|
app:
|
||||||
|
build: .
|
||||||
|
ports:
|
||||||
|
- "3000:3000"
|
||||||
|
environment:
|
||||||
|
DATABASE_URL: postgresql://user:password@db:5432/transcription_proxy
|
||||||
|
depends_on:
|
||||||
|
- db
|
||||||
|
volumes:
|
||||||
|
- .:/app
|
||||||
|
- /app/node_modules
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
pgdata:
|
||||||
|
```
|
||||||
|
|
||||||
|
### Production Deployment
|
||||||
|
|
||||||
|
This service is a good fit for deployment on AnHonestHost WHP as a containerized app,
|
||||||
|
or on a small DigitalOcean/Linode VPS. Requirements are light:
|
||||||
|
- 512MB RAM is sufficient
|
||||||
|
- Postgres can be the same instance as other services or managed (e.g., Supabase free tier)
|
||||||
|
- Needs a public domain with SSL for WebSocket (`wss://`) to work from desktop clients
|
||||||
|
|
||||||
|
Reverse proxy config (Nginx or HAProxy) should:
|
||||||
|
- Proxy HTTP → `localhost:3000`
|
||||||
|
- Pass `Upgrade` and `Connection` headers for WebSocket support
|
||||||
|
- Set `proxy_read_timeout 3600` (sessions can be long)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Implementation Order
|
||||||
|
|
||||||
|
Build and test in this sequence:
|
||||||
|
|
||||||
|
1. Project scaffold + DB connection + migrations
|
||||||
|
2. Auth (register/login/JWT) — test with curl
|
||||||
|
3. Stripe billing + webhook — test with Stripe CLI (`stripe listen`)
|
||||||
|
4. WebSocket proxy — test with a simple browser WebSocket client first
|
||||||
|
5. Usage tracking and credit decrement
|
||||||
|
6. Account/usage routes
|
||||||
|
7. Web dashboard
|
||||||
|
8. Desktop app integration (separate PR in local-transcription repo)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Key Decisions & Rationale
|
||||||
|
|
||||||
|
| Decision | Choice | Reason |
|
||||||
|
|---|---|---|
|
||||||
|
| Credits model | Prepaid | No surprise charges, simpler billing, better for irregular streamer usage |
|
||||||
|
| WebSocket library | `ws` | Lightweight, no abstraction overhead, plays well with raw binary audio |
|
||||||
|
| Auth | JWT (stateless) | Desktop app holds token locally; no session store needed |
|
||||||
|
| DB driver | `node-postgres` (pg) | No ORM overhead; schema is simple enough for raw SQL |
|
||||||
|
| Migrations | Raw SQL files | No dependency on Knex/Prisma; easy to inspect and reason about |
|
||||||
|
| Rate limiting | In-memory | Redis is overkill for this scale; single-process Node is fine initially |
|
||||||
|
| Frontend | Vanilla JS | Dashboard is simple utility UI; no framework justified |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## What This Plan Does NOT Cover (Future Work)
|
||||||
|
|
||||||
|
- OAuth / social login
|
||||||
|
- Admin panel for managing users
|
||||||
|
- Refund / credit adjustment tooling
|
||||||
|
- Email verification
|
||||||
|
- Password reset flow
|
||||||
|
- Multi-language support beyond Deepgram's defaults
|
||||||
|
- Analytics / aggregated usage reporting
|
||||||
|
- Self-hosted Whisper inference as a third backend option
|
||||||
652
README.md
652
README.md
@@ -1,494 +1,318 @@
|
|||||||
# Local Transcription for Streamers
|
# Local Transcription
|
||||||
|
|
||||||
A local speech-to-text application designed for streamers that provides real-time transcription using Whisper or similar models. Multiple users can run the application locally and sync their transcriptions to a centralized web stream that can be easily captured in OBS or other streaming software.
|
A real-time speech-to-text desktop application for streamers. Runs locally on your machine with GPU or CPU, displays transcriptions via OBS browser source, and optionally syncs with other users through a multi-user server.
|
||||||
|
|
||||||
|
**Version 1.4.0**
|
||||||
|
|
||||||
## Features
|
## Features
|
||||||
|
|
||||||
- **Standalone Desktop Application**: Use locally with built-in GUI display - no server required
|
- **Real-Time Transcription**: Live speech-to-text using Whisper models with minimal latency
|
||||||
- **Local Transcription**: Run Whisper (or compatible models) locally on your machine
|
- **Cross-Platform**: Native desktop app for Windows, macOS, and Linux via [Tauri](https://tauri.app/)
|
||||||
- **CPU/GPU Support**: Choose between CPU or GPU processing based on your hardware
|
- **Dual Transcription Modes**: Local (Whisper) or cloud (Deepgram) with managed billing or BYOK
|
||||||
- **Real-time Processing**: Live audio transcription with minimal latency
|
- **CPU & GPU Support**: Automatic detection of CUDA (NVIDIA), MPS (Apple Silicon), or CPU fallback
|
||||||
|
- **Advanced Voice Detection**: Dual-layer VAD (WebRTC + Silero) for accurate speech detection
|
||||||
|
- **OBS Integration**: Built-in web server for browser source capture at `http://localhost:8080`
|
||||||
|
- **Multi-User Sync**: Optional Node.js server to sync transcriptions across multiple users
|
||||||
|
- **Custom Fonts**: Support for system fonts, web-safe fonts, Google Fonts, and custom font files
|
||||||
|
- **Customizable Colors**: User-configurable colors for name, text, and background
|
||||||
- **Noise Suppression**: Built-in audio preprocessing to reduce background noise
|
- **Noise Suppression**: Built-in audio preprocessing to reduce background noise
|
||||||
- **User Configuration**: Set your display name and preferences through the GUI
|
- **Auto-Updates**: Automatic update checking with release notes display
|
||||||
- **Optional Multi-user Sync**: Connect to a server to sync transcriptions with other users
|
|
||||||
- **OBS Integration**: Web-based output designed for easy browser source capture
|
## Architecture
|
||||||
- **Privacy-First**: All processing happens locally; only transcription text is shared
|
|
||||||
- **Customizable**: Configure model size, language, and streaming settings
|
The application uses a two-process architecture:
|
||||||
|
|
||||||
|
1. **Tauri Shell** (Svelte 5 frontend) — lightweight native window (~50MB) rendering the UI
|
||||||
|
2. **Python Backend** (sidecar) — headless process running transcription, audio capture, and the OBS web server
|
||||||
|
|
||||||
|
The Tauri frontend communicates with the Python backend via REST API and WebSocket, following the same pattern as [voice-to-notes](https://repo.anhonesthost.net/MacroPad/voice-to-notes).
|
||||||
|
|
||||||
|
```
|
||||||
|
Tauri App (user launches this)
|
||||||
|
└─ Spawns Python backend as sidecar
|
||||||
|
├─ FastAPI REST API (control endpoints)
|
||||||
|
├─ WebSocket /ws/control (real-time state + transcriptions)
|
||||||
|
├─ OBS web display at http://localhost:8080
|
||||||
|
└─ Transcription engine (Whisper or Deepgram)
|
||||||
|
```
|
||||||
|
|
||||||
|
> **Legacy GUI**: The original PySide6/Qt desktop GUI (`main.py`) still works alongside the new Tauri frontend during the transition period.
|
||||||
|
|
||||||
## Quick Start
|
## Quick Start
|
||||||
|
|
||||||
### Running from Source
|
### Running from Source
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Install dependencies
|
# Install Python dependencies
|
||||||
uv sync
|
uv sync
|
||||||
|
|
||||||
# Run the application
|
# Run the Tauri app (frontend + backend)
|
||||||
|
npm install
|
||||||
|
npm run tauri dev
|
||||||
|
|
||||||
|
# Or run just the headless backend (for development)
|
||||||
|
uv run python -m backend.main_headless
|
||||||
|
|
||||||
|
# Or run the legacy PySide6 GUI
|
||||||
uv run python main.py
|
uv run python main.py
|
||||||
```
|
```
|
||||||
|
|
||||||
### Building Standalone Executables
|
### Using Pre-Built Executables
|
||||||
|
|
||||||
To create standalone executables for distribution:
|
Download the latest release from the [releases page](https://repo.anhonesthost.net/streamer-tools/local-transcription/releases):
|
||||||
|
|
||||||
|
- **App installer** (Tauri shell): `.msi` (Windows), `.dmg` (macOS), `.deb`/`.rpm`/`.AppImage` (Linux)
|
||||||
|
- **Sidecar** (Python backend): Download the matching `sidecar-*` zip for your platform (CUDA or CPU)
|
||||||
|
|
||||||
|
### Building from Source
|
||||||
|
|
||||||
**Linux:**
|
|
||||||
```bash
|
```bash
|
||||||
./build.sh
|
# Build the Tauri app
|
||||||
```
|
npm install
|
||||||
|
npm run tauri build
|
||||||
|
# Output: src-tauri/target/release/bundle/
|
||||||
|
|
||||||
**Windows:**
|
# Build the Python sidecar (headless, no Qt)
|
||||||
```cmd
|
uv sync
|
||||||
|
uv run pyinstaller local-transcription-headless.spec
|
||||||
|
# Output: dist/local-transcription-backend/
|
||||||
|
|
||||||
|
# Build the legacy PySide6 app (Linux)
|
||||||
|
./build.sh
|
||||||
|
# Build the legacy PySide6 app (Windows)
|
||||||
build.bat
|
build.bat
|
||||||
```
|
```
|
||||||
|
|
||||||
For detailed build instructions, see [BUILD.md](BUILD.md).
|
For detailed build instructions, see [BUILD.md](BUILD.md).
|
||||||
|
|
||||||
## Architecture Overview
|
## Usage
|
||||||
|
|
||||||
The application can run in two modes:
|
### Standalone Mode
|
||||||
|
|
||||||
### Standalone Mode (No Server Required):
|
1. Launch the application
|
||||||
1. **Desktop Application**: Captures audio, performs speech-to-text, and displays transcriptions locally in a GUI window
|
2. Select your microphone from the audio device dropdown
|
||||||
|
3. Choose a Whisper model (smaller = faster, larger = more accurate):
|
||||||
|
- `tiny.en` / `tiny` — Fastest, good for quick captions
|
||||||
|
- `base.en` / `base` — Balanced speed and accuracy
|
||||||
|
- `small.en` / `small` — Better accuracy
|
||||||
|
- `medium.en` / `medium` — High accuracy
|
||||||
|
- `large-v3` — Best accuracy (requires more resources)
|
||||||
|
4. Click **Start** to begin transcription
|
||||||
|
5. Transcriptions appear in the main window and at `http://localhost:8080`
|
||||||
|
|
||||||
### Multi-user Sync Mode (Optional):
|
### Remote Transcription (Deepgram)
|
||||||
1. **Local Transcription Client**: Captures audio, performs speech-to-text, and sends results to the web server
|
|
||||||
2. **Centralized Web Server**: Aggregates transcriptions from multiple clients and serves a web stream
|
|
||||||
3. **Web Stream Interface**: Browser-accessible page displaying synchronized transcriptions (for OBS capture)
|
|
||||||
|
|
||||||
## Use Cases
|
Instead of local Whisper models, you can use cloud-based transcription:
|
||||||
|
|
||||||
- **Multi-language Streams**: Multiple translators transcribing in different languages
|
- **Managed mode**: Sign up via the transcription proxy for metered billing
|
||||||
- **Accessibility**: Provide real-time captions for viewers
|
- **BYOK mode**: Bring your own Deepgram API key for direct access
|
||||||
- **Collaborative Podcasts**: Multiple hosts with separate transcriptions
|
|
||||||
- **Gaming Commentary**: Track who said what in multiplayer sessions
|
|
||||||
|
|
||||||
---
|
Configure in Settings > Remote Transcription.
|
||||||
|
|
||||||
## Implementation Plan
|
### OBS Browser Source Setup
|
||||||
|
|
||||||
### Phase 1: Standalone Desktop Application
|
1. Start the Local Transcription app
|
||||||
|
2. In OBS, add a **Browser** source
|
||||||
|
3. Set URL to `http://localhost:8080`
|
||||||
|
4. Set dimensions (e.g., 1920x300)
|
||||||
|
5. Check "Shutdown source when not visible" for performance
|
||||||
|
|
||||||
**Objective**: Build a fully functional standalone transcription app with GUI that works without any server
|
### Multi-User Mode (Optional)
|
||||||
|
|
||||||
#### Components:
|
For syncing transcriptions across multiple users (e.g., multi-host streams or translation teams):
|
||||||
1. **Audio Capture Module**
|
|
||||||
- Capture system audio or microphone input
|
|
||||||
- Support multiple audio sources (virtual audio cables, physical devices)
|
|
||||||
- Real-time audio buffering with configurable chunk sizes
|
|
||||||
- **Noise Suppression**: Preprocess audio to reduce background noise
|
|
||||||
- Libraries: `pyaudio`, `sounddevice`, `noisereduce`, `webrtcvad`
|
|
||||||
|
|
||||||
2. **Noise Suppression Engine**
|
1. Deploy the Node.js server (see [server/nodejs/README.md](server/nodejs/README.md))
|
||||||
- Real-time noise reduction using RNNoise or noisereduce
|
2. In the app settings, enable **Server Sync**
|
||||||
- Adjustable noise reduction strength
|
3. Enter the server URL (e.g., `http://your-server:3000/api/send`)
|
||||||
- Optional VAD (Voice Activity Detection) to skip silent segments
|
4. Set a room name and passphrase (shared with other users)
|
||||||
- Libraries: `noisereduce`, `rnnoise-python`, `webrtcvad`
|
5. In OBS, use the server's display URL with your room name:
|
||||||
|
```
|
||||||
|
http://your-server:3000/display?room=YOURROOM&timestamps=true&maxlines=50
|
||||||
|
```
|
||||||
|
|
||||||
3. **Transcription Engine**
|
## Configuration
|
||||||
- Integrate OpenAI Whisper (or alternatives: faster-whisper, whisper.cpp)
|
|
||||||
- Support multiple model sizes (tiny, base, small, medium, large)
|
|
||||||
- CPU and GPU inference options
|
|
||||||
- Model management and automatic downloading
|
|
||||||
- Libraries: `openai-whisper`, `faster-whisper`, `torch`
|
|
||||||
|
|
||||||
4. **Device Selection**
|
Settings are stored at `~/.local-transcription/config.yaml` and can be modified through the GUI settings panel or the REST API.
|
||||||
- Auto-detect available compute devices (CPU, CUDA, MPS for Mac)
|
|
||||||
- Allow user to specify preferred device via GUI
|
|
||||||
- Graceful fallback if GPU unavailable
|
|
||||||
- Display device status and performance metrics
|
|
||||||
|
|
||||||
5. **Desktop GUI Application**
|
### Key Settings
|
||||||
- Cross-platform GUI using PyQt6, Tkinter, or CustomTkinter
|
|
||||||
- Main transcription display window (scrolling text area)
|
|
||||||
- Settings panel for configuration
|
|
||||||
- User name input field
|
|
||||||
- Audio input device selector
|
|
||||||
- Model size selector
|
|
||||||
- CPU/GPU toggle
|
|
||||||
- Start/Stop transcription button
|
|
||||||
- Optional: System tray integration
|
|
||||||
- Libraries: `PyQt6`, `customtkinter`, or `tkinter`
|
|
||||||
|
|
||||||
6. **Local Display**
|
| Setting | Description | Default |
|
||||||
- Real-time transcription display in GUI window
|
|---------|-------------|---------|
|
||||||
- Scrolling text with timestamps
|
| `transcription.model` | Whisper model to use | `base.en` |
|
||||||
- User name/label shown with transcriptions
|
| `transcription.device` | Processing device (auto/cuda/cpu) | `auto` |
|
||||||
- Copy transcription to clipboard
|
| `transcription.enable_realtime_transcription` | Show preview while speaking | `false` |
|
||||||
- Optional: Save transcription to file (TXT, SRT, VTT)
|
| `transcription.silero_sensitivity` | VAD sensitivity (0-1, lower = more sensitive) | `0.4` |
|
||||||
|
| `transcription.post_speech_silence_duration` | Silence before finalizing (seconds) | `0.3` |
|
||||||
|
| `transcription.continuous_mode` | Fast speaker mode for quick talkers | `false` |
|
||||||
|
| `remote.mode` | Transcription mode (local/managed/byok) | `local` |
|
||||||
|
| `display.show_timestamps` | Show timestamps with transcriptions | `true` |
|
||||||
|
| `display.fade_after_seconds` | Fade out time (0 = never) | `10` |
|
||||||
|
| `display.font_source` | Font type (System Font/Web-Safe/Google Font/Custom File) | `System Font` |
|
||||||
|
| `web_server.port` | Local web server port | `8080` |
|
||||||
|
|
||||||
#### Tasks:
|
See [config/default_config.yaml](config/default_config.yaml) for all available options.
|
||||||
- [ ] Set up project structure and dependencies
|
|
||||||
- [ ] Implement audio capture with device selection
|
|
||||||
- [ ] Add noise suppression and VAD preprocessing
|
|
||||||
- [ ] Integrate Whisper model loading and inference
|
|
||||||
- [ ] Add CPU/GPU device detection and selection logic
|
|
||||||
- [ ] Create real-time audio buffer processing pipeline
|
|
||||||
- [ ] Design and implement GUI layout (main window)
|
|
||||||
- [ ] Add settings panel with user name configuration
|
|
||||||
- [ ] Implement local transcription display area
|
|
||||||
- [ ] Add start/stop controls and status indicators
|
|
||||||
- [ ] Test transcription accuracy and latency
|
|
||||||
- [ ] Test noise suppression effectiveness
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### Phase 2: Web Server and Sync System
|
|
||||||
|
|
||||||
**Objective**: Create a centralized server to aggregate and serve transcriptions
|
|
||||||
|
|
||||||
#### Components:
|
|
||||||
1. **Web Server**
|
|
||||||
- FastAPI or Flask-based REST API
|
|
||||||
- WebSocket support for real-time updates
|
|
||||||
- User/client registration and management
|
|
||||||
- Libraries: `fastapi`, `uvicorn`, `websockets`
|
|
||||||
|
|
||||||
2. **Transcription Aggregator**
|
|
||||||
- Receive transcription chunks from multiple clients
|
|
||||||
- Associate transcriptions with user IDs/names
|
|
||||||
- Timestamp management and synchronization
|
|
||||||
- Buffer management for smooth streaming
|
|
||||||
|
|
||||||
3. **Database/Storage** (Optional)
|
|
||||||
- Store transcription history (SQLite for simplicity)
|
|
||||||
- Session management
|
|
||||||
- Export functionality (SRT, VTT, TXT formats)
|
|
||||||
|
|
||||||
#### API Endpoints:
|
|
||||||
- `POST /api/register` - Register a new client
|
|
||||||
- `POST /api/transcription` - Submit transcription chunk
|
|
||||||
- `WS /api/stream` - WebSocket for real-time transcription stream
|
|
||||||
- `GET /stream` - Web page for OBS browser source
|
|
||||||
|
|
||||||
#### Tasks:
|
|
||||||
- [ ] Set up FastAPI server with CORS support
|
|
||||||
- [ ] Implement WebSocket handler for real-time streaming
|
|
||||||
- [ ] Create client registration system
|
|
||||||
- [ ] Build transcription aggregation logic
|
|
||||||
- [ ] Add timestamp synchronization
|
|
||||||
- [ ] Create data models for clients and transcriptions
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### Phase 3: Client-Server Communication (Optional Multi-user Mode)
|
|
||||||
|
|
||||||
**Objective**: Add optional server connectivity to enable multi-user transcription sync
|
|
||||||
|
|
||||||
#### Components:
|
|
||||||
1. **HTTP/WebSocket Client**
|
|
||||||
- Register client with server on startup
|
|
||||||
- Send transcription chunks as they're generated
|
|
||||||
- Handle connection drops and reconnection
|
|
||||||
- Libraries: `requests`, `websockets`
|
|
||||||
|
|
||||||
2. **Configuration System**
|
|
||||||
- Config file for server URL, API keys, user settings
|
|
||||||
- Model preferences (size, language)
|
|
||||||
- Audio input settings
|
|
||||||
- Format: YAML or JSON
|
|
||||||
|
|
||||||
3. **Status Monitoring**
|
|
||||||
- Connection status indicator
|
|
||||||
- Transcription queue health
|
|
||||||
- Error handling and logging
|
|
||||||
|
|
||||||
#### Tasks:
|
|
||||||
- [ ] Add "Enable Server Sync" toggle to GUI
|
|
||||||
- [ ] Add server URL configuration field in settings
|
|
||||||
- [ ] Implement WebSocket client for sending transcriptions
|
|
||||||
- [ ] Add configuration file support (YAML/JSON)
|
|
||||||
- [ ] Create connection management with auto-reconnect
|
|
||||||
- [ ] Add local logging and error handling
|
|
||||||
- [ ] Add server connection status indicator to GUI
|
|
||||||
- [ ] Allow app to function normally if server is unavailable
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### Phase 4: Web Stream Interface (OBS Integration)
|
|
||||||
|
|
||||||
**Objective**: Create a web page that displays synchronized transcriptions for OBS
|
|
||||||
|
|
||||||
#### Components:
|
|
||||||
1. **Web Frontend**
|
|
||||||
- HTML/CSS/JavaScript page for displaying transcriptions
|
|
||||||
- Responsive design with customizable styling
|
|
||||||
- Auto-scroll with configurable retention window
|
|
||||||
- Libraries: Vanilla JS or lightweight framework (Alpine.js, htmx)
|
|
||||||
|
|
||||||
2. **Styling Options**
|
|
||||||
- Customizable fonts, colors, sizes
|
|
||||||
- Background transparency for OBS chroma key
|
|
||||||
- User name/ID display options
|
|
||||||
- Timestamp display (optional)
|
|
||||||
|
|
||||||
3. **Display Modes**
|
|
||||||
- Scrolling captions (like live TV captions)
|
|
||||||
- Multi-user panel view (separate sections per user)
|
|
||||||
- Overlay mode (minimal UI for transparency)
|
|
||||||
|
|
||||||
#### Tasks:
|
|
||||||
- [ ] Create HTML template for transcription display
|
|
||||||
- [ ] Implement WebSocket client in JavaScript
|
|
||||||
- [ ] Add CSS styling with OBS-friendly transparency
|
|
||||||
- [ ] Create customization controls (URL parameters or UI)
|
|
||||||
- [ ] Test with OBS browser source
|
|
||||||
- [ ] Add configurable retention/scroll behavior
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### Phase 5: Advanced Features
|
|
||||||
|
|
||||||
**Objective**: Enhance functionality and user experience
|
|
||||||
|
|
||||||
#### Features:
|
|
||||||
1. **Language Detection**
|
|
||||||
- Auto-detect spoken language
|
|
||||||
- Multi-language support in single stream
|
|
||||||
- Language selector in GUI
|
|
||||||
|
|
||||||
2. **Speaker Diarization** (Optional)
|
|
||||||
- Identify different speakers
|
|
||||||
- Label transcriptions by speaker
|
|
||||||
- Useful for multi-host streams
|
|
||||||
|
|
||||||
3. **Profanity Filtering**
|
|
||||||
- Optional word filtering/replacement
|
|
||||||
- Customizable filter lists
|
|
||||||
- Toggle in GUI settings
|
|
||||||
|
|
||||||
4. **Advanced Noise Profiles**
|
|
||||||
- Save and load custom noise profiles
|
|
||||||
- Adaptive noise suppression
|
|
||||||
- Different profiles for different environments
|
|
||||||
|
|
||||||
5. **Export Functionality**
|
|
||||||
- Save transcriptions in multiple formats (TXT, SRT, VTT, JSON)
|
|
||||||
- Export button in GUI
|
|
||||||
- Automatic session saving
|
|
||||||
|
|
||||||
6. **Hotkey Support**
|
|
||||||
- Global hotkeys to start/stop transcription
|
|
||||||
- Mute/unmute hotkey
|
|
||||||
- Quick save hotkey
|
|
||||||
|
|
||||||
7. **Docker Support**
|
|
||||||
- Containerized server deployment
|
|
||||||
- Docker Compose for easy multi-component setup
|
|
||||||
- Pre-built images for easy deployment
|
|
||||||
|
|
||||||
8. **Themes and Customization**
|
|
||||||
- Dark/light theme toggle
|
|
||||||
- Customizable font sizes and colors for display
|
|
||||||
- OBS-friendly transparent overlay mode
|
|
||||||
|
|
||||||
#### Tasks:
|
|
||||||
- [ ] Add language detection and multi-language support
|
|
||||||
- [ ] Implement speaker diarization
|
|
||||||
- [ ] Create optional profanity filter
|
|
||||||
- [ ] Add export functionality (SRT, VTT, plain text, JSON)
|
|
||||||
- [ ] Implement global hotkey support
|
|
||||||
- [ ] Create Docker containers for server component
|
|
||||||
- [ ] Add theme customization options
|
|
||||||
- [ ] Create advanced noise profile management
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Technology Stack
|
|
||||||
|
|
||||||
### Local Client:
|
|
||||||
- **Python 3.9+**
|
|
||||||
- **GUI**: PyQt6 / CustomTkinter / tkinter
|
|
||||||
- **Audio**: PyAudio / sounddevice
|
|
||||||
- **Noise Suppression**: noisereduce / rnnoise-python
|
|
||||||
- **VAD**: webrtcvad
|
|
||||||
- **ML Framework**: PyTorch (for Whisper)
|
|
||||||
- **Transcription**: openai-whisper / faster-whisper
|
|
||||||
- **Networking**: websockets, requests (optional for server sync)
|
|
||||||
- **Config**: PyYAML / json
|
|
||||||
|
|
||||||
### Server:
|
|
||||||
- **Backend**: FastAPI / Flask
|
|
||||||
- **WebSocket**: python-websockets / FastAPI WebSockets
|
|
||||||
- **Server**: Uvicorn / Gunicorn
|
|
||||||
- **Database** (optional): SQLite / PostgreSQL
|
|
||||||
- **CORS**: fastapi-cors
|
|
||||||
|
|
||||||
### Web Interface:
|
|
||||||
- **Frontend**: HTML5, CSS3, JavaScript (ES6+)
|
|
||||||
- **Real-time**: WebSocket API
|
|
||||||
- **Styling**: CSS Grid/Flexbox for layout
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Project Structure
|
## Project Structure
|
||||||
|
|
||||||
```
|
```
|
||||||
local-transcription/
|
local-transcription/
|
||||||
| |||||||