Compare commits
38 Commits
sidecar-v1
...
sidecar-v1
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d263be2ac1 | ||
|
|
1c8c6ad7e8 | ||
|
|
023bc0218b | ||
|
|
634506f902 | ||
|
|
8c7f4e8008 | ||
|
|
b8d718caa6 | ||
|
|
d92005bf95 | ||
|
|
e90d154b83 | ||
|
|
fa749b571d | ||
|
|
ef188e1f67 | ||
|
|
f7b9695418 | ||
|
|
b4c0589b04 | ||
|
|
66c441b17f | ||
|
|
94bc704950 | ||
|
|
7900d2d9f2 | ||
|
|
e0396df7b0 | ||
|
|
ad89735822 | ||
|
|
f0b5890eba | ||
|
|
8df1ab9817 | ||
|
|
34a165fc05 | ||
|
|
8f4e5cc099 | ||
|
|
16f9ac2ab8 | ||
|
|
cd325102e2 | ||
|
|
d220158dd7 | ||
|
|
8670e19acc | ||
|
|
812cc4ac5e | ||
|
|
4aa19eee86 | ||
|
|
b8dfe0f1ba | ||
|
|
5837b97a20 | ||
|
|
ab09a3e9da | ||
|
|
5343a28a08 | ||
|
|
f0bf026133 | ||
|
|
37a029d1c6 | ||
|
|
5ec030387f | ||
|
|
4d9bdba903 | ||
|
|
a7a3bcd102 | ||
|
|
115d93482a | ||
|
|
fb672cbaef |
@@ -46,8 +46,45 @@ jobs:
|
||||
shell: powershell
|
||||
run: npm ci
|
||||
|
||||
- name: Setup Azure Artifact Signing
|
||||
shell: powershell
|
||||
env:
|
||||
AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }}
|
||||
AZURE_SIGNING_ENDPOINT: ${{ secrets.AZURE_SIGNING_ENDPOINT }}
|
||||
AZURE_SIGNING_ACCOUNT: ${{ secrets.AZURE_SIGNING_ACCOUNT }}
|
||||
AZURE_CERT_PROFILE: ${{ secrets.AZURE_CERT_PROFILE }}
|
||||
run: |
|
||||
if (-not $env:AZURE_CLIENT_ID) {
|
||||
Write-Host "No Azure signing secrets configured, skipping code signing setup"
|
||||
return
|
||||
}
|
||||
|
||||
Write-Host "Setting up Azure Artifact Signing..."
|
||||
|
||||
# Install Artifact Signing client tools
|
||||
nuget install Microsoft.ArtifactSigning.Client -x -OutputDirectory .\signing-tools
|
||||
$dlibPath = (Resolve-Path ".\signing-tools\Microsoft.ArtifactSigning.Client*\bin\x64\Azure.CodeSigning.Dlib.dll").Path
|
||||
|
||||
# Write metadata.json
|
||||
@{
|
||||
Endpoint = $env:AZURE_SIGNING_ENDPOINT
|
||||
CodeSigningAccountName = $env:AZURE_SIGNING_ACCOUNT
|
||||
CertificateProfileName = $env:AZURE_CERT_PROFILE
|
||||
} | ConvertTo-Json | Out-File -Encoding UTF8 metadata.json
|
||||
$metadataPath = (Resolve-Path "metadata.json").Path
|
||||
|
||||
# Inject signCommand into tauri.conf.json for this build
|
||||
$conf = Get-Content src-tauri\tauri.conf.json -Raw | ConvertFrom-Json
|
||||
$signCmd = "signtool.exe sign /v /fd SHA256 /tr http://timestamp.acs.microsoft.com /td SHA256 /dlib `"$dlibPath`" /dmdf `"$metadataPath`" %1"
|
||||
$conf.bundle.windows | Add-Member -NotePropertyName "signCommand" -NotePropertyValue $signCmd -Force
|
||||
$conf | ConvertTo-Json -Depth 10 | Set-Content src-tauri\tauri.conf.json -Encoding UTF8
|
||||
|
||||
- name: Build Tauri app
|
||||
shell: powershell
|
||||
env:
|
||||
AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }}
|
||||
AZURE_CLIENT_SECRET: ${{ secrets.AZURE_CLIENT_SECRET }}
|
||||
AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }}
|
||||
run: npm run tauri build
|
||||
|
||||
- name: Upload to release
|
||||
|
||||
26
CLAUDE.md
26
CLAUDE.md
@@ -11,9 +11,11 @@ Local Transcription is a cross-platform desktop application for real-time speech
|
||||
**Key Features:**
|
||||
- Cross-platform desktop app (Windows, macOS, Linux) via Tauri v2 + Svelte 5
|
||||
- Headless Python backend with FastAPI control API
|
||||
- Dual transcription modes: local Whisper or cloud Deepgram (managed/BYOK)
|
||||
- Cloud-first: defaults to Deepgram (BYOK) transcription; local Whisper also supported
|
||||
- Settings UI hides local-only options (model, VAD, timing) when in cloud mode
|
||||
- Start button gated on API key / login — shows guidance if not configured
|
||||
- Shared Captions: create rooms, share via codes, join with one click (hosted at caption.shadowdao.com)
|
||||
- Built-in web server for OBS browser source at `http://localhost:8080`
|
||||
- Optional multi-user sync via Node.js server
|
||||
- CUDA, MPS (Apple Silicon), and CPU support
|
||||
- Auto-updates, custom fonts, configurable colors
|
||||
|
||||
@@ -273,9 +275,29 @@ All per-OS build workflows can be re-run independently via `workflow_dispatch` w
|
||||
- `Info.plist` must include `NSMicrophoneUsageDescription` for mic access
|
||||
- No CUDA builds — CPU/MPS only
|
||||
|
||||
## Code Signing
|
||||
|
||||
Code signing is configured for Windows and macOS to eliminate install warnings (SmartScreen / Gatekeeper). See [SIGNING.md](SIGNING.md) for full setup details.
|
||||
|
||||
**Status (as of 2026-04-10):** CI workflow changes are committed. Waiting on identity verification for both platforms before secrets can be configured.
|
||||
|
||||
**How it works:**
|
||||
- macOS: Tauri auto-signs when `APPLE_CERTIFICATE` and related env vars are set in CI. Notarization uses App Store Connect API key.
|
||||
- Windows: Azure Artifact Signing via `signtool.exe` + dlib. CI workflow injects `signCommand` into `tauri.conf.json` at build time when `AZURE_CLIENT_ID` is set.
|
||||
- Both are no-ops when secrets aren't configured — unsigned builds work as before.
|
||||
|
||||
**Key files:**
|
||||
- `src-tauri/Entitlements.plist` — macOS hardened runtime entitlements (mic, network)
|
||||
- `src-tauri/Info.plist` — macOS microphone usage description
|
||||
- `.gitea/workflows/build-app-macos.yml` — Apple signing + notarization
|
||||
- `.gitea/workflows/build-app-windows.yml` — Azure Artifact Signing
|
||||
|
||||
**Secrets required (12 total):** See [SIGNING.md](SIGNING.md) for the full list — 6 Apple secrets, 6 Azure secrets.
|
||||
|
||||
## Related Documentation
|
||||
|
||||
- [README.md](README.md) — User-facing documentation
|
||||
- [BUILD.md](BUILD.md) — Detailed build instructions
|
||||
- [INSTALL.md](INSTALL.md) — Installation guide
|
||||
- [SIGNING.md](SIGNING.md) — Code signing setup guide
|
||||
- [server/nodejs/README.md](server/nodejs/README.md) — Node.js server setup
|
||||
|
||||
85
README.md
85
README.md
@@ -7,14 +7,14 @@ A real-time speech-to-text desktop application for streamers. Runs locally on yo
|
||||
## Features
|
||||
|
||||
- **Real-Time Transcription**: Live speech-to-text using Whisper models with minimal latency
|
||||
- **Cloud-First**: Defaults to Deepgram cloud transcription — get started with just an API key
|
||||
- **Cross-Platform**: Native desktop app for Windows, macOS, and Linux via [Tauri](https://tauri.app/)
|
||||
- **Dual Transcription Modes**: Local (Whisper) or cloud (Deepgram) with managed billing or BYOK
|
||||
- **CPU & GPU Support**: Automatic detection of CUDA (NVIDIA), MPS (Apple Silicon), or CPU fallback
|
||||
- **Advanced Voice Detection**: Dual-layer VAD (WebRTC + Silero) for accurate speech detection
|
||||
- **Dual Transcription Modes**: Cloud (Deepgram) or local (Whisper) with automatic GPU/CPU detection
|
||||
- **Shared Captions**: Create a room and share a code so others can join — no server setup needed
|
||||
- **OBS Integration**: Built-in web server for browser source capture at `http://localhost:8080`
|
||||
- **Multi-User Sync**: Optional Node.js server to sync transcriptions across multiple users
|
||||
- **Custom Fonts**: Support for system fonts, web-safe fonts, Google Fonts, and custom font files
|
||||
- **Customizable Colors**: User-configurable colors for name, text, and background
|
||||
- **Advanced Voice Detection**: Dual-layer VAD (WebRTC + Silero) for accurate speech detection
|
||||
- **Noise Suppression**: Built-in audio preprocessing to reduce background noise
|
||||
- **Auto-Updates**: Automatic update checking with release notes display
|
||||
|
||||
@@ -87,27 +87,30 @@ For detailed build instructions, see [BUILD.md](BUILD.md).
|
||||
|
||||
## Usage
|
||||
|
||||
### Standalone Mode
|
||||
### Quick Setup (Cloud — Recommended)
|
||||
|
||||
1. Launch the application
|
||||
2. Select your microphone from the audio device dropdown
|
||||
3. Choose a Whisper model (smaller = faster, larger = more accurate):
|
||||
2. Open **Settings** — the transcription mode defaults to **Cloud (Deepgram)**
|
||||
3. Get a free API key at [console.deepgram.com](https://console.deepgram.com) and paste it in Settings
|
||||
4. Select your microphone from the audio device dropdown
|
||||
5. Click **Start Transcription**
|
||||
6. Transcriptions appear in the main window and at `http://localhost:8080`
|
||||
|
||||
> The Start button is disabled until an API key is entered. Local-only settings (model, VAD, timing) are hidden in cloud mode to keep things simple.
|
||||
|
||||
### Local Mode (Whisper)
|
||||
|
||||
For offline/on-device transcription, switch to **Local (Whisper)** in Settings:
|
||||
|
||||
1. Choose a Whisper model (smaller = faster, larger = more accurate):
|
||||
- `tiny.en` / `tiny` — Fastest, good for quick captions
|
||||
- `base.en` / `base` — Balanced speed and accuracy
|
||||
- `small.en` / `small` — Better accuracy
|
||||
- `medium.en` / `medium` — High accuracy
|
||||
- `large-v3` — Best accuracy (requires more resources)
|
||||
4. Click **Start** to begin transcription
|
||||
5. Transcriptions appear in the main window and at `http://localhost:8080`
|
||||
|
||||
### Remote Transcription (Deepgram)
|
||||
|
||||
Instead of local Whisper models, you can use cloud-based transcription:
|
||||
|
||||
- **Managed mode**: Sign up via the transcription proxy for metered billing
|
||||
- **BYOK mode**: Bring your own Deepgram API key for direct access
|
||||
|
||||
Configure in Settings > Remote Transcription.
|
||||
2. Select compute device (Auto/CUDA/CPU) and compute type
|
||||
3. Tune VAD sensitivity and timing settings as needed
|
||||
4. Click **Start Transcription**
|
||||
|
||||
### OBS Browser Source Setup
|
||||
|
||||
@@ -117,18 +120,42 @@ Configure in Settings > Remote Transcription.
|
||||
4. Set dimensions (e.g., 1920x300)
|
||||
5. Check "Shutdown source when not visible" for performance
|
||||
|
||||
### Multi-User Mode (Optional)
|
||||
### Shared Captions (Multi-User)
|
||||
|
||||
For syncing transcriptions across multiple users (e.g., multi-host streams or translation teams):
|
||||
Share live captions across multiple users using the hosted service at `https://caption.shadowdao.com/` — no server setup required.
|
||||
|
||||
1. Deploy the Node.js server (see [server/nodejs/README.md](server/nodejs/README.md))
|
||||
2. In the app settings, enable **Server Sync**
|
||||
3. Enter the server URL (e.g., `http://your-server:3000/api/send`)
|
||||
4. Set a room name and passphrase (shared with other users)
|
||||
5. In OBS, use the server's display URL with your room name:
|
||||
```
|
||||
http://your-server:3000/display?room=YOURROOM&timestamps=true&maxlines=50
|
||||
```
|
||||
#### Creating a Room
|
||||
|
||||
1. Open **Settings** and enable **Shared Captions**
|
||||
2. Click **Create Room** — this generates a room name and passphrase automatically
|
||||
3. A **share code** is generated and copied to your clipboard
|
||||
4. Send the share code to anyone who should join
|
||||
|
||||
#### Joining a Room
|
||||
|
||||
1. Open **Settings** and enable **Shared Captions**
|
||||
2. Paste the share code you received into the **"Paste share code to join"** field
|
||||
3. Click **Join** — the server URL, room, and passphrase are auto-filled
|
||||
4. Click **Save**
|
||||
|
||||
#### Sharing an Existing Room
|
||||
|
||||
If you already have a room configured and want to invite others:
|
||||
|
||||
1. Open **Settings** and scroll to **Shared Captions**
|
||||
2. Click **Share Current Room** — generates a share code from your current config and copies it to the clipboard
|
||||
3. Send the code to others
|
||||
|
||||
#### OBS Display for Shared Rooms
|
||||
|
||||
In OBS, add a Browser source pointing to the server's display URL:
|
||||
```
|
||||
https://caption.shadowdao.com/display?room=YOURROOM&timestamps=true&maxlines=50
|
||||
```
|
||||
|
||||
#### Self-Hosting
|
||||
|
||||
You can also self-host the sync server. See [server/nodejs/README.md](server/nodejs/README.md) for setup instructions, then enter your own server URL in the Shared Captions settings.
|
||||
|
||||
## Configuration
|
||||
|
||||
@@ -144,7 +171,7 @@ Settings are stored at `~/.local-transcription/config.yaml` and can be modified
|
||||
| `transcription.silero_sensitivity` | VAD sensitivity (0-1, lower = more sensitive) | `0.4` |
|
||||
| `transcription.post_speech_silence_duration` | Silence before finalizing (seconds) | `0.3` |
|
||||
| `transcription.continuous_mode` | Fast speaker mode for quick talkers | `false` |
|
||||
| `remote.mode` | Transcription mode (local/managed/byok) | `local` |
|
||||
| `remote.mode` | Transcription mode (local/managed/byok) | `byok` |
|
||||
| `display.show_timestamps` | Show timestamps with transcriptions | `true` |
|
||||
| `display.fade_after_seconds` | Fade out time (0 = never) | `10` |
|
||||
| `display.font_source` | Font type (System Font/Web-Safe/Google Font/Custom File) | `System Font` |
|
||||
|
||||
136
SIGNING.md
Normal file
136
SIGNING.md
Normal file
@@ -0,0 +1,136 @@
|
||||
# Code Signing Setup
|
||||
|
||||
This document explains how to configure code signing for Local Transcription so that Windows and macOS installers are trusted by the operating system.
|
||||
|
||||
## Overview
|
||||
|
||||
Without code signing:
|
||||
- **Windows**: SmartScreen shows "Windows protected your PC" warnings
|
||||
- **macOS**: Gatekeeper blocks the app — "app can't be opened because it is from an unidentified developer"
|
||||
|
||||
The CI/CD workflows are configured to sign automatically when the required secrets are present. Without secrets, builds still work — they just produce unsigned installers.
|
||||
|
||||
---
|
||||
|
||||
## Windows — Azure Artifact Signing
|
||||
|
||||
**Cost**: ~$9.99/month (up to 5,000 signatures)
|
||||
|
||||
### 1. Create an Azure Account
|
||||
|
||||
Sign up at https://azure.microsoft.com if you don't already have one.
|
||||
|
||||
### 2. Set Up Artifact Signing
|
||||
|
||||
1. In the Azure Portal, search for **Artifact Signing**
|
||||
2. Create a new **Artifact Signing Account**
|
||||
- Choose a region (e.g., West US 2) — note this for the endpoint URL
|
||||
- The endpoint will be like `https://wus2.codesigning.azure.net/`
|
||||
3. Complete **Identity Verification** (required before you can create certificate profiles)
|
||||
4. Create a **Certificate Profile** with type "Public Trust" for code signing
|
||||
|
||||
### 3. Create an App Registration (Service Principal)
|
||||
|
||||
This allows CI to authenticate to Azure:
|
||||
|
||||
1. Go to **Microsoft Entra ID** (formerly **Azure Active Directory**) > **App registrations** > **New registration**
|
||||
2. Name it (e.g., `local-transcription-signing`)
|
||||
3. After creation, note the **Application (client) ID** and **Directory (tenant) ID**
|
||||
4. Go to **Certificates & secrets** > **New client secret** — note the secret value
|
||||
5. Grant the app registration the **Artifact Signing Certificate Profile Signer** role on your Artifact Signing Account
|
||||
|
||||
### 4. Add Gitea Secrets
|
||||
|
||||
In your Gitea repository, go to **Settings** > **Actions** > **Secrets** and add:
|
||||
|
||||
| Secret Name | Value |
|
||||
|-------------|-------|
|
||||
| `AZURE_CLIENT_ID` | App registration Application (client) ID |
|
||||
| `AZURE_CLIENT_SECRET` | App registration client secret value |
|
||||
| `AZURE_TENANT_ID` | Directory (tenant) ID |
|
||||
| `AZURE_SIGNING_ENDPOINT` | Artifact Signing endpoint URL (e.g., `https://wus2.codesigning.azure.net/`) |
|
||||
| `AZURE_SIGNING_ACCOUNT` | Artifact Signing account name |
|
||||
| `AZURE_CERT_PROFILE` | Certificate profile name |
|
||||
|
||||
---
|
||||
|
||||
## macOS — Apple Developer Code Signing + Notarization
|
||||
|
||||
**Cost**: $99/year (Apple Developer Program)
|
||||
|
||||
### 1. Enroll in the Apple Developer Program
|
||||
|
||||
Sign up at https://developer.apple.com/programs/
|
||||
|
||||
### 2. Create a Developer ID Certificate
|
||||
|
||||
1. Open **Xcode** > **Settings** > **Accounts** > select your team > **Manage Certificates**
|
||||
2. Click **+** > **Developer ID Application**
|
||||
3. Or create via the Apple Developer portal: **Certificates, Identifiers & Profiles** > **Certificates** > **+** > **Developer ID Application**
|
||||
|
||||
### 3. Export the Certificate as .p12
|
||||
|
||||
1. Open **Keychain Access**
|
||||
2. Find your **Developer ID Application** certificate
|
||||
3. Right-click > **Export** > save as `.p12` with a password
|
||||
4. Base64-encode it:
|
||||
```bash
|
||||
base64 -i certificate.p12 | tr -d '\n'
|
||||
```
|
||||
|
||||
### 4. Create an App Store Connect API Key
|
||||
|
||||
This is used for notarization (submitting the app to Apple for verification):
|
||||
|
||||
1. Go to https://appstoreconnect.apple.com/access/integrations/api
|
||||
2. Click **Generate API Key**
|
||||
3. Give it a name and **Developer** role (minimum)
|
||||
4. Download the `.p8` private key file (you can only download it once)
|
||||
5. Note the **Key ID** and **Issuer ID** shown on the page
|
||||
|
||||
### 5. Find Your Signing Identity
|
||||
|
||||
Your signing identity looks like:
|
||||
```
|
||||
Developer ID Application: Your Name (TEAMID)
|
||||
```
|
||||
|
||||
You can find it by running:
|
||||
```bash
|
||||
security find-identity -v -p codesigning
|
||||
```
|
||||
|
||||
### 6. Add Gitea Secrets
|
||||
|
||||
| Secret Name | Value |
|
||||
|-------------|-------|
|
||||
| `APPLE_CERTIFICATE` | Base64-encoded .p12 certificate (from step 3) |
|
||||
| `APPLE_CERTIFICATE_PASSWORD` | Password used when exporting the .p12 |
|
||||
| `APPLE_SIGNING_IDENTITY` | Full identity string (e.g., `Developer ID Application: Your Name (TEAMID)`) |
|
||||
| `APPLE_API_KEY` | App Store Connect API Key ID |
|
||||
| `APPLE_API_ISSUER` | API issuer UUID |
|
||||
| `APPLE_API_KEY_CONTENT` | Full contents of the `.p8` private key file |
|
||||
|
||||
---
|
||||
|
||||
## Verifying Signing Works
|
||||
|
||||
### Trigger a Build
|
||||
|
||||
Both build workflows use `workflow_dispatch`, so you can trigger them manually in Gitea:
|
||||
|
||||
1. Go to **Actions** > select the workflow > **Run workflow**
|
||||
2. Enter the release tag (e.g., `v2.0.15`)
|
||||
|
||||
### Check macOS
|
||||
|
||||
After installing the `.dmg`, the app should open without any Gatekeeper warnings. You can also verify from the command line:
|
||||
|
||||
```bash
|
||||
codesign -dv --verbose=4 /Applications/Local\ Transcription.app
|
||||
spctl --assess --type execute /Applications/Local\ Transcription.app
|
||||
```
|
||||
|
||||
### Check Windows
|
||||
|
||||
After running the `.msi` or `-setup.exe`, there should be no SmartScreen warning. The installer properties should show your organization name as the publisher.
|
||||
@@ -73,8 +73,15 @@ class APIServer:
|
||||
original_state_cb = self.controller.on_state_changed
|
||||
|
||||
def on_state_changed(state: str, message: str):
|
||||
# Isolate the upstream callback so a failure there (e.g. a
|
||||
# broken stdout pipe in main_headless) cannot propagate into
|
||||
# _set_state and tear down engine init / reload_engine /
|
||||
# apply_settings request handling.
|
||||
if original_state_cb:
|
||||
try:
|
||||
original_state_cb(state, message)
|
||||
except Exception:
|
||||
pass
|
||||
self._broadcast_control({"type": "state_changed", "state": state, "message": message})
|
||||
|
||||
self.controller.on_state_changed = on_state_changed
|
||||
@@ -212,7 +219,11 @@ class APIServer:
|
||||
|
||||
@app.put("/api/config")
|
||||
async def update_config(update: ConfigUpdate):
|
||||
engine_reloaded, message = ctrl.apply_settings(update.settings)
|
||||
import asyncio
|
||||
loop = asyncio.get_event_loop()
|
||||
engine_reloaded, message = await loop.run_in_executor(
|
||||
None, ctrl.apply_settings, update.settings
|
||||
)
|
||||
return {
|
||||
"status": "ok",
|
||||
"message": message,
|
||||
@@ -269,6 +280,7 @@ class APIServer:
|
||||
data = resp.json()
|
||||
ctrl.config.set('remote.auth_token', data.get('token', ''))
|
||||
ctrl.config.set('remote.server_url', req.server_url)
|
||||
ctrl.config.set('remote.email', req.email)
|
||||
return {"status": "ok", "token": data.get('token', '')}
|
||||
else:
|
||||
raise HTTPException(status_code=resp.status_code, detail=resp.text)
|
||||
|
||||
@@ -276,7 +276,6 @@ class AppController:
|
||||
self.current_model_size = model
|
||||
self.current_device_config = device_config
|
||||
|
||||
user_name = self.config.get('user.name', 'User')
|
||||
continuous_mode = self.config.get('transcription.continuous_mode', False)
|
||||
|
||||
if continuous_mode:
|
||||
@@ -293,7 +292,6 @@ class AppController:
|
||||
if remote_mode in ('managed', 'byok'):
|
||||
self.transcription_engine = DeepgramTranscriptionEngine(
|
||||
config=self.config,
|
||||
user_name=user_name,
|
||||
input_device_index=audio_device,
|
||||
)
|
||||
self.transcription_engine.set_callbacks(
|
||||
@@ -343,7 +341,7 @@ class AppController:
|
||||
initial_prompt=self.config.get('transcription.initial_prompt', ''),
|
||||
no_log_file=self.config.get('transcription.no_log_file', True),
|
||||
input_device_index=audio_device,
|
||||
user_name=user_name,
|
||||
app_config=self.config,
|
||||
)
|
||||
self.transcription_engine.set_callbacks(
|
||||
realtime_callback=self._on_realtime_transcription,
|
||||
@@ -608,8 +606,17 @@ class AppController:
|
||||
Returns (engine_reload_needed, message).
|
||||
"""
|
||||
if new_config:
|
||||
for key, value in new_config.items():
|
||||
self.config.set(key, value)
|
||||
# Flatten nested dicts into dot-notation keys so we merge
|
||||
# individual values instead of replacing entire sections
|
||||
# (e.g. remote.mode instead of overwriting all of remote)
|
||||
def _flatten(d, prefix=""):
|
||||
for k, v in d.items():
|
||||
full_key = f"{prefix}{k}" if not prefix else f"{prefix}.{k}"
|
||||
if isinstance(v, dict):
|
||||
_flatten(v, full_key)
|
||||
else:
|
||||
self.config.set(full_key, v)
|
||||
_flatten(new_config)
|
||||
|
||||
# Update web server display settings
|
||||
if self.web_server:
|
||||
@@ -682,6 +689,7 @@ class AppController:
|
||||
"transcription_count": len(self.transcriptions),
|
||||
"remote_mode": remote_mode,
|
||||
"server_sync_enabled": self.config.get('server_sync.enabled', False),
|
||||
"is_cloud_only": self.is_cloud_only,
|
||||
}
|
||||
|
||||
def get_audio_devices(self) -> list[dict]:
|
||||
|
||||
@@ -75,10 +75,16 @@ def main():
|
||||
# Create controller and initialize
|
||||
controller = AppController(config=config)
|
||||
|
||||
# Wire a state callback that prints the ready event
|
||||
# Wire a state callback that prints state events for the parent
|
||||
# process to read. Stdout writes can fail with EINVAL on Windows
|
||||
# when the parent stops reading the sidecar pipe; swallow those
|
||||
# so the engine state machine isn't taken down by a logging path.
|
||||
def on_state_changed(state, message):
|
||||
event = {"event": "state", "state": state, "message": message}
|
||||
try:
|
||||
print(json.dumps(event), flush=True)
|
||||
except (OSError, ValueError):
|
||||
pass
|
||||
|
||||
controller.on_state_changed = on_state_changed
|
||||
|
||||
|
||||
@@ -125,6 +125,8 @@ def test_apply_settings_no_reload_when_same(controller):
|
||||
# Ensure config returns the same values
|
||||
controller.config.set("transcription.model", "base.en")
|
||||
controller.config.set("transcription.device", "auto")
|
||||
# Remote mode must also match (no engine means current mode is 'local')
|
||||
controller.config.set("remote.mode", "local")
|
||||
|
||||
controller.reload_engine = MagicMock(return_value=(True, "reloaded"))
|
||||
|
||||
|
||||
@@ -36,18 +36,16 @@ class DeepgramTranscriptionEngine:
|
||||
# Construction / configuration
|
||||
# ------------------------------------------------------------------ #
|
||||
|
||||
def __init__(self, config, user_name: str = "User", input_device_index: Optional[int] = None):
|
||||
def __init__(self, config, input_device_index: Optional[int] = None):
|
||||
"""
|
||||
Initialise the engine from a :class:`client.config.Config` object.
|
||||
|
||||
Args:
|
||||
config: Application ``Config`` instance.
|
||||
user_name: Display name attached to transcriptions.
|
||||
input_device_index: Index of the audio input device to use
|
||||
(``None`` for the system default).
|
||||
"""
|
||||
self.config = config
|
||||
self.user_name = user_name
|
||||
self.input_device_index = input_device_index
|
||||
|
||||
# Mode: 'managed' (proxy) or 'byok' (direct Deepgram)
|
||||
@@ -320,9 +318,13 @@ class DeepgramTranscriptionEngine:
|
||||
def _build_ws_url_and_headers(self):
|
||||
"""Return ``(url, headers)`` depending on the current mode."""
|
||||
if self.mode == "managed":
|
||||
# Ensure the server URL uses wss:// and append the path
|
||||
# Convert HTTP(S) URLs to WS(S) for WebSocket connection
|
||||
url = self.server_url.rstrip("/")
|
||||
if not url.startswith("ws://") and not url.startswith("wss://"):
|
||||
if url.startswith("https://"):
|
||||
url = "wss://" + url[len("https://"):]
|
||||
elif url.startswith("http://"):
|
||||
url = "ws://" + url[len("http://"):]
|
||||
elif not url.startswith("ws://") and not url.startswith("wss://"):
|
||||
url = f"wss://{url}"
|
||||
url = f"{url}/ws/transcribe"
|
||||
return url, {}
|
||||
@@ -450,7 +452,7 @@ class DeepgramTranscriptionEngine:
|
||||
text=text,
|
||||
is_final=is_final,
|
||||
timestamp=datetime.now(),
|
||||
user_name=self.user_name,
|
||||
user_name=self.config.get('user.name', 'User'),
|
||||
)
|
||||
if is_final:
|
||||
if self.final_callback:
|
||||
@@ -501,7 +503,7 @@ class DeepgramTranscriptionEngine:
|
||||
text=transcript,
|
||||
is_final=is_final,
|
||||
timestamp=datetime.now(),
|
||||
user_name=self.user_name,
|
||||
user_name=self.config.get('user.name', 'User'),
|
||||
)
|
||||
if is_final:
|
||||
if self.final_callback:
|
||||
@@ -532,10 +534,6 @@ class DeepgramTranscriptionEngine:
|
||||
pass
|
||||
self._ws = None
|
||||
|
||||
def set_user_name(self, user_name: str):
|
||||
"""Update the user name attached to future transcriptions."""
|
||||
self.user_name = user_name
|
||||
|
||||
def is_recording_active(self) -> bool:
|
||||
"""Return ``True`` if audio is currently being captured."""
|
||||
return self._is_recording
|
||||
|
||||
@@ -58,8 +58,8 @@ class RealtimeTranscriptionEngine:
|
||||
no_log_file: bool = True,
|
||||
# Audio device
|
||||
input_device_index: Optional[int] = None,
|
||||
# User name
|
||||
user_name: str = ""
|
||||
# App config (for reading user.name at transcription time)
|
||||
app_config=None
|
||||
):
|
||||
"""
|
||||
Initialize RealtimeSTT transcription engine.
|
||||
@@ -82,7 +82,7 @@ class RealtimeTranscriptionEngine:
|
||||
initial_prompt: Optional prompt to guide transcription
|
||||
no_log_file: Disable RealtimeSTT logging
|
||||
input_device_index: Audio input device index
|
||||
user_name: User name for transcriptions
|
||||
app_config: App Config object for reading user.name dynamically
|
||||
"""
|
||||
self.model = model
|
||||
self.language = language
|
||||
@@ -100,7 +100,7 @@ class RealtimeTranscriptionEngine:
|
||||
self.enable_realtime = enable_realtime_transcription
|
||||
self.realtime_model = realtime_model
|
||||
self.realtime_processing_pause = realtime_processing_pause
|
||||
self.user_name = user_name
|
||||
self.app_config = app_config
|
||||
|
||||
# Callbacks
|
||||
self.realtime_callback: Optional[Callable[[TranscriptionResult], None]] = None
|
||||
@@ -162,6 +162,11 @@ class RealtimeTranscriptionEngine:
|
||||
self.realtime_callback = realtime_callback
|
||||
self.final_callback = final_callback
|
||||
|
||||
def _get_user_name(self) -> str:
|
||||
if self.app_config:
|
||||
return self.app_config.get('user.name', '')
|
||||
return ''
|
||||
|
||||
def _on_realtime_transcription(self, text: str):
|
||||
"""Internal callback for realtime transcriptions."""
|
||||
if self.realtime_callback and text.strip():
|
||||
@@ -169,7 +174,7 @@ class RealtimeTranscriptionEngine:
|
||||
text=text,
|
||||
is_final=False,
|
||||
timestamp=datetime.now(),
|
||||
user_name=self.user_name
|
||||
user_name=self._get_user_name()
|
||||
)
|
||||
self.realtime_callback(result)
|
||||
|
||||
@@ -180,7 +185,7 @@ class RealtimeTranscriptionEngine:
|
||||
text=text,
|
||||
is_final=True,
|
||||
timestamp=datetime.now(),
|
||||
user_name=self.user_name
|
||||
user_name=self._get_user_name()
|
||||
)
|
||||
self.final_callback(result)
|
||||
|
||||
@@ -406,10 +411,6 @@ class RealtimeTranscriptionEngine:
|
||||
if self.is_recording:
|
||||
print("VAD settings updated. Restart transcription to apply changes.")
|
||||
|
||||
def set_user_name(self, user_name: str):
|
||||
"""Set the user name for transcriptions."""
|
||||
self.user_name = user_name
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"RealtimeTranscriptionEngine(model={self.model}, device={self.device}, running={self.is_recording})"
|
||||
|
||||
|
||||
@@ -42,7 +42,7 @@ transcription:
|
||||
|
||||
server_sync:
|
||||
enabled: false
|
||||
url: "http://localhost:3000/api/send"
|
||||
url: ""
|
||||
room: "default"
|
||||
passphrase: ""
|
||||
# Font settings are now in the display section (shared for local and server sync)
|
||||
@@ -69,9 +69,10 @@ web_server:
|
||||
host: "127.0.0.1"
|
||||
|
||||
remote:
|
||||
mode: local # local | managed | byok
|
||||
server_url: "" # Proxy server URL for managed mode (e.g., wss://your-proxy.com)
|
||||
mode: byok # local | managed | byok
|
||||
server_url: "https://transcribe.shadowdao.com" # Proxy server URL for managed mode
|
||||
auth_token: "" # JWT stored after login (managed mode)
|
||||
email: "" # Email of the logged-in managed-mode account (for UI display)
|
||||
byok_api_key: "" # Deepgram API key for BYOK mode
|
||||
deepgram_model: nova-2 # Deepgram model to use
|
||||
language: en-US # Language code
|
||||
|
||||
@@ -401,7 +401,6 @@ class MainWindow(QMainWindow):
|
||||
# Use Deepgram-based remote transcription
|
||||
self.transcription_engine = DeepgramTranscriptionEngine(
|
||||
config=self.config,
|
||||
user_name=user_name,
|
||||
input_device_index=audio_device
|
||||
)
|
||||
self.transcription_engine.set_callbacks(
|
||||
@@ -431,7 +430,7 @@ class MainWindow(QMainWindow):
|
||||
initial_prompt=self.config.get('transcription.initial_prompt', ''),
|
||||
no_log_file=self.config.get('transcription.no_log_file', True),
|
||||
input_device_index=audio_device,
|
||||
user_name=user_name
|
||||
app_config=self.config
|
||||
)
|
||||
|
||||
# Set up callbacks for transcription results
|
||||
|
||||
@@ -19,9 +19,26 @@ datas = [
|
||||
('config/default_config.yaml', 'config'),
|
||||
]
|
||||
|
||||
# Collect sounddevice's bundled PortAudio library (_sounddevice_data)
|
||||
try:
|
||||
import sounddevice
|
||||
sd_path = os.path.dirname(sounddevice.__file__)
|
||||
sd_data = os.path.join(sd_path, '_sounddevice_data')
|
||||
if os.path.exists(sd_data):
|
||||
datas.append((sd_data, '_sounddevice_data'))
|
||||
print(f" + Collected sounddevice PortAudio data from {sd_data}")
|
||||
# Also collect the package itself
|
||||
sd_datas = collect_data_files('sounddevice')
|
||||
if sd_datas:
|
||||
datas += sd_datas
|
||||
print(f" + Collected {len(sd_datas)} sounddevice data files")
|
||||
except ImportError:
|
||||
print(" - Warning: sounddevice not found")
|
||||
|
||||
# Hidden imports -- only lightweight deps needed for Deepgram streaming
|
||||
hiddenimports = [
|
||||
'sounddevice',
|
||||
'_sounddevice_data',
|
||||
'numpy',
|
||||
# FastAPI and dependencies
|
||||
'fastapi',
|
||||
|
||||
@@ -38,6 +38,21 @@ datas = [
|
||||
(vad_assets_path, 'faster_whisper/assets'),
|
||||
] + pvporcupine_data_files
|
||||
|
||||
# Collect sounddevice's bundled PortAudio library (_sounddevice_data)
|
||||
try:
    # Bundle the PortAudio binaries that sounddevice ships in the sibling
    # "_sounddevice_data" directory, so the frozen backend can open audio devices.
    import sounddevice
    sd_path = os.path.dirname(sounddevice.__file__)
    sd_data = os.path.join(sd_path, '_sounddevice_data')
    if os.path.exists(sd_data):
        datas.append((sd_data, '_sounddevice_data'))
        print(f" + Collected sounddevice PortAudio data from {sd_data}")
    sd_datas = collect_data_files('sounddevice')
    if sd_datas:
        datas += sd_datas
        print(f" + Collected {len(sd_datas)} sounddevice data files")
except ImportError:
    # sounddevice is not installed in the build environment.
    print(" - Warning: sounddevice not found")
except OSError as exc:
    # sounddevice is installed but failed at import time because it could not
    # load a PortAudio library; keep the build going instead of aborting the spec.
    print(f" - Warning: sounddevice could not load PortAudio: {exc}")
|
||||
|
||||
# Hidden imports -- NO PySide6/Qt needed for headless backend
|
||||
hiddenimports = [
|
||||
# Transcription engine
|
||||
@@ -46,6 +61,7 @@ hiddenimports = [
|
||||
'faster_whisper.vad',
|
||||
'ctranslate2',
|
||||
'sounddevice',
|
||||
'_sounddevice_data',
|
||||
'scipy',
|
||||
'scipy.signal',
|
||||
'numpy',
|
||||
|
||||
@@ -90,7 +90,7 @@ class TranscriptionCLI:
|
||||
initial_prompt=self.config.get('transcription.initial_prompt', ''),
|
||||
no_log_file=True,
|
||||
input_device_index=audio_device,
|
||||
user_name=user_name
|
||||
app_config=self.config
|
||||
)
|
||||
|
||||
# Set up callbacks
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "local-transcription",
|
||||
"private": true,
|
||||
"version": "2.0.10",
|
||||
"version": "2.0.20",
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"dev": "vite dev",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[project]
|
||||
name = "local-transcription"
|
||||
version = "1.0.9"
|
||||
version = "1.0.15"
|
||||
description = "A standalone desktop application for real-time speech-to-text transcription using Whisper models"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.9"
|
||||
|
||||
@@ -703,6 +703,36 @@ app.post('/api/send', async (req, res) => {
|
||||
}
|
||||
});
|
||||
|
||||
// Create room explicitly (no transcription needed)
|
||||
/**
 * POST /api/create-room
 *
 * Body: { room: string, passphrase: string }
 *
 * Responses:
 *   200 { status: 'ok', room, created: true }                  - room was created
 *   200 { status: 'ok', room, created: false, message: ... }   - room existed, passphrase matched
 *   400 { error }  - room or passphrase missing / not a string
 *   401 { error }  - room exists with a different passphrase
 *   500 { error }  - unexpected failure
 */
app.post('/api/create-room', async (req, res) => {
  try {
    // req.body may be undefined when no JSON body was parsed; avoid a
    // destructuring TypeError that would surface as a 500.
    const { room, passphrase } = req.body || {};

    // Only non-empty strings are valid; objects/arrays/numbers are rejected
    // before they reach the room helpers.
    if (typeof room !== 'string' || typeof passphrase !== 'string' || !room || !passphrase) {
      return res.status(400).json({ error: 'Missing room or passphrase' });
    }

    // Remember whether the room existed before this request so the response
    // can distinguish "created" from "already there".
    const existing = await loadRoom(room);

    // verifyPassphrase checks the passphrase of an existing room and creates
    // the room if it doesn't exist. Its result is checked on both paths: if the
    // room appeared between loadRoom() and this call with a different
    // passphrase, the caller must not be told the room was created.
    // NOTE(review): assumes verifyPassphrase resolves truthy after creating a
    // room, as the existing-room branch already relies on -- confirm.
    const valid = await verifyPassphrase(room, passphrase);
    if (!valid) {
      return res.status(401).json({ error: 'Room exists with different passphrase' });
    }

    if (existing) {
      return res.json({ status: 'ok', room, created: false, message: 'Room already exists' });
    }

    console.log(`[Room] Created room "${room}"`);
    res.json({ status: 'ok', room, created: true });
  } catch (err) {
    console.error('Error in /api/create-room:', err);
    res.status(500).json({ error: err.message });
  }
});
|
||||
|
||||
// List transcriptions
|
||||
app.get('/api/list', async (req, res) => {
|
||||
try {
|
||||
|
||||
2
src-tauri/Cargo.lock
generated
2
src-tauri/Cargo.lock
generated
@@ -1881,7 +1881,7 @@ checksum = "92daf443525c4cce67b150400bc2316076100ce0b3686209eb8cf3c31612e6f0"
|
||||
|
||||
[[package]]
|
||||
name = "local-transcription"
|
||||
version = "2.0.3"
|
||||
version = "2.0.12"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"chrono",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "local-transcription"
|
||||
version = "2.0.10"
|
||||
version = "2.0.20"
|
||||
description = "Real-time speech-to-text transcription for streamers"
|
||||
authors = ["Local Transcription Contributors"]
|
||||
edition = "2021"
|
||||
|
||||
14
src-tauri/Entitlements.plist
Normal file
14
src-tauri/Entitlements.plist
Normal file
@@ -0,0 +1,14 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
||||
<plist version="1.0">
|
||||
<dict>
|
||||
<key>com.apple.security.device.audio-input</key>
|
||||
<true/>
|
||||
<key>com.apple.security.network.client</key>
|
||||
<true/>
|
||||
<key>com.apple.security.network.server</key>
|
||||
<true/>
|
||||
<key>com.apple.security.cs.allow-unsigned-executable-memory</key>
|
||||
<true/>
|
||||
</dict>
|
||||
</plist>
|
||||
8
src-tauri/Info.plist
Normal file
8
src-tauri/Info.plist
Normal file
@@ -0,0 +1,8 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
||||
<plist version="1.0">
|
||||
<dict>
|
||||
<key>NSMicrophoneUsageDescription</key>
|
||||
<string>Local Transcription needs microphone access for real-time speech-to-text transcription.</string>
|
||||
</dict>
|
||||
</plist>
|
||||
@@ -74,8 +74,8 @@ pub fn run() {
|
||||
.build(tauri::generate_context!())
|
||||
.expect("error while building tauri application")
|
||||
.run(|app, event| {
|
||||
if let tauri::RunEvent::Exit = event {
|
||||
// Stop the sidecar when the app exits
|
||||
match event {
|
||||
tauri::RunEvent::Exit => {
|
||||
if let Some(state) = app.try_state::<sidecar::ManagedSidecar>() {
|
||||
if let Ok(mut mgr) = state.0.lock() {
|
||||
eprintln!("[app] Stopping sidecar on exit...");
|
||||
@@ -83,5 +83,16 @@ pub fn run() {
|
||||
}
|
||||
}
|
||||
}
|
||||
tauri::RunEvent::ExitRequested { .. } => {
|
||||
// Also stop sidecar on exit request (Cmd+Q on macOS)
|
||||
if let Some(state) = app.try_state::<sidecar::ManagedSidecar>() {
|
||||
if let Ok(mut mgr) = state.0.lock() {
|
||||
eprintln!("[app] Stopping sidecar on exit request...");
|
||||
mgr.stop();
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"productName": "Local Transcription",
|
||||
"version": "2.0.10",
|
||||
"version": "2.0.20",
|
||||
"identifier": "net.anhonesthost.local-transcription",
|
||||
"build": {
|
||||
"frontendDist": "../dist",
|
||||
@@ -33,7 +33,10 @@
|
||||
"icons/icon.icns",
|
||||
"icons/icon.ico",
|
||||
"icons/icon.png"
|
||||
]
|
||||
],
|
||||
"windows": {
|
||||
"digestAlgorithm": "sha256"
|
||||
}
|
||||
},
|
||||
"plugins": {
|
||||
"shell": {
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
let sidecarState = $state<SidecarState>("checking");
|
||||
let debugLog = $state("");
|
||||
let availableUpdate = $state("");
|
||||
let appVersion = $state("");
|
||||
|
||||
let obsDisplayUrl = $derived(backendStore.obsUrl);
|
||||
let syncDisplayUrl = $derived(backendStore.syncUrl);
|
||||
@@ -108,6 +109,14 @@
|
||||
}
|
||||
|
||||
onMount(() => {
|
||||
// Get app version from Tauri
|
||||
import("@tauri-apps/api/app").then(({ getVersion }) =>
|
||||
getVersion().then((v) => { appVersion = v; })
|
||||
).catch(() => {
|
||||
// Tauri API unavailable (e.g. browser dev mode) -- fall back to a placeholder version
|
||||
appVersion = "dev";
|
||||
});
|
||||
|
||||
checkAndLaunchSidecar();
|
||||
|
||||
return () => {
|
||||
@@ -201,7 +210,7 @@
|
||||
<TranscriptionDisplay />
|
||||
<Controls />
|
||||
|
||||
<div class="version-label">v{backendStore.version}</div>
|
||||
<div class="version-label">v{appVersion || backendStore.version}</div>
|
||||
</div>
|
||||
|
||||
{#if showSettings}
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
<script lang="ts">
|
||||
import { backendStore } from "$lib/stores/backend";
|
||||
import { configStore } from "$lib/stores/config";
|
||||
import { transcriptionStore } from "$lib/stores/transcriptions";
|
||||
|
||||
let isTranscribing = $derived(backendStore.appState === "transcribing");
|
||||
@@ -8,6 +9,16 @@
|
||||
);
|
||||
let isLoading = $state(false);
|
||||
|
||||
let remoteMode = $derived(configStore.config.remote.mode);
|
||||
let byokApiKey = $derived(configStore.config.remote.byok_api_key);
|
||||
let authToken = $derived(configStore.config.remote.auth_token);
|
||||
|
||||
let cloudConfigured = $derived(
|
||||
remoteMode === "local" ||
|
||||
(remoteMode === "byok" && byokApiKey.trim() !== "") ||
|
||||
(remoteMode === "managed" && authToken.trim() !== "")
|
||||
);
|
||||
|
||||
let errorMessage = $state("");
|
||||
|
||||
async function toggleTranscription() {
|
||||
@@ -20,14 +31,17 @@
|
||||
} else {
|
||||
await backendStore.apiPost("/api/start");
|
||||
}
|
||||
// Poll status to update UI immediately instead of waiting
|
||||
// for WebSocket broadcast (which can be delayed or missed)
|
||||
await backendStore.pollStatus();
|
||||
} catch (err: unknown) {
|
||||
const msg = err instanceof Error ? err.message : String(err);
|
||||
// Ignore "Already transcribing/not transcribing" -- just sync the state
|
||||
if (!msg.includes("400")) {
|
||||
console.error("Failed to toggle transcription:", msg);
|
||||
errorMessage = msg;
|
||||
}
|
||||
} finally {
|
||||
// Always poll status to sync UI with actual backend state,
|
||||
// even if the API call failed (e.g. "Already transcribing")
|
||||
await backendStore.pollStatus();
|
||||
isLoading = false;
|
||||
}
|
||||
}
|
||||
@@ -91,7 +105,7 @@
|
||||
<button
|
||||
class={isTranscribing ? "danger" : "primary"}
|
||||
onclick={toggleTranscription}
|
||||
disabled={!isReady || isLoading}
|
||||
disabled={!isReady || isLoading || !cloudConfigured}
|
||||
>
|
||||
{#if isLoading}
|
||||
...
|
||||
@@ -113,6 +127,18 @@
|
||||
{#if errorMessage}
|
||||
<span class="error-msg">{errorMessage}</span>
|
||||
{/if}
|
||||
|
||||
{#if !cloudConfigured && isReady}
|
||||
<div class="cloud-warning">
|
||||
{#if remoteMode === "byok"}
|
||||
<span>API key required. Get one at
|
||||
<a href="https://console.deepgram.com" target="_blank" rel="noopener">console.deepgram.com</a>,
|
||||
then enter it in Settings.</span>
|
||||
{:else if remoteMode === "managed"}
|
||||
<span>Login required. Open Settings to log in.</span>
|
||||
{/if}
|
||||
</div>
|
||||
{/if}
|
||||
</div>
|
||||
|
||||
<style>
|
||||
@@ -122,6 +148,18 @@
|
||||
margin-left: 8px;
|
||||
}
|
||||
|
||||
.cloud-warning {
|
||||
font-size: 12px;
|
||||
color: #ff9800;
|
||||
margin-left: 8px;
|
||||
flex: 1;
|
||||
}
|
||||
|
||||
.cloud-warning a {
|
||||
color: #4fc3f7;
|
||||
text-decoration: underline;
|
||||
}
|
||||
|
||||
.controls {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
|
||||
@@ -44,8 +44,20 @@
|
||||
let byokApiKey = $state("");
|
||||
let managedEmail = $state("");
|
||||
let managedPassword = $state("");
|
||||
let managedLoggedIn = $state(false);
|
||||
let autoCheckUpdates = $state(true);
|
||||
|
||||
let isCloudMode = $derived(remoteMode === "managed" || remoteMode === "byok");
|
||||
let isCloudOnly = $derived(
|
||||
computeDevices.some(d => d.id === "cloud")
|
||||
);
|
||||
|
||||
// Room creation / join state
|
||||
let shareCode = $state("");
|
||||
let joinCode = $state("");
|
||||
let roomCreating = $state(false);
|
||||
let roomCreateMessage = $state("");
|
||||
|
||||
let saving = $state(false);
|
||||
let saveMessage = $state("");
|
||||
|
||||
@@ -120,6 +132,8 @@
|
||||
remoteMode = cfg.remote.mode;
|
||||
remoteServerUrl = cfg.remote.server_url;
|
||||
byokApiKey = cfg.remote.byok_api_key ?? "";
|
||||
managedEmail = cfg.remote.email ?? "";
|
||||
managedLoggedIn = !!(cfg.remote.auth_token && cfg.remote.email);
|
||||
autoCheckUpdates = cfg.updates.auto_check;
|
||||
});
|
||||
|
||||
@@ -199,7 +213,7 @@
|
||||
},
|
||||
remote: {
|
||||
mode: remoteMode,
|
||||
server_url: remoteServerUrl,
|
||||
server_url: remoteServerUrl || MANAGED_SERVER_URL,
|
||||
byok_api_key: byokApiKey,
|
||||
},
|
||||
updates: {
|
||||
@@ -244,25 +258,143 @@
|
||||
}
|
||||
}
|
||||
|
||||
const MANAGED_SERVER_URL = "https://transcribe.shadowdao.com";
|
||||
|
||||
let loginMessage = $state("");
|
||||
|
||||
/**
 * Log in to the managed transcription service through the local backend.
 *
 * Posts the entered email/password (and the configured server URL, falling
 * back to MANAGED_SERVER_URL) to `/api/login`. On success the password field
 * is cleared, the UI is switched to the logged-in state and the config is
 * re-fetched. The outcome is reported through `loginMessage`; this function
 * never throws.
 */
async function handleManagedLogin() {
  loginMessage = "";

  // Don't send an obviously incomplete form to the backend; it would only come
  // back as a generic failure.
  if (!managedEmail.trim() || !managedPassword) {
    loginMessage = "Enter your email and password.";
    return;
  }

  try {
    await backendStore.apiPost("/api/login", {
      email: managedEmail,
      password: managedPassword,
      server_url: remoteServerUrl || MANAGED_SERVER_URL,
    });
    // The template colours messages starting with "Logged" as success.
    loginMessage = "Logged in successfully!";
    // Drop the plaintext password from component state as soon as it is no longer needed.
    managedPassword = "";
    managedLoggedIn = true;
    await configStore.fetchConfig();
  } catch (err) {
    console.error("Login failed:", err);
    loginMessage = "Login failed. Check your email and password.";
  }
}
|
||||
|
||||
async function handleManagedRegister() {
|
||||
async function handleManagedLogout() {
|
||||
try {
|
||||
await backendStore.apiPost("/api/register", {
|
||||
email: managedEmail,
|
||||
password: managedPassword,
|
||||
await configStore.updateConfig({
|
||||
remote: { auth_token: "", email: "" },
|
||||
});
|
||||
managedLoggedIn = false;
|
||||
managedPassword = "";
|
||||
loginMessage = "";
|
||||
} catch (err) {
|
||||
console.error("Register failed:", err);
|
||||
console.error("Logout failed:", err);
|
||||
loginMessage = `Error: ${err}`;
|
||||
}
|
||||
}
|
||||
|
||||
const CAPTION_SERVER = "https://caption.shadowdao.com";
|
||||
|
||||
/**
 * Build a random room name of the form `<adjective>-<noun>-<number>`,
 * where the number is an integer in [0, 9999] (not zero-padded).
 */
function generateRandomName(): string {
  const adjectives = ['swift', 'bright', 'cosmic', 'electric', 'turbo', 'mega', 'ultra', 'super', 'hyper', 'alpha'];
  const nouns = ['phoenix', 'dragon', 'tiger', 'falcon', 'comet', 'storm', 'blaze', 'thunder', 'frost', 'nebula'];

  // Uniformly choose one entry of a word list.
  const pick = (words: string[]): string => words[Math.floor(Math.random() * words.length)];

  const suffix = Math.floor(Math.random() * 10000);
  const adjective = pick(adjectives);
  const noun = pick(nouns);
  return [adjective, noun, suffix].join('-');
}
|
||||
|
||||
/**
 * Generate a 16-character alphanumeric room passphrase.
 *
 * The passphrase is the only secret protecting a shared room, so it is drawn
 * from the Web Crypto CSPRNG rather than `Math.random()`, whose output is not
 * suitable for secrets.
 *
 * @returns 16 characters from [a-zA-Z0-9].
 */
function generateRandomPassphrase(): string {
  const chars = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789';
  const length = 16;
  // Largest multiple of chars.length that fits in a byte (248 for 62 chars).
  // Bytes at or above it are discarded so `byte % chars.length` stays uniform.
  const limit = 256 - (256 % chars.length);
  const buf = new Uint8Array(length * 2);

  let result = '';
  while (result.length < length) {
    crypto.getRandomValues(buf);
    for (let i = 0; i < buf.length && result.length < length; i++) {
      if (buf[i] < limit) {
        result += chars.charAt(buf[i] % chars.length);
      }
    }
  }
  return result;
}
|
||||
|
||||
/**
 * Pack room connection details into a single copy-pasteable share code:
 * base64 of the UTF-8 encoded JSON `{ url, room, passphrase }`.
 *
 * `btoa()` throws on any character above U+00FF, so the JSON is UTF-8 encoded
 * first; room names and passphrases typed by the user may contain arbitrary
 * Unicode. For pure-ASCII input the result is identical to `btoa(JSON)`.
 */
function encodeShareCode(url: string, room: string, passphrase: string): string {
  const bytes = new TextEncoder().encode(JSON.stringify({ url, room, passphrase }));
  let binary = '';
  for (let i = 0; i < bytes.length; i++) {
    binary += String.fromCharCode(bytes[i]);
  }
  return btoa(binary);
}

/**
 * Parse a share code produced by `encodeShareCode`.
 *
 * @param code Base64 share code; surrounding whitespace is ignored.
 * @returns The decoded `{ url, room, passphrase }`, or `null` when the code is
 *          not valid base64/JSON or any of the three fields is missing, empty
 *          or not a string.
 */
function decodeShareCode(code: string): { url: string; room: string; passphrase: string } | null {
  try {
    const binary = atob(code.trim());
    const bytes = Uint8Array.from(binary, (ch) => ch.charCodeAt(0));

    let text: string;
    try {
      text = new TextDecoder('utf-8', { fatal: true }).decode(bytes);
    } catch {
      // Not valid UTF-8: a code written by the older raw-btoa() encoder that
      // contained Latin-1 characters. Interpret it byte-for-byte as before.
      text = binary;
    }

    // Share codes come from other people; treat the payload as untrusted and
    // check every field instead of returning the parsed value as-is.
    const parsed: unknown = JSON.parse(text);
    if (typeof parsed !== 'object' || parsed === null) {
      return null;
    }
    const { url, room, passphrase } = parsed as Record<string, unknown>;
    if (
      typeof url === 'string' && url !== '' &&
      typeof room === 'string' && room !== '' &&
      typeof passphrase === 'string' && passphrase !== ''
    ) {
      return { url, room, passphrase };
    }
    return null;
  } catch {
    return null;
  }
}
|
||||
|
||||
/**
 * Create a fresh shared-captions room on the caption server.
 *
 * Generates a random room name and passphrase, registers them through
 * `POST {CAPTION_SERVER}/api/create-room`, and on success fills in the sync
 * fields, enables sync and exposes a share code. Any failure is reported via
 * `roomCreateMessage` (prefixed with "Error:"); `roomCreating` is always reset.
 */
async function handleCreateRoom() {
  roomCreating = true;
  roomCreateMessage = "";
  shareCode = "";

  const newRoom = generateRandomName();
  const newPassphrase = generateRandomPassphrase();
  const sendEndpoint = `${CAPTION_SERVER}/api/send`;

  try {
    const response = await fetch(`${CAPTION_SERVER}/api/create-room`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ room: newRoom, passphrase: newPassphrase }),
    });

    if (response.ok) {
      // Point the sync settings at the new room.
      syncUrl = sendEndpoint;
      syncRoom = newRoom;
      syncPassphrase = newPassphrase;
      syncEnabled = true;

      shareCode = encodeShareCode(sendEndpoint, newRoom, newPassphrase);
      roomCreateMessage = "Room created! Share the code below with others.";
    } else {
      // The error body may not be JSON; fall back to a generic payload.
      const payload = await response.json().catch(() => ({ error: "Request failed" }));
      roomCreateMessage = `Error: ${payload.error || response.statusText}`;
    }
  } catch (failure) {
    const detail = failure instanceof Error ? failure.message : String(failure);
    roomCreateMessage = `Error: ${detail}`;
  } finally {
    roomCreating = false;
  }
}
|
||||
|
||||
/**
 * Join a room from a pasted share code.
 *
 * Decodes `joinCode` and, when it is valid, copies its URL, room and
 * passphrase into the sync fields and enables sync. The code comes from
 * another person, so the embedded URL is only accepted when it parses as an
 * absolute http(s) URL; anything else is reported as an invalid share code.
 */
function handleJoinRoom() {
  const decoded = decodeShareCode(joinCode);

  let urlIsHttp = false;
  if (decoded) {
    try {
      const { protocol } = new URL(decoded.url);
      urlIsHttp = protocol === "http:" || protocol === "https:";
    } catch {
      // Not an absolute URL at all.
      urlIsHttp = false;
    }
  }

  if (!decoded || !urlIsHttp) {
    roomCreateMessage = "Invalid share code. Please check and try again.";
    return;
  }

  syncUrl = decoded.url;
  syncRoom = decoded.room;
  syncPassphrase = decoded.passphrase;
  syncEnabled = true;
  joinCode = "";
  roomCreateMessage = "Room joined! Fields have been auto-filled.";
}
|
||||
|
||||
/**
 * Produce a share code for the currently configured room, show it in the
 * share-code field and try to copy it to the clipboard. If the clipboard
 * write fails, the user is told to copy the code manually.
 */
async function handleShareCurrentRoom() {
  const generated = encodeShareCode(syncUrl, syncRoom, syncPassphrase);
  shareCode = generated;

  let message: string;
  try {
    await navigator.clipboard.writeText(generated);
    message = "Share code copied to clipboard!";
  } catch {
    message = "Share code generated. Copy it from the field below.";
  }
  roomCreateMessage = message;
}
|
||||
|
||||
/**
 * Copy the currently displayed share code to the clipboard and report the
 * outcome through `roomCreateMessage`.
 */
async function copyShareCode() {
  let message: string;
  try {
    await navigator.clipboard.writeText(shareCode);
    message = "Share code copied to clipboard!";
  } catch {
    message = "Failed to copy. Please select and copy manually.";
  }
  roomCreateMessage = message;
}
|
||||
|
||||
@@ -327,7 +459,100 @@
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<!-- Transcription Settings -->
|
||||
<!-- Remote Transcription (moved up for cloud-first UX) -->
|
||||
<section class="settings-section">
|
||||
<h3>Transcription Mode</h3>
|
||||
<div class="radio-group">
|
||||
<label>
|
||||
<input
|
||||
type="radio"
|
||||
name="remote-mode"
|
||||
value="byok"
|
||||
bind:group={remoteMode}
|
||||
/>
|
||||
Cloud (Deepgram)
|
||||
</label>
|
||||
<label>
|
||||
<input
|
||||
type="radio"
|
||||
name="remote-mode"
|
||||
value="managed"
|
||||
bind:group={remoteMode}
|
||||
/>
|
||||
Managed Service
|
||||
</label>
|
||||
{#if !isCloudOnly}
|
||||
<label>
|
||||
<input
|
||||
type="radio"
|
||||
name="remote-mode"
|
||||
value="local"
|
||||
bind:group={remoteMode}
|
||||
/>
|
||||
Local (Whisper)
|
||||
</label>
|
||||
{/if}
|
||||
</div>
|
||||
{#if remoteMode === "byok"}
|
||||
<div class="field">
|
||||
<label for="byok-key">Deepgram API Key</label>
|
||||
<input
|
||||
id="byok-key"
|
||||
type="password"
|
||||
bind:value={byokApiKey}
|
||||
placeholder="Enter your Deepgram API key"
|
||||
/>
|
||||
<p style="font-size: 11px; color: var(--text-muted); margin-top: 4px;">
|
||||
Get a key at <a href="https://console.deepgram.com" target="_blank" rel="noopener" style="color: var(--accent-blue);">console.deepgram.com</a>
|
||||
</p>
|
||||
</div>
|
||||
{/if}
|
||||
{#if remoteMode === "managed"}
|
||||
<div class="managed-auth">
|
||||
{#if managedLoggedIn}
|
||||
<p style="font-size: 13px; margin: 0 0 8px;">
|
||||
<span style="color: var(--accent-green, #4CAF50);">✓ Logged in</span>
|
||||
as <strong>{managedEmail}</strong>
|
||||
</p>
|
||||
<div class="auth-buttons">
|
||||
<button onclick={handleManagedLogout}>Log out</button>
|
||||
</div>
|
||||
{:else}
|
||||
<div class="field">
|
||||
<label for="managed-email">Email</label>
|
||||
<input
|
||||
id="managed-email"
|
||||
type="email"
|
||||
bind:value={managedEmail}
|
||||
placeholder="email@example.com"
|
||||
/>
|
||||
</div>
|
||||
<div class="field">
|
||||
<label for="managed-password">Password</label>
|
||||
<input
|
||||
id="managed-password"
|
||||
type="password"
|
||||
bind:value={managedPassword}
|
||||
/>
|
||||
</div>
|
||||
<div class="auth-buttons">
|
||||
<button onclick={handleManagedLogin}>Login</button>
|
||||
</div>
|
||||
<p style="font-size: 11px; color: var(--text-muted); margin-top: 8px;">
|
||||
Don't have an account? <a href="https://transcribe.shadowdao.com/register.html" target="_blank" rel="noopener" style="color: var(--accent-blue);">Sign up here</a>
|
||||
</p>
|
||||
{/if}
|
||||
{#if loginMessage}
|
||||
<p style="font-size: 12px; margin-top: 6px; color: {loginMessage.startsWith('Logged') ? 'var(--accent-green, #4CAF50)' : 'var(--accent-red, #f44336)'};">
|
||||
{loginMessage}
|
||||
</p>
|
||||
{/if}
|
||||
</div>
|
||||
{/if}
|
||||
</section>
|
||||
|
||||
{#if !isCloudMode}
|
||||
<!-- Transcription Settings (local Whisper only) -->
|
||||
<section class="settings-section">
|
||||
<h3>Transcription Settings</h3>
|
||||
<div class="field">
|
||||
@@ -473,6 +698,7 @@
|
||||
/>
|
||||
</div>
|
||||
</section>
|
||||
{/if}
|
||||
|
||||
<!-- Display Settings -->
|
||||
<section class="settings-section">
|
||||
@@ -628,11 +854,11 @@
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<!-- Server Sync -->
|
||||
<!-- Server Sync (Shared Captions) -->
|
||||
<section class="settings-section">
|
||||
<h3>Server Sync</h3>
|
||||
<h3>Shared Captions</h3>
|
||||
<div class="field-row">
|
||||
<label for="sync-enabled">Enable Server Sync</label>
|
||||
<label for="sync-enabled">Enable Shared Captions</label>
|
||||
<input
|
||||
id="sync-enabled"
|
||||
type="checkbox"
|
||||
@@ -640,13 +866,57 @@
|
||||
/>
|
||||
</div>
|
||||
{#if syncEnabled}
|
||||
<div class="room-actions">
|
||||
<div class="room-buttons-row">
|
||||
<button
|
||||
onclick={handleCreateRoom}
|
||||
disabled={roomCreating}
|
||||
class="secondary"
|
||||
>
|
||||
{roomCreating ? "Creating..." : "Create Room"}
|
||||
</button>
|
||||
<button
|
||||
onclick={handleShareCurrentRoom}
|
||||
disabled={!syncUrl.trim() || !syncRoom.trim() || !syncPassphrase.trim()}
|
||||
class="secondary"
|
||||
>
|
||||
Share Current Room
|
||||
</button>
|
||||
</div>
|
||||
<div class="join-row">
|
||||
<input
|
||||
type="text"
|
||||
bind:value={joinCode}
|
||||
placeholder="Paste share code to join"
|
||||
class="join-input"
|
||||
/>
|
||||
<button onclick={handleJoinRoom} disabled={!joinCode.trim()} class="secondary">
|
||||
Join
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{#if roomCreateMessage}
|
||||
<p class="room-message" class:error={roomCreateMessage.startsWith("Error")}>{roomCreateMessage}</p>
|
||||
{/if}
|
||||
|
||||
{#if shareCode}
|
||||
<div class="share-code-box">
|
||||
<label>Share Code</label>
|
||||
<div class="share-code-row">
|
||||
<input type="text" value={shareCode} readonly class="share-code-input" />
|
||||
<button onclick={copyShareCode} class="secondary">Copy</button>
|
||||
</div>
|
||||
</div>
|
||||
{/if}
|
||||
|
||||
<div class="field">
|
||||
<label for="sync-url">Server URL</label>
|
||||
<input
|
||||
id="sync-url"
|
||||
type="url"
|
||||
bind:value={syncUrl}
|
||||
placeholder="http://localhost:3000/api/send"
|
||||
placeholder="https://caption.shadowdao.com/api/send"
|
||||
/>
|
||||
</div>
|
||||
<div class="field">
|
||||
@@ -664,90 +934,6 @@
|
||||
{/if}
|
||||
</section>
|
||||
|
||||
<!-- Remote Transcription -->
|
||||
<section class="settings-section">
|
||||
<h3>Remote Transcription</h3>
|
||||
<div class="radio-group">
|
||||
<label>
|
||||
<input
|
||||
type="radio"
|
||||
name="remote-mode"
|
||||
value="local"
|
||||
bind:group={remoteMode}
|
||||
/>
|
||||
Local
|
||||
</label>
|
||||
<label>
|
||||
<input
|
||||
type="radio"
|
||||
name="remote-mode"
|
||||
value="managed"
|
||||
bind:group={remoteMode}
|
||||
/>
|
||||
Managed
|
||||
</label>
|
||||
<label>
|
||||
<input
|
||||
type="radio"
|
||||
name="remote-mode"
|
||||
value="byok"
|
||||
bind:group={remoteMode}
|
||||
/>
|
||||
BYOK (Bring Your Own Key)
|
||||
</label>
|
||||
</div>
|
||||
{#if remoteMode === "managed"}
|
||||
<div class="field">
|
||||
<label for="remote-url">Server URL</label>
|
||||
<input
|
||||
id="remote-url"
|
||||
type="url"
|
||||
bind:value={remoteServerUrl}
|
||||
placeholder="wss://your-proxy.com"
|
||||
/>
|
||||
</div>
|
||||
{/if}
|
||||
{#if remoteMode === "byok"}
|
||||
<div class="field">
|
||||
<label for="byok-key">Deepgram API Key</label>
|
||||
<input
|
||||
id="byok-key"
|
||||
type="password"
|
||||
bind:value={byokApiKey}
|
||||
placeholder="Enter your Deepgram API key"
|
||||
/>
|
||||
<p style="font-size: 11px; color: var(--text-muted); margin-top: 4px;">
|
||||
Get a key at <a href="https://console.deepgram.com" target="_blank" rel="noopener" style="color: var(--accent-blue);">console.deepgram.com</a>
|
||||
</p>
|
||||
</div>
|
||||
{/if}
|
||||
{#if remoteMode === "managed"}
|
||||
<div class="managed-auth">
|
||||
<div class="field">
|
||||
<label for="managed-email">Email</label>
|
||||
<input
|
||||
id="managed-email"
|
||||
type="email"
|
||||
bind:value={managedEmail}
|
||||
placeholder="email@example.com"
|
||||
/>
|
||||
</div>
|
||||
<div class="field">
|
||||
<label for="managed-password">Password</label>
|
||||
<input
|
||||
id="managed-password"
|
||||
type="password"
|
||||
bind:value={managedPassword}
|
||||
/>
|
||||
</div>
|
||||
<div class="auth-buttons">
|
||||
<button onclick={handleManagedLogin}>Login</button>
|
||||
<button onclick={handleManagedRegister}>Register</button>
|
||||
</div>
|
||||
</div>
|
||||
{/if}
|
||||
</section>
|
||||
|
||||
<!-- Updates -->
|
||||
<section class="settings-section">
|
||||
<h3>Updates</h3>
|
||||
@@ -943,6 +1129,78 @@
|
||||
color: #f44336;
|
||||
}
|
||||
|
||||
.room-actions {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 8px;
|
||||
margin-bottom: 12px;
|
||||
}
|
||||
|
||||
.room-buttons-row {
|
||||
display: flex;
|
||||
gap: 8px;
|
||||
}
|
||||
|
||||
.join-row {
|
||||
display: flex;
|
||||
gap: 8px;
|
||||
}
|
||||
|
||||
.join-input {
|
||||
flex: 1;
|
||||
}
|
||||
|
||||
.room-message {
|
||||
font-size: 12px;
|
||||
color: #4CAF50;
|
||||
margin-bottom: 8px;
|
||||
}
|
||||
|
||||
.room-message.error {
|
||||
color: #f44336;
|
||||
}
|
||||
|
||||
.share-code-box {
|
||||
margin-bottom: 12px;
|
||||
}
|
||||
|
||||
.share-code-box label {
|
||||
display: block;
|
||||
margin-bottom: 4px;
|
||||
font-size: 12px;
|
||||
color: var(--text-secondary);
|
||||
}
|
||||
|
||||
.share-code-row {
|
||||
display: flex;
|
||||
gap: 8px;
|
||||
}
|
||||
|
||||
.share-code-input {
|
||||
flex: 1;
|
||||
font-size: 11px;
|
||||
font-family: monospace;
|
||||
}
|
||||
|
||||
.secondary {
|
||||
background: transparent;
|
||||
border: 1px solid var(--border-color);
|
||||
color: var(--text-primary);
|
||||
padding: 6px 12px;
|
||||
border-radius: 6px;
|
||||
cursor: pointer;
|
||||
font-size: 13px;
|
||||
}
|
||||
|
||||
.secondary:hover {
|
||||
background: var(--bg-tertiary);
|
||||
}
|
||||
|
||||
.secondary:disabled {
|
||||
opacity: 0.5;
|
||||
cursor: not-allowed;
|
||||
}
|
||||
|
||||
.danger-btn {
|
||||
background: transparent;
|
||||
border: 1px solid var(--accent-red, #f44336);
|
||||
|
||||
@@ -19,6 +19,7 @@ interface BackendState {
|
||||
wsConnection: WebSocket | null;
|
||||
version: string;
|
||||
lastError: string;
|
||||
isCloudOnly: boolean;
|
||||
}
|
||||
|
||||
let state = $state<BackendState>({
|
||||
@@ -30,6 +31,7 @@ let state = $state<BackendState>({
|
||||
wsConnection: null,
|
||||
version: "1.4.0",
|
||||
lastError: "",
|
||||
isCloudOnly: false,
|
||||
});
|
||||
|
||||
let reconnectTimer: ReturnType<typeof setTimeout> | null = null;
|
||||
@@ -72,6 +74,9 @@ async function pollStatus() {
|
||||
if (data.version) {
|
||||
state.version = data.version;
|
||||
}
|
||||
if (data.is_cloud_only !== undefined) {
|
||||
state.isCloudOnly = data.is_cloud_only;
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
// API not ready yet, will retry
|
||||
@@ -285,6 +290,9 @@ export const backendStore = {
|
||||
get lastError() {
|
||||
return state.lastError;
|
||||
},
|
||||
get isCloudOnly() {
|
||||
return state.isCloudOnly;
|
||||
},
|
||||
get apiBaseUrl() {
|
||||
return `http://localhost:${state.port}`;
|
||||
},
|
||||
|
||||
@@ -65,6 +65,7 @@ export interface AppConfig {
|
||||
mode: string;
|
||||
server_url: string;
|
||||
auth_token: string;
|
||||
email: string;
|
||||
byok_api_key: string;
|
||||
deepgram_model: string;
|
||||
language: string;
|
||||
@@ -107,7 +108,7 @@ function getDefaultConfig(): AppConfig {
|
||||
},
|
||||
server_sync: {
|
||||
enabled: false,
|
||||
url: "http://localhost:3000/api/send",
|
||||
url: "",
|
||||
room: "default",
|
||||
passphrase: "",
|
||||
},
|
||||
@@ -128,9 +129,10 @@ function getDefaultConfig(): AppConfig {
|
||||
},
|
||||
web_server: { port: 8080, host: "127.0.0.1" },
|
||||
remote: {
|
||||
mode: "local",
|
||||
mode: "byok",
|
||||
server_url: "",
|
||||
auth_token: "",
|
||||
email: "",
|
||||
byok_api_key: "",
|
||||
deepgram_model: "nova-2",
|
||||
language: "en-US",
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
"""Version information for Local Transcription."""
|
||||
|
||||
__version__ = "2.0.10"
|
||||
__version_info__ = (2, 0, 10)
|
||||
__version__ = "2.0.20"
|
||||
__version_info__ = (2, 0, 20)
|
||||
|
||||
# Version history:
|
||||
# 1.4.0 - Auto-update feature:
|
||||
|
||||
Reference in New Issue
Block a user