Cross-platform distribution, UI improvements, and performance optimizations

- PyInstaller frozen sidecar: spec file, build script, and ffmpeg path resolver
  for self-contained distribution without Python prerequisites
- Dual-mode sidecar launcher: frozen binary (production) with dev mode fallback
- Parallel transcription + diarization pipeline (~30-40% faster)
- GPU auto-detection for diarization (CUDA when available)
- Async run_pipeline command for real-time progress event delivery
- Web Audio API backend for instant playback and seeking
- OpenAI-compatible provider replacing LiteLLM client-side routing
- Cross-platform RAM detection (Linux/macOS/Windows)
- Settings: speaker count hint, token reveal toggles, dark dropdown styling
- Loading splash screen, flexbox layout fix for viewport overflow
- Gitea Actions CI/CD pipeline (Linux, Windows, macOS ARM)
- Updated README and CLAUDE.md documentation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Claude
2026-03-20 21:33:43 -07:00
parent 42ccd3e21d
commit 58faa83cb3
27 changed files with 1301 additions and 283 deletions

View File

@@ -1,6 +1,7 @@
<script lang="ts">
import { invoke } from '@tauri-apps/api/core';
import { segments, speakers } from '$lib/stores/transcript';
import { settings } from '$lib/stores/settings';
interface ChatMessage {
role: 'user' | 'assistant';
@@ -43,9 +44,23 @@
content: m.content,
}));
// Ensure the provider is configured with current credentials before chatting
const s = $settings;
const configMap: Record<string, Record<string, string>> = {
openai: { api_key: s.openai_api_key, model: s.openai_model },
anthropic: { api_key: s.anthropic_api_key, model: s.anthropic_model },
litellm: { api_key: s.litellm_api_key, api_base: s.litellm_api_base, model: s.litellm_model },
local: { model: s.local_model_path, base_url: 'http://localhost:8080' },
};
const config = configMap[s.ai_provider];
if (config) {
await invoke('ai_configure', { provider: s.ai_provider, config });
}
const result = await invoke<{ response: string }>('ai_chat', {
messages: chatMessages,
transcriptContext: getTranscriptContext(),
provider: s.ai_provider,
});
messages = [...messages, { role: 'assistant', content: result.response }];

View File

@@ -8,17 +8,7 @@
let { visible = false, percent = 0, stage = '', message = '' }: Props = $props();
// Map internal stage names to user-friendly labels
const stageLabels: Record<string, string> = {
'pipeline': 'Pipeline',
'loading_model': 'Loading Model',
'transcribing': 'Transcribing',
'loading_diarization': 'Loading Diarization',
'diarizing': 'Speaker Detection',
'done': 'Complete',
};
// Pipeline steps for the task list
// Pipeline steps in order
const pipelineSteps = [
{ key: 'loading_model', label: 'Load transcription model' },
{ key: 'transcribing', label: 'Transcribe audio' },
@@ -27,17 +17,47 @@
{ key: 'merging', label: 'Merge results' },
];
function getStepStatus(stepKey: string, currentStage: string): 'pending' | 'active' | 'done' {
const stepOrder = pipelineSteps.map(s => s.key);
const currentIdx = stepOrder.indexOf(currentStage);
const stepIdx = stepOrder.indexOf(stepKey);
const stepOrder = pipelineSteps.map(s => s.key);
if (currentStage === 'done') return 'done';
if (stepIdx < currentIdx) return 'done';
if (stepIdx === currentIdx) return 'active';
// Track the highest step index we've reached (never goes backward)
let highestStepIdx = $state(-1);
/**
 * Map a reported stage name to a step index for high-water-mark tracking.
 *
 * - A stage that is one of the pipeline steps maps to its position in `stepOrder`.
 * - The terminal 'done' stage maps to `stepOrder.length`, i.e. one past the last
 *   step, so that every step (including the final one) is reported as done
 *   instead of the last step staying "active" after completion.
 * - Any other stage (e.g. 'pipeline', which appears before the known steps)
 *   maps to -1 so the high-water mark is left unchanged.
 */
function stageToStepIdx(s: string): number {
  const direct = stepOrder.indexOf(s);
  if (direct >= 0) return direct;
  if (s === 'done') return stepOrder.length;
  return -1;
}
// Keep `highestStepIdx` in sync with the `visible` and `stage` values.
// While `visible` is false the mark is reset to -1; otherwise it is only ever
// raised, so a stage that maps to a lower index (or to -1) never moves it back.
$effect(() => {
if (!visible) {
// Hidden: forget the progress that was recorded so far.
highestStepIdx = -1;
return;
}
const idx = stageToStepIdx(stage);
// Only advance; stages mapped to -1 can never exceed the current mark.
if (idx > highestStepIdx) {
highestStepIdx = idx;
}
});
/**
 * Classify a pipeline step relative to the furthest step reached so far.
 *
 * @param stepIdx Position of the step within the pipeline step list.
 * @returns 'active' for the step at the high-water mark, 'done' for steps
 *          before it, and 'pending' for everything else.
 */
function getStepStatus(stepIdx: number): 'pending' | 'active' | 'done' {
  if (stepIdx === highestStepIdx) {
    return 'active';
  }
  return stepIdx < highestStepIdx ? 'done' : 'pending';
}
// User-friendly labels for the stage names reported in `stage`.
// Keys are the internal stage identifiers; values are the text shown to the user.
const stageLabels: Record<string, string> = {
'pipeline': 'Initializing...',
'loading_model': 'Loading Model',
'transcribing': 'Transcribing',
'loading_diarization': 'Loading Diarization',
'diarizing': 'Speaker Detection',
'merging': 'Merging Results',
'done': 'Complete',
};
// Label for the current stage: the mapped label if one exists, otherwise the raw
// stage name, otherwise (empty stage) the generic 'Processing...' placeholder.
let displayStage = $derived(stageLabels[stage] || stage || 'Processing...');
</script>
@@ -50,8 +70,8 @@
</div>
<div class="steps">
{#each pipelineSteps as step}
{@const status = getStepStatus(step.key, stage)}
{#each pipelineSteps as step, idx}
{@const status = getStepStatus(idx)}
<div class="step" class:step-done={status === 'done'} class:step-active={status === 'active'}>
<span class="step-icon">
{#if status === 'done'}

View File

@@ -14,6 +14,7 @@
let activeTab = $state<'transcription' | 'speakers' | 'ai' | 'local'>('transcription');
let modelStatus = $state<'idle' | 'downloading' | 'success' | 'error'>('idle');
let modelError = $state('');
let revealedFields = $state<Set<string>>(new Set());
async function testAndDownloadModel() {
if (!localSettings.hf_token) {
@@ -111,7 +112,10 @@
{:else if activeTab === 'speakers'}
<div class="field">
<label for="hf-token">HuggingFace Token</label>
<input id="hf-token" type="password" bind:value={localSettings.hf_token} placeholder="hf_..." />
<div class="input-reveal">
<input id="hf-token" type={revealedFields.has('hf-token') ? 'text' : 'password'} bind:value={localSettings.hf_token} placeholder="hf_..." />
<button type="button" class="reveal-btn" onclick={() => { const s = new Set(revealedFields); s.has('hf-token') ? s.delete('hf-token') : s.add('hf-token'); revealedFields = s; }}>{revealedFields.has('hf-token') ? 'Hide' : 'Show'}</button>
</div>
</div>
<div class="info-box">
<p class="info-title">Setup (one-time)</p>
@@ -150,6 +154,23 @@
{#if modelStatus === 'error'}
<p class="status-error">{modelError}</p>
{/if}
<div class="field" style="margin-top: 1rem;">
<label for="num-speakers">Number of speakers</label>
<select
id="num-speakers"
value={localSettings.num_speakers === null || localSettings.num_speakers === 0 ? '0' : String(localSettings.num_speakers)}
onchange={(e) => {
const v = parseInt((e.target as HTMLSelectElement).value, 10);
localSettings.num_speakers = v === 0 ? null : v;
}}
>
<option value="0">Auto-detect</option>
{#each Array.from({ length: 20 }, (_, i) => i + 1) as n}
<option value={String(n)}>{n}</option>
{/each}
</select>
<p class="hint">Hint the expected number of speakers to speed up diarization clustering.</p>
</div>
<div class="field checkbox" style="margin-top: 1rem;">
<label>
<input type="checkbox" bind:checked={localSettings.skip_diarization} />
@@ -163,14 +184,17 @@
<option value="local">Local (llama-server)</option>
<option value="openai">OpenAI</option>
<option value="anthropic">Anthropic</option>
<option value="litellm">LiteLLM</option>
<option value="litellm">OpenAI Compatible</option>
</select>
</div>
{#if localSettings.ai_provider === 'openai'}
<div class="field">
<label for="openai-key">OpenAI API Key</label>
<input id="openai-key" type="password" bind:value={localSettings.openai_api_key} placeholder="sk-..." />
<div class="input-reveal">
<input id="openai-key" type={revealedFields.has('openai-key') ? 'text' : 'password'} bind:value={localSettings.openai_api_key} placeholder="sk-..." />
<button type="button" class="reveal-btn" onclick={() => { const s = new Set(revealedFields); s.has('openai-key') ? s.delete('openai-key') : s.add('openai-key'); revealedFields = s; }}>{revealedFields.has('openai-key') ? 'Hide' : 'Show'}</button>
</div>
</div>
<div class="field">
<label for="openai-model">Model</label>
@@ -179,13 +203,27 @@
{:else if localSettings.ai_provider === 'anthropic'}
<div class="field">
<label for="anthropic-key">Anthropic API Key</label>
<input id="anthropic-key" type="password" bind:value={localSettings.anthropic_api_key} placeholder="sk-ant-..." />
<div class="input-reveal">
<input id="anthropic-key" type={revealedFields.has('anthropic-key') ? 'text' : 'password'} bind:value={localSettings.anthropic_api_key} placeholder="sk-ant-..." />
<button type="button" class="reveal-btn" onclick={() => { const s = new Set(revealedFields); s.has('anthropic-key') ? s.delete('anthropic-key') : s.add('anthropic-key'); revealedFields = s; }}>{revealedFields.has('anthropic-key') ? 'Hide' : 'Show'}</button>
</div>
</div>
<div class="field">
<label for="anthropic-model">Model</label>
<input id="anthropic-model" type="text" bind:value={localSettings.anthropic_model} />
</div>
{:else if localSettings.ai_provider === 'litellm'}
<div class="field">
<label for="litellm-base">API Base URL</label>
<input id="litellm-base" type="text" bind:value={localSettings.litellm_api_base} placeholder="https://your-litellm-proxy.example.com" />
</div>
<div class="field">
<label for="litellm-key">API Key</label>
<div class="input-reveal">
<input id="litellm-key" type={revealedFields.has('litellm-key') ? 'text' : 'password'} bind:value={localSettings.litellm_api_key} placeholder="sk-..." />
<button type="button" class="reveal-btn" onclick={() => { const s = new Set(revealedFields); s.has('litellm-key') ? s.delete('litellm-key') : s.add('litellm-key'); revealedFields = s; }}>{revealedFields.has('litellm-key') ? 'Hide' : 'Show'}</button>
</div>
</div>
<div class="field">
<label for="litellm-model">Model</label>
<input id="litellm-model" type="text" bind:value={localSettings.litellm_model} placeholder="provider/model-name" />
@@ -293,11 +331,36 @@
color: #aaa;
margin-bottom: 0.3rem;
}
.input-reveal {
display: flex;
gap: 0;
}
.input-reveal input {
flex: 1;
border-top-right-radius: 0;
border-bottom-right-radius: 0;
}
.reveal-btn {
background: #0f3460;
border: 1px solid #4a5568;
border-left: none;
color: #aaa;
padding: 0.5rem 0.6rem;
border-radius: 0 4px 4px 0;
cursor: pointer;
font-size: 0.75rem;
white-space: nowrap;
}
.reveal-btn:hover {
color: #e0e0e0;
background: #1a4a7a;
}
.field input,
.field select {
width: 100%;
background: #1a1a2e;
color: #e0e0e0;
color-scheme: dark;
border: 1px solid #4a5568;
border-radius: 4px;
padding: 0.5rem;

View File

@@ -13,6 +13,7 @@
let container: HTMLDivElement;
let wavesurfer: WaveSurfer | null = $state(null);
let isReady = $state(false);
let isLoading = $state(false);
let currentTime = $state('0:00');
let totalTime = $state('0:00');
@@ -32,6 +33,7 @@
barWidth: 2,
barGap: 1,
barRadius: 2,
backend: 'WebAudio',
});
wavesurfer.on('timeupdate', (time: number) => {
@@ -41,6 +43,7 @@
wavesurfer.on('ready', () => {
isReady = true;
isLoading = false;
const dur = wavesurfer!.getDuration();
durationMs.set(Math.round(dur * 1000));
totalTime = formatTime(dur);
@@ -55,7 +58,7 @@
});
if (audioUrl) {
wavesurfer.load(audioUrl);
loadAudio(audioUrl);
}
});
@@ -89,16 +92,13 @@
console.warn('[voice-to-notes] seekTo ignored — audio not ready yet');
return;
}
const timeSec = timeMs / 1000;
wavesurfer.setTime(timeSec);
if (!wavesurfer.isPlaying()) {
wavesurfer.play();
}
wavesurfer.setTime(timeMs / 1000);
}
/**
 * Load a new audio file.
 *
 * Clears the ready flag and marks the player as loading; the 'ready' handler
 * clears the loading flag again once the audio has loaded. If the player has
 * not been created yet, or the load fails, the loading flag is cleared here
 * so it cannot stay set with no 'ready' event coming.
 *
 * @param url Location of the audio to load.
 */
export function loadAudio(url: string) {
  isReady = false;
  if (!wavesurfer) {
    // Nothing can be loaded without a player instance.
    isLoading = false;
    console.warn('[voice-to-notes] loadAudio ignored — player not initialized yet');
    return;
  }
  isLoading = true;
  // load() is asynchronous; handle rejection so a failed load does not leave
  // the loading flag stuck or surface as an unhandled promise rejection.
  wavesurfer.load(url).catch((err: unknown) => {
    isLoading = false;
    console.warn('[voice-to-notes] failed to load audio:', err);
  });
}
</script>

View File

@@ -8,6 +8,8 @@ export interface AppSettings {
openai_model: string;
anthropic_model: string;
litellm_model: string;
litellm_api_key: string;
litellm_api_base: string;
local_model_path: string;
local_binary_path: string;
transcription_model: string;
@@ -15,6 +17,7 @@ export interface AppSettings {
transcription_language: string;
skip_diarization: boolean;
hf_token: string;
num_speakers: number | null;
}
const defaults: AppSettings = {
@@ -24,6 +27,8 @@ const defaults: AppSettings = {
openai_model: 'gpt-4o-mini',
anthropic_model: 'claude-sonnet-4-6',
litellm_model: 'gpt-4o-mini',
litellm_api_key: '',
litellm_api_base: '',
local_model_path: '',
local_binary_path: 'llama-server',
transcription_model: 'base',
@@ -31,6 +36,7 @@ const defaults: AppSettings = {
transcription_language: '',
skip_diarization: false,
hf_token: '',
num_speakers: null,
};
export const settings = writable<AppSettings>({ ...defaults });
@@ -47,4 +53,20 @@ export async function loadSettings(): Promise<void> {
/**
 * Update the settings store, persist the settings, and push the selected AI
 * provider's configuration to the Python sidecar.
 *
 * Persisting is awaited and its failure propagates to the caller. Configuring
 * the provider is best-effort: a failure is logged and otherwise ignored,
 * because the sidecar may not be running yet.
 *
 * @param s The complete settings object to store and persist.
 */
export async function saveSettings(s: AppSettings): Promise<void> {
  settings.set(s);
  await invoke('save_settings', { settings: s });

  // Configure the AI provider in the Python sidecar
  const configMap: Record<string, Record<string, string>> = {
    openai: { api_key: s.openai_api_key, model: s.openai_model },
    anthropic: { api_key: s.anthropic_api_key, model: s.anthropic_model },
    litellm: { api_key: s.litellm_api_key, api_base: s.litellm_api_base, model: s.litellm_model },
    local: { model: s.local_model_path, base_url: 'http://localhost:8080' },
  };
  const config = configMap[s.ai_provider];
  if (!config) {
    // Unknown provider name: nothing to configure.
    return;
  }

  try {
    await invoke('ai_configure', { provider: s.ai_provider, config });
  } catch (err: unknown) {
    // Sidecar may not be running yet — provider will be configured on first use.
    // Still surface the failure so a real misconfiguration is not invisible.
    console.warn('[voice-to-notes] ai_configure failed while saving settings:', err);
  }
}