Cross-platform distribution, UI improvements, and performance optimizations

- PyInstaller frozen sidecar: spec file, build script, and ffmpeg path resolver for self-contained distribution without Python prerequisites
- Dual-mode sidecar launcher: frozen binary (production) with dev mode fallback
- Parallel transcription + diarization pipeline (~30-40% faster)
- GPU auto-detection for diarization (CUDA when available)
- Async run_pipeline command for real-time progress event delivery
- Web Audio API backend for instant playback and seeking
- OpenAI-compatible provider replacing LiteLLM client-side routing
- Cross-platform RAM detection (Linux/macOS/Windows)
- Settings: speaker count hint, token reveal toggles, dark dropdown styling
- Loading splash screen, flexbox layout fix for viewport overflow
- Gitea Actions CI/CD pipeline (Linux, Windows, macOS ARM)
- Updated README and CLAUDE.md documentation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-20 21:33:43 -07:00
parent 42ccd3e21d
commit 58faa83cb3
27 changed files with 1301 additions and 283 deletions
--- a/src/lib/components/AIChatPanel.svelte
+++ b/src/lib/components/AIChatPanel.svelte
@@ -1,6 +1,7 @@
 <script lang="ts">
  import { invoke } from '@tauri-apps/api/core';
  import { segments, speakers } from '$lib/stores/transcript';
+  import { settings } from '$lib/stores/settings';

  interface ChatMessage {
    role: 'user' | 'assistant';
@@ -43,9 +44,23 @@
        content: m.content,
      }));

+      // Ensure the provider is configured with current credentials before chatting
+      const s = $settings;
+      const configMap: Record<string, Record<string, string>> = {
+        openai: { api_key: s.openai_api_key, model: s.openai_model },
+        anthropic: { api_key: s.anthropic_api_key, model: s.anthropic_model },
+        litellm: { api_key: s.litellm_api_key, api_base: s.litellm_api_base, model: s.litellm_model },
+        local: { model: s.local_model_path, base_url: 'http://localhost:8080' },
+      };
+      const config = configMap[s.ai_provider];
+      if (config) {
+        await invoke('ai_configure', { provider: s.ai_provider, config });
+      }
+
      const result = await invoke<{ response: string }>('ai_chat', {
        messages: chatMessages,
        transcriptContext: getTranscriptContext(),
+        provider: s.ai_provider,
      });

      messages = [...messages, { role: 'assistant', content: result.response }];
--- a/src/lib/components/ProgressOverlay.svelte
+++ b/src/lib/components/ProgressOverlay.svelte
@@ -8,17 +8,7 @@

  let { visible = false, percent = 0, stage = '', message = '' }: Props = $props();

-  // Map internal stage names to user-friendly labels
-  const stageLabels: Record<string, string> = {
-    'pipeline': 'Pipeline',
-    'loading_model': 'Loading Model',
-    'transcribing': 'Transcribing',
-    'loading_diarization': 'Loading Diarization',
-    'diarizing': 'Speaker Detection',
-    'done': 'Complete',
-  };
-
-  // Pipeline steps for the task list
+  // Pipeline steps in order
  const pipelineSteps = [
    { key: 'loading_model', label: 'Load transcription model' },
    { key: 'transcribing', label: 'Transcribe audio' },
@@ -27,17 +17,47 @@
    { key: 'merging', label: 'Merge results' },
  ];

-  function getStepStatus(stepKey: string, currentStage: string): 'pending' | 'active' | 'done' {
-    const stepOrder = pipelineSteps.map(s => s.key);
-    const currentIdx = stepOrder.indexOf(currentStage);
-    const stepIdx = stepOrder.indexOf(stepKey);
+  const stepOrder = pipelineSteps.map(s => s.key);

-    if (currentStage === 'done') return 'done';
-    if (stepIdx < currentIdx) return 'done';
-    if (stepIdx === currentIdx) return 'active';
+  // Track the highest step index we've reached (never goes backward)
+  let highestStepIdx = $state(-1);
+
+  // Map non-step stages to step indices for progress tracking
+  function stageToStepIdx(s: string): number {
+    const direct = stepOrder.indexOf(s);
+    if (direct >= 0) return direct;
+    // 'pipeline' stage appears before known steps — don't change highwater mark
+    return -1;
+  }
+
+  $effect(() => {
+    if (!visible) {
+      highestStepIdx = -1;
+      return;
+    }
+    const idx = stageToStepIdx(stage);
+    if (idx > highestStepIdx) {
+      highestStepIdx = idx;
+    }
+  });
+
+  function getStepStatus(stepIdx: number): 'pending' | 'active' | 'done' {
+    if (stepIdx < highestStepIdx) return 'done';
+    if (stepIdx === highestStepIdx) return 'active';
    return 'pending';
  }

+  // User-friendly display of current stage
+  const stageLabels: Record<string, string> = {
+    'pipeline': 'Initializing...',
+    'loading_model': 'Loading Model',
+    'transcribing': 'Transcribing',
+    'loading_diarization': 'Loading Diarization',
+    'diarizing': 'Speaker Detection',
+    'merging': 'Merging Results',
+    'done': 'Complete',
+  };
+
  let displayStage = $derived(stageLabels[stage] || stage || 'Processing...');
 </script>

@@ -50,8 +70,8 @@
      </div>

      <div class="steps">
-        {#each pipelineSteps as step}
-          {@const status = getStepStatus(step.key, stage)}
+        {#each pipelineSteps as step, idx}
+          {@const status = getStepStatus(idx)}
          <div class="step" class:step-done={status === 'done'} class:step-active={status === 'active'}>
            <span class="step-icon">
              {#if status === 'done'}
--- a/src/lib/components/SettingsModal.svelte
+++ b/src/lib/components/SettingsModal.svelte
@@ -14,6 +14,7 @@
  let activeTab = $state<'transcription' | 'speakers' | 'ai' | 'local'>('transcription');
  let modelStatus = $state<'idle' | 'downloading' | 'success' | 'error'>('idle');
  let modelError = $state('');
+  let revealedFields = $state<Set<string>>(new Set());

  async function testAndDownloadModel() {
    if (!localSettings.hf_token) {
@@ -111,7 +112,10 @@
        {:else if activeTab === 'speakers'}
          <div class="field">
            <label for="hf-token">HuggingFace Token</label>
-            <input id="hf-token" type="password" bind:value={localSettings.hf_token} placeholder="hf_..." />
+            <div class="input-reveal">
+              <input id="hf-token" type={revealedFields.has('hf-token') ? 'text' : 'password'} bind:value={localSettings.hf_token} placeholder="hf_..." />
+              <button type="button" class="reveal-btn" onclick={() => { const s = new Set(revealedFields); s.has('hf-token') ? s.delete('hf-token') : s.add('hf-token'); revealedFields = s; }}>{revealedFields.has('hf-token') ? 'Hide' : 'Show'}</button>
+            </div>
          </div>
          <div class="info-box">
            <p class="info-title">Setup (one-time)</p>
@@ -150,6 +154,23 @@
          {#if modelStatus === 'error'}
            <p class="status-error">{modelError}</p>
          {/if}
+          <div class="field" style="margin-top: 1rem;">
+            <label for="num-speakers">Number of speakers</label>
+            <select
+              id="num-speakers"
+              value={localSettings.num_speakers === null || localSettings.num_speakers === 0 ? '0' : String(localSettings.num_speakers)}
+              onchange={(e) => {
+                const v = parseInt((e.target as HTMLSelectElement).value, 10);
+                localSettings.num_speakers = v === 0 ? null : v;
+              }}
+            >
+              <option value="0">Auto-detect</option>
+              {#each Array.from({ length: 20 }, (_, i) => i + 1) as n}
+                <option value={String(n)}>{n}</option>
+              {/each}
+            </select>
+            <p class="hint">Hint the expected number of speakers to speed up diarization clustering.</p>
+          </div>
          <div class="field checkbox" style="margin-top: 1rem;">
            <label>
              <input type="checkbox" bind:checked={localSettings.skip_diarization} />
@@ -163,14 +184,17 @@
              <option value="local">Local (llama-server)</option>
              <option value="openai">OpenAI</option>
              <option value="anthropic">Anthropic</option>
-              <option value="litellm">LiteLLM</option>
+              <option value="litellm">OpenAI Compatible</option>
            </select>
          </div>

          {#if localSettings.ai_provider === 'openai'}
            <div class="field">
              <label for="openai-key">OpenAI API Key</label>
-              <input id="openai-key" type="password" bind:value={localSettings.openai_api_key} placeholder="sk-..." />
+              <div class="input-reveal">
+                <input id="openai-key" type={revealedFields.has('openai-key') ? 'text' : 'password'} bind:value={localSettings.openai_api_key} placeholder="sk-..." />
+                <button type="button" class="reveal-btn" onclick={() => { const s = new Set(revealedFields); s.has('openai-key') ? s.delete('openai-key') : s.add('openai-key'); revealedFields = s; }}>{revealedFields.has('openai-key') ? 'Hide' : 'Show'}</button>
+              </div>
            </div>
            <div class="field">
              <label for="openai-model">Model</label>
@@ -179,13 +203,27 @@
          {:else if localSettings.ai_provider === 'anthropic'}
            <div class="field">
              <label for="anthropic-key">Anthropic API Key</label>
-              <input id="anthropic-key" type="password" bind:value={localSettings.anthropic_api_key} placeholder="sk-ant-..." />
+              <div class="input-reveal">
+                <input id="anthropic-key" type={revealedFields.has('anthropic-key') ? 'text' : 'password'} bind:value={localSettings.anthropic_api_key} placeholder="sk-ant-..." />
+                <button type="button" class="reveal-btn" onclick={() => { const s = new Set(revealedFields); s.has('anthropic-key') ? s.delete('anthropic-key') : s.add('anthropic-key'); revealedFields = s; }}>{revealedFields.has('anthropic-key') ? 'Hide' : 'Show'}</button>
+              </div>
            </div>
            <div class="field">
              <label for="anthropic-model">Model</label>
              <input id="anthropic-model" type="text" bind:value={localSettings.anthropic_model} />
            </div>
          {:else if localSettings.ai_provider === 'litellm'}
+            <div class="field">
+              <label for="litellm-base">API Base URL</label>
+              <input id="litellm-base" type="text" bind:value={localSettings.litellm_api_base} placeholder="https://your-litellm-proxy.example.com" />
+            </div>
+            <div class="field">
+              <label for="litellm-key">API Key</label>
+              <div class="input-reveal">
+                <input id="litellm-key" type={revealedFields.has('litellm-key') ? 'text' : 'password'} bind:value={localSettings.litellm_api_key} placeholder="sk-..." />
+                <button type="button" class="reveal-btn" onclick={() => { const s = new Set(revealedFields); s.has('litellm-key') ? s.delete('litellm-key') : s.add('litellm-key'); revealedFields = s; }}>{revealedFields.has('litellm-key') ? 'Hide' : 'Show'}</button>
+              </div>
+            </div>
            <div class="field">
              <label for="litellm-model">Model</label>
              <input id="litellm-model" type="text" bind:value={localSettings.litellm_model} placeholder="provider/model-name" />
@@ -293,11 +331,36 @@
    color: #aaa;
    margin-bottom: 0.3rem;
  }
+  .input-reveal {
+    display: flex;
+    gap: 0;
+  }
+  .input-reveal input {
+    flex: 1;
+    border-top-right-radius: 0;
+    border-bottom-right-radius: 0;
+  }
+  .reveal-btn {
+    background: #0f3460;
+    border: 1px solid #4a5568;
+    border-left: none;
+    color: #aaa;
+    padding: 0.5rem 0.6rem;
+    border-radius: 0 4px 4px 0;
+    cursor: pointer;
+    font-size: 0.75rem;
+    white-space: nowrap;
+  }
+  .reveal-btn:hover {
+    color: #e0e0e0;
+    background: #1a4a7a;
+  }
  .field input,
  .field select {
    width: 100%;
    background: #1a1a2e;
    color: #e0e0e0;
+    color-scheme: dark;
    border: 1px solid #4a5568;
    border-radius: 4px;
    padding: 0.5rem;
--- a/src/lib/components/WaveformPlayer.svelte
+++ b/src/lib/components/WaveformPlayer.svelte
@@ -13,6 +13,7 @@
  let container: HTMLDivElement;
  let wavesurfer: WaveSurfer | null = $state(null);
  let isReady = $state(false);
+  let isLoading = $state(false);
  let currentTime = $state('0:00');
  let totalTime = $state('0:00');

@@ -32,6 +33,7 @@
      barWidth: 2,
      barGap: 1,
      barRadius: 2,
+      backend: 'WebAudio',
    });

    wavesurfer.on('timeupdate', (time: number) => {
@@ -41,6 +43,7 @@

    wavesurfer.on('ready', () => {
      isReady = true;
+      isLoading = false;
      const dur = wavesurfer!.getDuration();
      durationMs.set(Math.round(dur * 1000));
      totalTime = formatTime(dur);
@@ -55,7 +58,7 @@
    });

    if (audioUrl) {
-      wavesurfer.load(audioUrl);
+      loadAudio(audioUrl);
    }
  });

@@ -89,16 +92,13 @@
      console.warn('[voice-to-notes] seekTo ignored — audio not ready yet');
      return;
    }
-    const timeSec = timeMs / 1000;
-    wavesurfer.setTime(timeSec);
-    if (!wavesurfer.isPlaying()) {
-      wavesurfer.play();
-    }
+    wavesurfer.setTime(timeMs / 1000);
  }

  /** Load a new audio file. */
  export function loadAudio(url: string) {
    isReady = false;
+    isLoading = true;
    wavesurfer?.load(url);
  }
 </script>
--- a/src/lib/stores/settings.ts
+++ b/src/lib/stores/settings.ts
@@ -8,6 +8,8 @@ export interface AppSettings {
  openai_model: string;
  anthropic_model: string;
  litellm_model: string;
+  litellm_api_key: string;
+  litellm_api_base: string;
  local_model_path: string;
  local_binary_path: string;
  transcription_model: string;
@@ -15,6 +17,7 @@ export interface AppSettings {
  transcription_language: string;
  skip_diarization: boolean;
  hf_token: string;
+  num_speakers: number | null;
 }

 const defaults: AppSettings = {
@@ -24,6 +27,8 @@ const defaults: AppSettings = {
  openai_model: 'gpt-4o-mini',
  anthropic_model: 'claude-sonnet-4-6',
  litellm_model: 'gpt-4o-mini',
+  litellm_api_key: '',
+  litellm_api_base: '',
  local_model_path: '',
  local_binary_path: 'llama-server',
  transcription_model: 'base',
@@ -31,6 +36,7 @@ const defaults: AppSettings = {
  transcription_language: '',
  skip_diarization: false,
  hf_token: '',
+  num_speakers: null,
 };

 export const settings = writable<AppSettings>({ ...defaults });
@@ -47,4 +53,20 @@ export async function loadSettings(): Promise<void> {
 export async function saveSettings(s: AppSettings): Promise<void> {
  settings.set(s);
  await invoke('save_settings', { settings: s });
+
+  // Configure the AI provider in the Python sidecar
+  const configMap: Record<string, Record<string, string>> = {
+    openai: { api_key: s.openai_api_key, model: s.openai_model },
+    anthropic: { api_key: s.anthropic_api_key, model: s.anthropic_model },
+    litellm: { api_key: s.litellm_api_key, api_base: s.litellm_api_base, model: s.litellm_model },
+    local: { model: s.local_model_path, base_url: 'http://localhost:8080' },
+  };
+  const config = configMap[s.ai_provider];
+  if (config) {
+    try {
+      await invoke('ai_configure', { provider: s.ai_provider, config });
+    } catch {
+      // Sidecar may not be running yet — provider will be configured on first use
+    }
+  }
 }