Add Test & Download button for diarization model, clickable links

- Add diarize.download IPC handler that downloads the pyannote model and returns user-friendly error messages (missing license, bad token)
- Add download_diarize_model Tauri command
- Add "Test & Download Model" button in Speakers settings tab
- Update instructions to list both required model licenses (speaker-diarization-3.1 AND segmentation-3.0)
- Make all HuggingFace URLs clickable (they open in the system browser)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-26 18:21:42 -08:00
parent baf820286f
commit a3612c986d
5 changed files with 186 additions and 10 deletions
--- a/python/voice_to_notes/ipc/handlers.py
+++ b/python/voice_to_notes/ipc/handlers.py
@@ -88,6 +88,57 @@ def make_diarize_handler() -> HandlerFunc:
    return handler
 def make_diarize_download_handler() -> HandlerFunc:
    """Create a handler that downloads/validates the diarization model.

    The returned handler reads ``hf_token`` from the message payload and asks
    pyannote.audio to load ``pyannote/speaker-diarization-3.1`` with it.

    Returns (from the handler):
        * a ``diarize.download.result`` message with payload ``{"ok": True}``
          when a pipeline object was actually obtained;
        * an error message with code ``download_error`` otherwise. Common
          HTTP 403 (gated model) and 401 (bad token) failures are rewritten
          into short, user-facing instructions.

    The handler never raises: every exception is converted to an error message.
    """
    def handler(msg: IPCMessage) -> IPCMessage:
        payload = msg.payload
        hf_token = payload.get("hf_token")
        try:
            # Imported lazily so the heavy dependency is only loaded on demand.
            from pyannote.audio import Pipeline
            print("[sidecar] Downloading diarization model...", file=sys.stderr, flush=True)
            pipeline = Pipeline.from_pretrained(
                "pyannote/speaker-diarization-3.1",
                token=hf_token,
            )
            # NOTE(review): pyannote.audio's from_pretrained can signal a gated or
            # unreachable repository by printing a notice and returning None
            # instead of raising (seen in 3.x releases). Without this check the
            # handler would report success even though no model was obtained.
            if pipeline is None:
                return error_message(
                    msg.id,
                    "download_error",
                    "Could not download the diarization model. Please check your "
                    "HuggingFace token and accept the license agreements at: "
                    "huggingface.co/pyannote/speaker-diarization-3.1 and "
                    "huggingface.co/pyannote/segmentation-3.0",
                )
            print("[sidecar] Diarization model downloaded successfully", file=sys.stderr, flush=True)
            return IPCMessage(
                id=msg.id,
                type="diarize.download.result",
                payload={"ok": True},
            )
        except Exception as e:
            error_msg = str(e)
            # Make common errors more user-friendly
            if "403" in error_msg and "gated" in error_msg.lower():
                # Extract which model needs access
                if "segmentation" in error_msg:
                    error_msg = (
                        "Access denied for pyannote/segmentation-3.0. "
                        "Please visit huggingface.co/pyannote/segmentation-3.0 "
                        "and accept the license agreement."
                    )
                elif "speaker-diarization" in error_msg:
                    error_msg = (
                        "Access denied for pyannote/speaker-diarization-3.1. "
                        "Please visit huggingface.co/pyannote/speaker-diarization-3.1 "
                        "and accept the license agreement."
                    )
                else:
                    # Gated, but the text does not say which repository: list both.
                    error_msg = (
                        "Access denied. Please accept the license agreements at: "
                        "huggingface.co/pyannote/speaker-diarization-3.1 and "
                        "huggingface.co/pyannote/segmentation-3.0"
                    )
            elif "401" in error_msg:
                error_msg = "Invalid token. Please check your HuggingFace token."
            return error_message(msg.id, "download_error", error_msg)
    return handler
 def make_pipeline_handler() -> HandlerFunc:
    """Create a full pipeline handler (transcribe + diarize + merge)."""
    from voice_to_notes.services.pipeline import PipelineService, pipeline_result_to_payload
--- a/python/voice_to_notes/main.py
+++ b/python/voice_to_notes/main.py
@@ -15,6 +15,7 @@ from voice_to_notes.ipc.handlers import (  # noqa: E402
    HandlerRegistry,
    hardware_detect_handler,
    make_ai_chat_handler,
    make_diarize_download_handler,
    make_diarize_handler,
    make_export_handler,
    make_pipeline_handler,
@@ -32,6 +33,7 @@ def create_registry() -> HandlerRegistry:
    registry.register("transcribe.start", make_transcribe_handler())
    registry.register("hardware.detect", hardware_detect_handler)
    registry.register("diarize.start", make_diarize_handler())
    registry.register("diarize.download", make_diarize_download_handler())
    registry.register("pipeline.start", make_pipeline_handler())
    registry.register("export.start", make_export_handler())
    registry.register("ai.chat", make_ai_chat_handler())
--- a/src-tauri/src/commands/transcribe.rs
+++ b/src-tauri/src/commands/transcribe.rs
@@ -40,6 +40,35 @@ pub fn transcribe_file(
    Ok(response.payload)
 }
 /// Ask the Python sidecar to fetch and validate the diarization model.
 ///
 /// Makes sure the sidecar is running, then sends a `diarize.download` IPC
 /// request whose payload carries `hf_token`.
 ///
 /// Returns `{"ok": true}` for any reply whose type is not `"error"`. For an
 /// `"error"` reply it returns `{"ok": false, "error": <message>}`, where the
 /// message is the reply payload's `message` string, or `"unknown"` when that
 /// field is missing or not a string.
 ///
 /// # Errors
 ///
 /// Propagates the `Err` produced by `ensure_running` or `send_and_receive`.
 #[tauri::command]
 pub fn download_diarize_model(hf_token: String) -> Result<Value, String> {
    let sidecar_manager = sidecar();
    sidecar_manager.ensure_running()?;

    let id = uuid::Uuid::new_v4().to_string();
    let body = json!({
        "hf_token": hf_token,
    });
    let request = IPCMessage::new(&id, "diarize.download", body);

    let reply = sidecar_manager.send_and_receive(&request)?;
    if reply.msg_type != "error" {
        return Ok(json!({ "ok": true }));
    }

    // Sidecar-reported failures are returned as data (not `Err`) so the
    // caller receives the human-readable message in the result object.
    let reason = reply
        .payload
        .get("message")
        .and_then(|value| value.as_str())
        .unwrap_or("unknown");
    Ok(json!({
        "ok": false,
        "error": reason,
    }))
 }
 /// Run the full transcription + diarization pipeline via the Python sidecar.
 #[tauri::command]
 pub fn run_pipeline(
--- a/src-tauri/src/lib.rs
+++ b/src-tauri/src/lib.rs
@@ -12,7 +12,7 @@ use commands::export::export_transcript;
 use commands::project::{create_project, get_project, list_projects};
 use commands::settings::{load_settings, save_settings};
 use commands::system::{get_data_dir, llama_list_models, llama_start, llama_status, llama_stop};
-use commands::transcribe::{run_pipeline, transcribe_file};
+use commands::transcribe::{download_diarize_model, run_pipeline, transcribe_file};
 use state::AppState;
 #[cfg_attr(mobile, tauri::mobile_entry_point)]
@@ -36,6 +36,7 @@ pub fn run() {
            list_projects,
            transcribe_file,
            run_pipeline,
            download_diarize_model,
            export_transcript,
            ai_chat,
            ai_list_providers,
--- a/src/lib/components/SettingsModal.svelte
+++ b/src/lib/components/SettingsModal.svelte
@@ -1,4 +1,6 @@
 <script lang="ts">
  import { invoke } from '@tauri-apps/api/core';
  import { openUrl } from '@tauri-apps/plugin-opener';
  import { settings, saveSettings, type AppSettings } from '$lib/stores/settings';
  interface Props {
@@ -10,6 +12,32 @@
  // Local working copy, initialised as a shallow copy of the settings store value.
  let localSettings = $state<AppSettings>({ ...$settings });
  // Name of the settings tab currently selected; starts on 'transcription'.
  let activeTab = $state<'transcription' | 'speakers' | 'ai' | 'local'>('transcription');
  // Progress of the "Test & Download Model" action, updated by testAndDownloadModel().
  let modelStatus = $state<'idle' | 'downloading' | 'success' | 'error'>('idle');
  // Message rendered under the button while modelStatus is 'error'.
  let modelError = $state('');
  // Checks the entered HuggingFace token by asking the backend to download the
  // diarization model, mirroring the outcome into modelStatus / modelError.
  async function testAndDownloadModel() {
    // Nothing to test without a token: report it locally, skip the backend call.
    if (!localSettings.hf_token) {
      modelStatus = 'error';
      modelError = 'Please enter a HuggingFace token first.';
      return;
    }

    modelStatus = 'downloading';
    modelError = '';

    try {
      const { ok, error } = await invoke<{ ok: boolean; error?: string }>(
        'download_diarize_model',
        { hfToken: localSettings.hf_token },
      );
      if (!ok) {
        // Backend answered but reported a failure; fall back to a generic text.
        modelStatus = 'error';
        modelError = error || 'Unknown error';
        return;
      }
      modelStatus = 'success';
    } catch (err) {
      // The invoke call itself rejected (or the result could not be read).
      modelStatus = 'error';
      modelError = String(err);
    }
  }
  // Sync when settings store changes
  $effect(() => {
@@ -86,18 +114,41 @@
            <input id="hf-token" type="password" bind:value={localSettings.hf_token} placeholder="hf_..." />
          </div>
          <div class="info-box">
-            <p class="info-title">Why is this needed?</p>
+            <p class="info-title">Setup (one-time)</p>
-            <p>Speaker detection uses the <strong>pyannote.audio</strong> model, which is hosted on HuggingFace and requires accepting a license agreement.</p>
+            <p>Speaker detection uses <strong>pyannote.audio</strong> models hosted on HuggingFace. You must accept the license for each model:</p>
            <p class="info-title">How to get a token:</p>
            <ol>
-              <li>Create a free account at <strong>huggingface.co</strong></li>
+              <li>Create a free account at <!-- svelte-ignore a11y_no_static_element_interactions --><a class="ext-link" onclick={() => openUrl('https://huggingface.co/join')}>huggingface.co</a></li>
-              <li>Go to <strong>huggingface.co/pyannote/speaker-diarization-3.1</strong> and accept the license</li>
+              <li>Accept the license on <strong>each</strong> of these pages:
-              <li>Go to <strong>huggingface.co/settings/tokens</strong> and create a token with <em>read</em> access</li>
+                <ul>
-              <li>Paste the token above and click Save</li>
+                  <!-- svelte-ignore a11y_no_static_element_interactions -->
                  <li><a class="ext-link" onclick={() => openUrl('https://huggingface.co/pyannote/speaker-diarization-3.1')}>pyannote/speaker-diarization-3.1</a></li>
                  <!-- svelte-ignore a11y_no_static_element_interactions -->
                  <li><a class="ext-link" onclick={() => openUrl('https://huggingface.co/pyannote/segmentation-3.0')}>pyannote/segmentation-3.0</a></li>
                </ul>
              </li>
              <!-- svelte-ignore a11y_no_static_element_interactions -->
              <li>Create a token at <a class="ext-link" onclick={() => openUrl('https://huggingface.co/settings/tokens')}>huggingface.co/settings/tokens</a> (read access)</li>
              <li>Paste the token above and click <strong>Test & Download</strong></li>
            </ol>
            <p>The model will be downloaded automatically on first use (~100 MB).</p>
          </div>
-          <div class="field checkbox">
+          <button
            class="btn-download"
            onclick={testAndDownloadModel}
            disabled={modelStatus === 'downloading'}
          >
            {#if modelStatus === 'downloading'}
              Downloading model...
            {:else}
              Test & Download Model
            {/if}
          </button>
          {#if modelStatus === 'success'}
            <p class="status-success">Model downloaded successfully. Speaker detection is ready.</p>
          {/if}
          {#if modelStatus === 'error'}
            <p class="status-error">{modelError}</p>
          {/if}
          <div class="field checkbox" style="margin-top: 1rem;">
            <label>
              <input type="checkbox" bind:checked={localSettings.skip_diarization} />
              Skip speaker detection (faster, no speaker labels)
@@ -303,6 +354,48 @@
  .info-box strong {
    color: #e0e0e0;
  }
  /* Anchors that open an external page through an onclick handler. */
  .ext-link {
    cursor: pointer;
    text-decoration: underline;
    color: #e94560;
  }

  /* Lighter shade while the pointer is over the link. */
  .ext-link:hover {
    color: #ff6b81;
  }

  /* Nested bullet list inside the info box. */
  .info-box ul {
    padding-left: 1.25rem;
    margin: 0.25rem 0;
  }
  /* Full-width "Test & Download Model" button. */
  .btn-download {
    width: 100%;
    margin-bottom: 0.5rem;
    padding: 0.5rem 1rem;
    font-size: 0.85rem;
    color: #e0e0e0;
    background: #0f3460;
    border: 1px solid #4a5568;
    border-radius: 6px;
    cursor: pointer;
  }

  /* Hover highlight, applied only while the button is enabled. */
  .btn-download:hover:not(:disabled) {
    border-color: #e94560;
    background: #1a4a7a;
  }

  /* Dimmed, non-interactive look for the disabled state. */
  .btn-download:disabled {
    cursor: not-allowed;
    opacity: 0.6;
  }
  /* Confirmation line shown after a successful model download. */
  .status-success {
    margin: 0.25rem 0;
    font-size: 0.8rem;
    color: #4ecdc4;
  }

  /* Failure line; long error text may wrap inside words. */
  .status-error {
    margin: 0.25rem 0;
    font-size: 0.8rem;
    color: #e94560;
    word-break: break-word;
  }
  .modal-footer {
    display: flex;
    justify-content: flex-end;