Add Test & Download button for diarization model, clickable links
- Add diarize.download IPC handler that downloads the pyannote model and returns user-friendly error messages (missing license, bad token)
- Add download_diarize_model Tauri command
- Add "Test & Download Model" button in Speakers settings tab
- Update instructions to list both required model licenses (speaker-diarization-3.1 AND segmentation-3.0)
- Make all HuggingFace URLs clickable (opens in system browser)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -88,6 +88,57 @@ def make_diarize_handler() -> HandlerFunc:
|
|||||||
return handler
|
return handler
|
||||||
|
|
||||||
|
|
||||||
|
def make_diarize_download_handler() -> HandlerFunc:
    """Create a handler that downloads/validates the diarization model.

    The returned handler reads ``hf_token`` from the message payload and loads
    ``pyannote/speaker-diarization-3.1`` through ``Pipeline.from_pretrained``.

    Returns:
        A handler that replies with a ``diarize.download.result`` message
        carrying ``{"ok": True}`` when the pipeline loads, or with a
        ``download_error`` error message (built by ``error_message``) when it
        does not.
    """

    def handler(msg: IPCMessage) -> IPCMessage:
        payload = msg.payload
        hf_token = payload.get("hf_token")

        try:
            # Imported inside the handler, so pyannote is only loaded when a
            # download is actually requested; an import failure is reported
            # through the same error path as a download failure.
            from pyannote.audio import Pipeline

            print("[sidecar] Downloading diarization model...", file=sys.stderr, flush=True)
            pipeline = Pipeline.from_pretrained(
                "pyannote/speaker-diarization-3.1",
                token=hf_token,
            )

            # from_pretrained may print a notice and return None instead of
            # raising when the pipeline cannot be fetched (private/gated repo,
            # missing or invalid token). That must not be reported as success.
            if pipeline is None:
                return error_message(
                    msg.id,
                    "download_error",
                    "Could not download the diarization model. Please check your "
                    "HuggingFace token and accept the license agreements at: "
                    "huggingface.co/pyannote/speaker-diarization-3.1 and "
                    "huggingface.co/pyannote/segmentation-3.0",
                )

            print("[sidecar] Diarization model downloaded successfully", file=sys.stderr, flush=True)
            return IPCMessage(
                id=msg.id,
                type="diarize.download.result",
                payload={"ok": True},
            )
        except Exception as e:
            error_msg = str(e)
            # Make common errors more user-friendly
            if "403" in error_msg and "gated" in error_msg.lower():
                # Work out which gated model is missing access from the error text
                if "segmentation" in error_msg:
                    error_msg = (
                        "Access denied for pyannote/segmentation-3.0. "
                        "Please visit huggingface.co/pyannote/segmentation-3.0 "
                        "and accept the license agreement."
                    )
                elif "speaker-diarization" in error_msg:
                    error_msg = (
                        "Access denied for pyannote/speaker-diarization-3.1. "
                        "Please visit huggingface.co/pyannote/speaker-diarization-3.1 "
                        "and accept the license agreement."
                    )
                else:
                    # Gated, but the model name is not in the message: list both.
                    error_msg = (
                        "Access denied. Please accept the license agreements at: "
                        "huggingface.co/pyannote/speaker-diarization-3.1 and "
                        "huggingface.co/pyannote/segmentation-3.0"
                    )
            elif "401" in error_msg:
                error_msg = "Invalid token. Please check your HuggingFace token."
            return error_message(msg.id, "download_error", error_msg)

    return handler
|
||||||
|
|
||||||
|
|
||||||
def make_pipeline_handler() -> HandlerFunc:
|
def make_pipeline_handler() -> HandlerFunc:
|
||||||
"""Create a full pipeline handler (transcribe + diarize + merge)."""
|
"""Create a full pipeline handler (transcribe + diarize + merge)."""
|
||||||
from voice_to_notes.services.pipeline import PipelineService, pipeline_result_to_payload
|
from voice_to_notes.services.pipeline import PipelineService, pipeline_result_to_payload
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ from voice_to_notes.ipc.handlers import ( # noqa: E402
|
|||||||
HandlerRegistry,
|
HandlerRegistry,
|
||||||
hardware_detect_handler,
|
hardware_detect_handler,
|
||||||
make_ai_chat_handler,
|
make_ai_chat_handler,
|
||||||
|
make_diarize_download_handler,
|
||||||
make_diarize_handler,
|
make_diarize_handler,
|
||||||
make_export_handler,
|
make_export_handler,
|
||||||
make_pipeline_handler,
|
make_pipeline_handler,
|
||||||
@@ -32,6 +33,7 @@ def create_registry() -> HandlerRegistry:
|
|||||||
registry.register("transcribe.start", make_transcribe_handler())
|
registry.register("transcribe.start", make_transcribe_handler())
|
||||||
registry.register("hardware.detect", hardware_detect_handler)
|
registry.register("hardware.detect", hardware_detect_handler)
|
||||||
registry.register("diarize.start", make_diarize_handler())
|
registry.register("diarize.start", make_diarize_handler())
|
||||||
|
registry.register("diarize.download", make_diarize_download_handler())
|
||||||
registry.register("pipeline.start", make_pipeline_handler())
|
registry.register("pipeline.start", make_pipeline_handler())
|
||||||
registry.register("export.start", make_export_handler())
|
registry.register("export.start", make_export_handler())
|
||||||
registry.register("ai.chat", make_ai_chat_handler())
|
registry.register("ai.chat", make_ai_chat_handler())
|
||||||
|
|||||||
@@ -40,6 +40,35 @@ pub fn transcribe_file(
|
|||||||
Ok(response.payload)
|
Ok(response.payload)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Download and validate the diarization model via the Python sidecar.
|
||||||
|
#[tauri::command]
|
||||||
|
pub fn download_diarize_model(
|
||||||
|
hf_token: String,
|
||||||
|
) -> Result<Value, String> {
|
||||||
|
let manager = sidecar();
|
||||||
|
manager.ensure_running()?;
|
||||||
|
|
||||||
|
let request_id = uuid::Uuid::new_v4().to_string();
|
||||||
|
let msg = IPCMessage::new(
|
||||||
|
&request_id,
|
||||||
|
"diarize.download",
|
||||||
|
json!({
|
||||||
|
"hf_token": hf_token,
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
|
||||||
|
let response = manager.send_and_receive(&msg)?;
|
||||||
|
|
||||||
|
if response.msg_type == "error" {
|
||||||
|
return Ok(json!({
|
||||||
|
"ok": false,
|
||||||
|
"error": response.payload.get("message").and_then(|v| v.as_str()).unwrap_or("unknown"),
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(json!({ "ok": true }))
|
||||||
|
}
|
||||||
|
|
||||||
/// Run the full transcription + diarization pipeline via the Python sidecar.
|
/// Run the full transcription + diarization pipeline via the Python sidecar.
|
||||||
#[tauri::command]
|
#[tauri::command]
|
||||||
pub fn run_pipeline(
|
pub fn run_pipeline(
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ use commands::export::export_transcript;
|
|||||||
use commands::project::{create_project, get_project, list_projects};
|
use commands::project::{create_project, get_project, list_projects};
|
||||||
use commands::settings::{load_settings, save_settings};
|
use commands::settings::{load_settings, save_settings};
|
||||||
use commands::system::{get_data_dir, llama_list_models, llama_start, llama_status, llama_stop};
|
use commands::system::{get_data_dir, llama_list_models, llama_start, llama_status, llama_stop};
|
||||||
use commands::transcribe::{run_pipeline, transcribe_file};
|
use commands::transcribe::{download_diarize_model, run_pipeline, transcribe_file};
|
||||||
use state::AppState;
|
use state::AppState;
|
||||||
|
|
||||||
#[cfg_attr(mobile, tauri::mobile_entry_point)]
|
#[cfg_attr(mobile, tauri::mobile_entry_point)]
|
||||||
@@ -36,6 +36,7 @@ pub fn run() {
|
|||||||
list_projects,
|
list_projects,
|
||||||
transcribe_file,
|
transcribe_file,
|
||||||
run_pipeline,
|
run_pipeline,
|
||||||
|
download_diarize_model,
|
||||||
export_transcript,
|
export_transcript,
|
||||||
ai_chat,
|
ai_chat,
|
||||||
ai_list_providers,
|
ai_list_providers,
|
||||||
|
|||||||
@@ -1,4 +1,6 @@
|
|||||||
<script lang="ts">
|
<script lang="ts">
|
||||||
|
import { invoke } from '@tauri-apps/api/core';
|
||||||
|
import { openUrl } from '@tauri-apps/plugin-opener';
|
||||||
import { settings, saveSettings, type AppSettings } from '$lib/stores/settings';
|
import { settings, saveSettings, type AppSettings } from '$lib/stores/settings';
|
||||||
|
|
||||||
interface Props {
|
interface Props {
|
||||||
@@ -10,6 +12,32 @@
|
|||||||
|
|
||||||
let localSettings = $state<AppSettings>({ ...$settings });
|
let localSettings = $state<AppSettings>({ ...$settings });
|
||||||
let activeTab = $state<'transcription' | 'speakers' | 'ai' | 'local'>('transcription');
|
let activeTab = $state<'transcription' | 'speakers' | 'ai' | 'local'>('transcription');
|
||||||
|
let modelStatus = $state<'idle' | 'downloading' | 'success' | 'error'>('idle');
|
||||||
|
let modelError = $state('');
|
||||||
|
|
||||||
|
/**
 * Ask the backend to download/validate the diarization model using the token
 * currently entered in the settings form, and reflect the outcome in
 * `modelStatus` / `modelError`.
 */
async function testAndDownloadModel() {
  const token = localSettings.hf_token;

  // Without a token there is nothing to test; report it without calling the backend.
  if (!token) {
    modelStatus = 'error';
    modelError = 'Please enter a HuggingFace token first.';
    return;
  }

  modelStatus = 'downloading';
  modelError = '';

  try {
    const { ok, error } = await invoke<{ ok: boolean; error?: string }>(
      'download_diarize_model',
      { hfToken: token },
    );

    if (!ok) {
      modelStatus = 'error';
      modelError = error || 'Unknown error';
      return;
    }
    modelStatus = 'success';
  } catch (err) {
    // The invoke call itself failed (rejected promise); show the raw reason.
    modelStatus = 'error';
    modelError = String(err);
  }
}
|
||||||
|
|
||||||
// Sync when settings store changes
|
// Sync when settings store changes
|
||||||
$effect(() => {
|
$effect(() => {
|
||||||
@@ -86,18 +114,41 @@
|
|||||||
<input id="hf-token" type="password" bind:value={localSettings.hf_token} placeholder="hf_..." />
|
<input id="hf-token" type="password" bind:value={localSettings.hf_token} placeholder="hf_..." />
|
||||||
</div>
|
</div>
|
||||||
<div class="info-box">
|
<div class="info-box">
|
||||||
<p class="info-title">Why is this needed?</p>
|
<p class="info-title">Setup (one-time)</p>
|
||||||
<p>Speaker detection uses the <strong>pyannote.audio</strong> model, which is hosted on HuggingFace and requires accepting a license agreement.</p>
|
<p>Speaker detection uses <strong>pyannote.audio</strong> models hosted on HuggingFace. You must accept the license for each model:</p>
|
||||||
<p class="info-title">How to get a token:</p>
|
|
||||||
<ol>
|
<ol>
|
||||||
<li>Create a free account at <strong>huggingface.co</strong></li>
|
<li>Create a free account at <!-- svelte-ignore a11y_no_static_element_interactions --><a class="ext-link" onclick={() => openUrl('https://huggingface.co/join')}>huggingface.co</a></li>
|
||||||
<li>Go to <strong>huggingface.co/pyannote/speaker-diarization-3.1</strong> and accept the license</li>
|
<li>Accept the license on <strong>each</strong> of these pages:
|
||||||
<li>Go to <strong>huggingface.co/settings/tokens</strong> and create a token with <em>read</em> access</li>
|
<ul>
|
||||||
<li>Paste the token above and click Save</li>
|
<!-- svelte-ignore a11y_no_static_element_interactions -->
|
||||||
|
<li><a class="ext-link" onclick={() => openUrl('https://huggingface.co/pyannote/speaker-diarization-3.1')}>pyannote/speaker-diarization-3.1</a></li>
|
||||||
|
<!-- svelte-ignore a11y_no_static_element_interactions -->
|
||||||
|
<li><a class="ext-link" onclick={() => openUrl('https://huggingface.co/pyannote/segmentation-3.0')}>pyannote/segmentation-3.0</a></li>
|
||||||
|
</ul>
|
||||||
|
</li>
|
||||||
|
<!-- svelte-ignore a11y_no_static_element_interactions -->
|
||||||
|
<li>Create a token at <a class="ext-link" onclick={() => openUrl('https://huggingface.co/settings/tokens')}>huggingface.co/settings/tokens</a> (read access)</li>
|
||||||
|
<li>Paste the token above and click <strong>Test & Download</strong></li>
|
||||||
</ol>
|
</ol>
|
||||||
<p>The model will be downloaded automatically on first use (~100 MB).</p>
|
|
||||||
</div>
|
</div>
|
||||||
<div class="field checkbox">
|
<button
|
||||||
|
class="btn-download"
|
||||||
|
onclick={testAndDownloadModel}
|
||||||
|
disabled={modelStatus === 'downloading'}
|
||||||
|
>
|
||||||
|
{#if modelStatus === 'downloading'}
|
||||||
|
Downloading model...
|
||||||
|
{:else}
|
||||||
|
Test & Download Model
|
||||||
|
{/if}
|
||||||
|
</button>
|
||||||
|
{#if modelStatus === 'success'}
|
||||||
|
<p class="status-success">Model downloaded successfully. Speaker detection is ready.</p>
|
||||||
|
{/if}
|
||||||
|
{#if modelStatus === 'error'}
|
||||||
|
<p class="status-error">{modelError}</p>
|
||||||
|
{/if}
|
||||||
|
<div class="field checkbox" style="margin-top: 1rem;">
|
||||||
<label>
|
<label>
|
||||||
<input type="checkbox" bind:checked={localSettings.skip_diarization} />
|
<input type="checkbox" bind:checked={localSettings.skip_diarization} />
|
||||||
Skip speaker detection (faster, no speaker labels)
|
Skip speaker detection (faster, no speaker labels)
|
||||||
@@ -303,6 +354,48 @@
|
|||||||
.info-box strong {
|
.info-box strong {
|
||||||
color: #e0e0e0;
|
color: #e0e0e0;
|
||||||
}
|
}
|
||||||
|
.ext-link {
|
||||||
|
color: #e94560;
|
||||||
|
cursor: pointer;
|
||||||
|
text-decoration: underline;
|
||||||
|
}
|
||||||
|
.ext-link:hover {
|
||||||
|
color: #ff6b81;
|
||||||
|
}
|
||||||
|
.info-box ul {
|
||||||
|
margin: 0.25rem 0;
|
||||||
|
padding-left: 1.25rem;
|
||||||
|
}
|
||||||
|
.btn-download {
|
||||||
|
background: #0f3460;
|
||||||
|
border: 1px solid #4a5568;
|
||||||
|
color: #e0e0e0;
|
||||||
|
padding: 0.5rem 1rem;
|
||||||
|
border-radius: 6px;
|
||||||
|
cursor: pointer;
|
||||||
|
font-size: 0.85rem;
|
||||||
|
width: 100%;
|
||||||
|
margin-bottom: 0.5rem;
|
||||||
|
}
|
||||||
|
.btn-download:hover:not(:disabled) {
|
||||||
|
background: #1a4a7a;
|
||||||
|
border-color: #e94560;
|
||||||
|
}
|
||||||
|
.btn-download:disabled {
|
||||||
|
opacity: 0.6;
|
||||||
|
cursor: not-allowed;
|
||||||
|
}
|
||||||
|
.status-success {
|
||||||
|
color: #4ecdc4;
|
||||||
|
font-size: 0.8rem;
|
||||||
|
margin: 0.25rem 0;
|
||||||
|
}
|
||||||
|
.status-error {
|
||||||
|
color: #e94560;
|
||||||
|
font-size: 0.8rem;
|
||||||
|
margin: 0.25rem 0;
|
||||||
|
word-break: break-word;
|
||||||
|
}
|
||||||
.modal-footer {
|
.modal-footer {
|
||||||
display: flex;
|
display: flex;
|
||||||
justify-content: flex-end;
|
justify-content: flex-end;
|
||||||
|
|||||||
Reference in New Issue
Block a user