perf/pipeline-improvements #1

Merged
jknapp merged 18 commits from perf/pipeline-improvements into main 2026-03-21 04:53:45 +00:00
5 changed files with 186 additions and 10 deletions
Showing only changes of commit a3612c986d - Show all commits

View File

@@ -88,6 +88,57 @@ def make_diarize_handler() -> HandlerFunc:
return handler
def make_diarize_download_handler() -> HandlerFunc:
    """Create a handler that downloads/validates the diarization model.

    The returned handler reads ``hf_token`` from the message payload and asks
    pyannote to load ``pyannote/speaker-diarization-3.1`` with it.

    Returns:
        A handler that replies with a ``diarize.download.result`` message
        (payload ``{"ok": True}``) when the pipeline loads, or with a
        ``download_error`` error message when loading fails.
    """

    def handler(msg: IPCMessage) -> IPCMessage:
        payload = msg.payload
        hf_token = payload.get("hf_token")

        try:
            # Imported inside the try block, so a failing import is reported
            # through the same error path as a failing download.
            from pyannote.audio import Pipeline

            print("[sidecar] Downloading diarization model...", file=sys.stderr, flush=True)
            pipeline = Pipeline.from_pretrained(
                "pyannote/speaker-diarization-3.1",
                token=hf_token,
            )

            # pyannote.audio's from_pretrained may print a notice and return
            # None (rather than raise) when the repository cannot be fetched,
            # e.g. for a gated model or a bad token. Without this check the
            # handler would report success for a download that never happened.
            if pipeline is None:
                return error_message(
                    msg.id,
                    "download_error",
                    "Could not download the diarization model. Please check your "
                    "HuggingFace token and accept the license agreements at: "
                    "huggingface.co/pyannote/speaker-diarization-3.1 and "
                    "huggingface.co/pyannote/segmentation-3.0",
                )

            print("[sidecar] Diarization model downloaded successfully", file=sys.stderr, flush=True)
            return IPCMessage(
                id=msg.id,
                type="diarize.download.result",
                payload={"ok": True},
            )
        except Exception as e:
            error_msg = str(e)
            # Make common errors more user-friendly
            if "403" in error_msg and "gated" in error_msg.lower():
                # Extract which model needs access
                if "segmentation" in error_msg:
                    error_msg = (
                        "Access denied for pyannote/segmentation-3.0. "
                        "Please visit huggingface.co/pyannote/segmentation-3.0 "
                        "and accept the license agreement."
                    )
                elif "speaker-diarization" in error_msg:
                    error_msg = (
                        "Access denied for pyannote/speaker-diarization-3.1. "
                        "Please visit huggingface.co/pyannote/speaker-diarization-3.1 "
                        "and accept the license agreement."
                    )
                else:
                    error_msg = (
                        "Access denied. Please accept the license agreements at: "
                        "huggingface.co/pyannote/speaker-diarization-3.1 and "
                        "huggingface.co/pyannote/segmentation-3.0"
                    )
            elif "401" in error_msg:
                error_msg = "Invalid token. Please check your HuggingFace token."
            return error_message(msg.id, "download_error", error_msg)

    return handler
def make_pipeline_handler() -> HandlerFunc:
"""Create a full pipeline handler (transcribe + diarize + merge)."""
from voice_to_notes.services.pipeline import PipelineService, pipeline_result_to_payload

View File

@@ -15,6 +15,7 @@ from voice_to_notes.ipc.handlers import ( # noqa: E402
HandlerRegistry,
hardware_detect_handler,
make_ai_chat_handler,
make_diarize_download_handler,
make_diarize_handler,
make_export_handler,
make_pipeline_handler,
@@ -32,6 +33,7 @@ def create_registry() -> HandlerRegistry:
registry.register("transcribe.start", make_transcribe_handler())
registry.register("hardware.detect", hardware_detect_handler)
registry.register("diarize.start", make_diarize_handler())
registry.register("diarize.download", make_diarize_download_handler())
registry.register("pipeline.start", make_pipeline_handler())
registry.register("export.start", make_export_handler())
registry.register("ai.chat", make_ai_chat_handler())

View File

@@ -40,6 +40,35 @@ pub fn transcribe_file(
Ok(response.payload)
}
/// Download and validate the diarization model via the Python sidecar.
#[tauri::command]
pub fn download_diarize_model(
hf_token: String,
) -> Result<Value, String> {
let manager = sidecar();
manager.ensure_running()?;
let request_id = uuid::Uuid::new_v4().to_string();
let msg = IPCMessage::new(
&request_id,
"diarize.download",
json!({
"hf_token": hf_token,
}),
);
let response = manager.send_and_receive(&msg)?;
if response.msg_type == "error" {
return Ok(json!({
"ok": false,
"error": response.payload.get("message").and_then(|v| v.as_str()).unwrap_or("unknown"),
}));
}
Ok(json!({ "ok": true }))
}
/// Run the full transcription + diarization pipeline via the Python sidecar.
#[tauri::command]
pub fn run_pipeline(

View File

@@ -12,7 +12,7 @@ use commands::export::export_transcript;
use commands::project::{create_project, get_project, list_projects};
use commands::settings::{load_settings, save_settings};
use commands::system::{get_data_dir, llama_list_models, llama_start, llama_status, llama_stop};
use commands::transcribe::{run_pipeline, transcribe_file};
use commands::transcribe::{download_diarize_model, run_pipeline, transcribe_file};
use state::AppState;
#[cfg_attr(mobile, tauri::mobile_entry_point)]
@@ -36,6 +36,7 @@ pub fn run() {
list_projects,
transcribe_file,
run_pipeline,
download_diarize_model,
export_transcript,
ai_chat,
ai_list_providers,

View File

@@ -1,4 +1,6 @@
<script lang="ts">
import { invoke } from '@tauri-apps/api/core';
import { openUrl } from '@tauri-apps/plugin-opener';
import { settings, saveSettings, type AppSettings } from '$lib/stores/settings';
interface Props {
@@ -10,6 +12,32 @@
// Editable working copy of the settings store value (shallow copy via spread).
let localSettings = $state<AppSettings>({ ...$settings });
// Currently selected settings tab; starts on 'transcription'.
let activeTab = $state<'transcription' | 'speakers' | 'ai' | 'local'>('transcription');
// Progress of the diarization-model download driven by testAndDownloadModel().
let modelStatus = $state<'idle' | 'downloading' | 'success' | 'error'>('idle');
// Error text shown while modelStatus is 'error'; cleared when a new download starts.
let modelError = $state('');
/**
 * Validate the entered HuggingFace token by asking the backend to download
 * the diarization model, and reflect the outcome in `modelStatus` /
 * `modelError`.
 *
 * Without a token no backend call is made and an error is shown instead.
 * Both a rejected `invoke` call and a reply with `ok: false` end in the
 * 'error' state.
 */
async function testAndDownloadModel() {
  // Single place that moves the UI into the error state.
  const fail = (message: string) => {
    modelStatus = 'error';
    modelError = message;
  };

  const token = localSettings.hf_token;
  if (!token) {
    fail('Please enter a HuggingFace token first.');
    return;
  }

  modelStatus = 'downloading';
  modelError = '';

  try {
    const response = await invoke<{ ok: boolean; error?: string }>('download_diarize_model', {
      hfToken: token,
    });
    if (!response.ok) {
      fail(response.error || 'Unknown error');
      return;
    }
    modelStatus = 'success';
  } catch (failure) {
    fail(String(failure));
  }
}
// Sync when settings store changes
$effect(() => {
@@ -86,18 +114,41 @@
<input id="hf-token" type="password" bind:value={localSettings.hf_token} placeholder="hf_..." />
</div>
<div class="info-box">
<p class="info-title">Why is this needed?</p>
<p>Speaker detection uses the <strong>pyannote.audio</strong> model, which is hosted on HuggingFace and requires accepting a license agreement.</p>
<p class="info-title">How to get a token:</p>
<p class="info-title">Setup (one-time)</p>
<p>Speaker detection uses <strong>pyannote.audio</strong> models hosted on HuggingFace. You must accept the license for each model:</p>
<ol>
<li>Create a free account at <strong>huggingface.co</strong></li>
<li>Go to <strong>huggingface.co/pyannote/speaker-diarization-3.1</strong> and accept the license</li>
<li>Go to <strong>huggingface.co/settings/tokens</strong> and create a token with <em>read</em> access</li>
<li>Paste the token above and click Save</li>
<li>Create a free account at <!-- svelte-ignore a11y_no_static_element_interactions --><a class="ext-link" onclick={() => openUrl('https://huggingface.co/join')}>huggingface.co</a></li>
<li>Accept the license on <strong>each</strong> of these pages:
<ul>
<!-- svelte-ignore a11y_no_static_element_interactions -->
<li><a class="ext-link" onclick={() => openUrl('https://huggingface.co/pyannote/speaker-diarization-3.1')}>pyannote/speaker-diarization-3.1</a></li>
<!-- svelte-ignore a11y_no_static_element_interactions -->
<li><a class="ext-link" onclick={() => openUrl('https://huggingface.co/pyannote/segmentation-3.0')}>pyannote/segmentation-3.0</a></li>
</ul>
</li>
<!-- svelte-ignore a11y_no_static_element_interactions -->
<li>Create a token at <a class="ext-link" onclick={() => openUrl('https://huggingface.co/settings/tokens')}>huggingface.co/settings/tokens</a> (read access)</li>
<li>Paste the token above and click <strong>Test & Download</strong></li>
</ol>
<p>The model will be downloaded automatically on first use (~100 MB).</p>
</div>
<div class="field checkbox">
<button
class="btn-download"
onclick={testAndDownloadModel}
disabled={modelStatus === 'downloading'}
>
{#if modelStatus === 'downloading'}
Downloading model...
{:else}
Test & Download Model
{/if}
</button>
{#if modelStatus === 'success'}
<p class="status-success">Model downloaded successfully. Speaker detection is ready.</p>
{/if}
{#if modelStatus === 'error'}
<p class="status-error">{modelError}</p>
{/if}
<div class="field checkbox" style="margin-top: 1rem;">
<label>
<input type="checkbox" bind:checked={localSettings.skip_diarization} />
Skip speaker detection (faster, no speaker labels)
@@ -303,6 +354,48 @@
/* Emphasised text inside the info box. */
.info-box strong {
color: #e0e0e0;
}
/* Anchors that open an external URL from a click handler; styled to read as links. */
.ext-link {
color: #e94560;
cursor: pointer;
text-decoration: underline;
}
/* Lighter accent colour on hover. */
.ext-link:hover {
color: #ff6b81;
}
/* Nested list of model pages inside the setup steps. */
.info-box ul {
margin: 0.25rem 0;
padding-left: 1.25rem;
}
/* Full-width button that starts the model test/download. */
.btn-download {
background: #0f3460;
border: 1px solid #4a5568;
color: #e0e0e0;
padding: 0.5rem 1rem;
border-radius: 6px;
cursor: pointer;
font-size: 0.85rem;
width: 100%;
margin-bottom: 0.5rem;
}
/* Hover feedback applies only while the button is enabled. */
.btn-download:hover:not(:disabled) {
background: #1a4a7a;
border-color: #e94560;
}
/* Dimmed appearance while the button is disabled. */
.btn-download:disabled {
opacity: 0.6;
cursor: not-allowed;
}
/* Status line shown after a successful download. */
.status-success {
color: #4ecdc4;
font-size: 0.8rem;
margin: 0.25rem 0;
}
/* Status line for download errors; long messages are allowed to wrap. */
.status-error {
color: #e94560;
font-size: 0.8rem;
margin: 0.25rem 0;
word-break: break-word;
}
.modal-footer {
display: flex;
justify-content: flex-end;