perf/pipeline-improvements #1
@@ -107,6 +107,7 @@ def make_pipeline_handler() -> HandlerFunc:
|
|||||||
min_speakers=payload.get("min_speakers"),
|
min_speakers=payload.get("min_speakers"),
|
||||||
max_speakers=payload.get("max_speakers"),
|
max_speakers=payload.get("max_speakers"),
|
||||||
skip_diarization=payload.get("skip_diarization", False),
|
skip_diarization=payload.get("skip_diarization", False),
|
||||||
|
hf_token=payload.get("hf_token"),
|
||||||
)
|
)
|
||||||
return IPCMessage(
|
return IPCMessage(
|
||||||
id=msg.id,
|
id=msg.id,
|
||||||
|
|||||||
@@ -35,7 +35,7 @@ class DiarizeService:
|
|||||||
def __init__(self) -> None:
|
def __init__(self) -> None:
|
||||||
self._pipeline: Any = None
|
self._pipeline: Any = None
|
||||||
|
|
||||||
def _ensure_pipeline(self) -> Any:
|
def _ensure_pipeline(self, hf_token: str | None = None) -> Any:
|
||||||
"""Load the pyannote diarization pipeline (lazy)."""
|
"""Load the pyannote diarization pipeline (lazy)."""
|
||||||
if self._pipeline is not None:
|
if self._pipeline is not None:
|
||||||
return self._pipeline
|
return self._pipeline
|
||||||
@@ -44,6 +44,8 @@ class DiarizeService:
|
|||||||
|
|
||||||
print("[sidecar] Loading pyannote diarization pipeline...", file=sys.stderr, flush=True)
|
print("[sidecar] Loading pyannote diarization pipeline...", file=sys.stderr, flush=True)
|
||||||
|
|
||||||
|
# Use token from argument, fall back to environment variable
|
||||||
|
if not hf_token:
|
||||||
hf_token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN") or None
|
hf_token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN") or None
|
||||||
|
|
||||||
models = [
|
models = [
|
||||||
@@ -81,6 +83,7 @@ class DiarizeService:
|
|||||||
num_speakers: int | None = None,
|
num_speakers: int | None = None,
|
||||||
min_speakers: int | None = None,
|
min_speakers: int | None = None,
|
||||||
max_speakers: int | None = None,
|
max_speakers: int | None = None,
|
||||||
|
hf_token: str | None = None,
|
||||||
) -> DiarizationResult:
|
) -> DiarizationResult:
|
||||||
"""Run speaker diarization on an audio file.
|
"""Run speaker diarization on an audio file.
|
||||||
|
|
||||||
@@ -98,7 +101,7 @@ class DiarizeService:
|
|||||||
progress_message(request_id, 0, "loading_diarization", "Loading diarization model...")
|
progress_message(request_id, 0, "loading_diarization", "Loading diarization model...")
|
||||||
)
|
)
|
||||||
|
|
||||||
pipeline = self._ensure_pipeline()
|
pipeline = self._ensure_pipeline(hf_token=hf_token)
|
||||||
|
|
||||||
write_message(
|
write_message(
|
||||||
progress_message(request_id, 20, "diarizing", "Running speaker diarization...")
|
progress_message(request_id, 20, "diarizing", "Running speaker diarization...")
|
||||||
|
|||||||
@@ -60,6 +60,7 @@ class PipelineService:
|
|||||||
min_speakers: int | None = None,
|
min_speakers: int | None = None,
|
||||||
max_speakers: int | None = None,
|
max_speakers: int | None = None,
|
||||||
skip_diarization: bool = False,
|
skip_diarization: bool = False,
|
||||||
|
hf_token: str | None = None,
|
||||||
) -> PipelineResult:
|
) -> PipelineResult:
|
||||||
"""Run the full transcription + diarization pipeline.
|
"""Run the full transcription + diarization pipeline.
|
||||||
|
|
||||||
@@ -123,6 +124,7 @@ class PipelineService:
|
|||||||
num_speakers=num_speakers,
|
num_speakers=num_speakers,
|
||||||
min_speakers=min_speakers,
|
min_speakers=min_speakers,
|
||||||
max_speakers=max_speakers,
|
max_speakers=max_speakers,
|
||||||
|
hf_token=hf_token,
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(
|
print(
|
||||||
|
|||||||
@@ -52,6 +52,7 @@ pub fn run_pipeline(
|
|||||||
min_speakers: Option<u32>,
|
min_speakers: Option<u32>,
|
||||||
max_speakers: Option<u32>,
|
max_speakers: Option<u32>,
|
||||||
skip_diarization: Option<bool>,
|
skip_diarization: Option<bool>,
|
||||||
|
hf_token: Option<String>,
|
||||||
) -> Result<Value, String> {
|
) -> Result<Value, String> {
|
||||||
let manager = sidecar();
|
let manager = sidecar();
|
||||||
manager.ensure_running()?;
|
manager.ensure_running()?;
|
||||||
@@ -70,6 +71,7 @@ pub fn run_pipeline(
|
|||||||
"min_speakers": min_speakers,
|
"min_speakers": min_speakers,
|
||||||
"max_speakers": max_speakers,
|
"max_speakers": max_speakers,
|
||||||
"skip_diarization": skip_diarization.unwrap_or(false),
|
"skip_diarization": skip_diarization.unwrap_or(false),
|
||||||
|
"hf_token": hf_token,
|
||||||
}),
|
}),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
let { visible, onClose }: Props = $props();
|
let { visible, onClose }: Props = $props();
|
||||||
|
|
||||||
let localSettings = $state<AppSettings>({ ...$settings });
|
let localSettings = $state<AppSettings>({ ...$settings });
|
||||||
let activeTab = $state<'transcription' | 'ai' | 'local'>('transcription');
|
let activeTab = $state<'transcription' | 'speakers' | 'ai' | 'local'>('transcription');
|
||||||
|
|
||||||
// Sync when settings store changes
|
// Sync when settings store changes
|
||||||
$effect(() => {
|
$effect(() => {
|
||||||
@@ -46,6 +46,9 @@
|
|||||||
<button class="tab" class:active={activeTab === 'transcription'} onclick={() => activeTab = 'transcription'}>
|
<button class="tab" class:active={activeTab === 'transcription'} onclick={() => activeTab = 'transcription'}>
|
||||||
Transcription
|
Transcription
|
||||||
</button>
|
</button>
|
||||||
|
<button class="tab" class:active={activeTab === 'speakers'} onclick={() => activeTab = 'speakers'}>
|
||||||
|
Speakers
|
||||||
|
</button>
|
||||||
<button class="tab" class:active={activeTab === 'ai'} onclick={() => activeTab = 'ai'}>
|
<button class="tab" class:active={activeTab === 'ai'} onclick={() => activeTab = 'ai'}>
|
||||||
AI Provider
|
AI Provider
|
||||||
</button>
|
</button>
|
||||||
@@ -77,10 +80,27 @@
|
|||||||
<label for="stt-lang">Language (blank = auto-detect)</label>
|
<label for="stt-lang">Language (blank = auto-detect)</label>
|
||||||
<input id="stt-lang" type="text" bind:value={localSettings.transcription_language} placeholder="e.g., en, es, fr" />
|
<input id="stt-lang" type="text" bind:value={localSettings.transcription_language} placeholder="e.g., en, es, fr" />
|
||||||
</div>
|
</div>
|
||||||
|
{:else if activeTab === 'speakers'}
|
||||||
|
<div class="field">
|
||||||
|
<label for="hf-token">HuggingFace Token</label>
|
||||||
|
<input id="hf-token" type="password" bind:value={localSettings.hf_token} placeholder="hf_..." />
|
||||||
|
</div>
|
||||||
|
<div class="info-box">
|
||||||
|
<p class="info-title">Why is this needed?</p>
|
||||||
|
<p>Speaker detection uses the <strong>pyannote.audio</strong> model, which is hosted on HuggingFace and requires accepting a license agreement.</p>
|
||||||
|
<p class="info-title">How to get a token:</p>
|
||||||
|
<ol>
|
||||||
|
<li>Create a free account at <strong>huggingface.co</strong></li>
|
||||||
|
<li>Go to <strong>huggingface.co/pyannote/speaker-diarization-3.1</strong> and accept the license</li>
|
||||||
|
<li>Go to <strong>huggingface.co/settings/tokens</strong> and create a token with <em>read</em> access</li>
|
||||||
|
<li>Paste the token above and click Save</li>
|
||||||
|
</ol>
|
||||||
|
<p>The model will be downloaded automatically on first use (~100 MB).</p>
|
||||||
|
</div>
|
||||||
<div class="field checkbox">
|
<div class="field checkbox">
|
||||||
<label>
|
<label>
|
||||||
<input type="checkbox" bind:checked={localSettings.skip_diarization} />
|
<input type="checkbox" bind:checked={localSettings.skip_diarization} />
|
||||||
Skip speaker diarization (faster, no speaker labels)
|
Skip speaker detection (faster, no speaker labels)
|
||||||
</label>
|
</label>
|
||||||
</div>
|
</div>
|
||||||
{:else if activeTab === 'ai'}
|
{:else if activeTab === 'ai'}
|
||||||
@@ -252,6 +272,37 @@
|
|||||||
color: #666;
|
color: #666;
|
||||||
line-height: 1.4;
|
line-height: 1.4;
|
||||||
}
|
}
|
||||||
|
.info-box {
|
||||||
|
background: rgba(233, 69, 96, 0.05);
|
||||||
|
border: 1px solid #2a3a5e;
|
||||||
|
border-radius: 6px;
|
||||||
|
padding: 0.75rem 1rem;
|
||||||
|
margin-bottom: 1rem;
|
||||||
|
font-size: 0.8rem;
|
||||||
|
color: #b0b0b0;
|
||||||
|
line-height: 1.5;
|
||||||
|
}
|
||||||
|
.info-box p {
|
||||||
|
margin: 0 0 0.5rem;
|
||||||
|
}
|
||||||
|
.info-box p:last-child {
|
||||||
|
margin-bottom: 0;
|
||||||
|
}
|
||||||
|
.info-box .info-title {
|
||||||
|
color: #e0e0e0;
|
||||||
|
font-weight: 600;
|
||||||
|
font-size: 0.8rem;
|
||||||
|
}
|
||||||
|
.info-box ol {
|
||||||
|
margin: 0.25rem 0 0.5rem;
|
||||||
|
padding-left: 1.25rem;
|
||||||
|
}
|
||||||
|
.info-box li {
|
||||||
|
margin-bottom: 0.25rem;
|
||||||
|
}
|
||||||
|
.info-box strong {
|
||||||
|
color: #e0e0e0;
|
||||||
|
}
|
||||||
.modal-footer {
|
.modal-footer {
|
||||||
display: flex;
|
display: flex;
|
||||||
justify-content: flex-end;
|
justify-content: flex-end;
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ export interface AppSettings {
|
|||||||
transcription_device: string;
|
transcription_device: string;
|
||||||
transcription_language: string;
|
transcription_language: string;
|
||||||
skip_diarization: boolean;
|
skip_diarization: boolean;
|
||||||
|
hf_token: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
const defaults: AppSettings = {
|
const defaults: AppSettings = {
|
||||||
@@ -29,6 +30,7 @@ const defaults: AppSettings = {
|
|||||||
transcription_device: 'cpu',
|
transcription_device: 'cpu',
|
||||||
transcription_language: '',
|
transcription_language: '',
|
||||||
skip_diarization: false,
|
skip_diarization: false,
|
||||||
|
hf_token: '',
|
||||||
};
|
};
|
||||||
|
|
||||||
export const settings = writable<AppSettings>({ ...defaults });
|
export const settings = writable<AppSettings>({ ...defaults });
|
||||||
|
|||||||
@@ -133,6 +133,7 @@
|
|||||||
device: $settings.transcription_device || undefined,
|
device: $settings.transcription_device || undefined,
|
||||||
language: $settings.transcription_language || undefined,
|
language: $settings.transcription_language || undefined,
|
||||||
skipDiarization: $settings.skip_diarization || undefined,
|
skipDiarization: $settings.skip_diarization || undefined,
|
||||||
|
hfToken: $settings.hf_token || undefined,
|
||||||
});
|
});
|
||||||
|
|
||||||
// Create speaker entries from pipeline result
|
// Create speaker entries from pipeline result
|
||||||
|
|||||||
Reference in New Issue
Block a user