diff --git a/python/voice_to_notes/services/diarize.py b/python/voice_to_notes/services/diarize.py
index 201ca9c..77079c3 100644
--- a/python/voice_to_notes/services/diarize.py
+++ b/python/voice_to_notes/services/diarize.py
@@ -40,40 +40,39 @@ class DiarizeService:
if self._pipeline is not None:
return self._pipeline
+ import os
+
print("[sidecar] Loading pyannote diarization pipeline...", file=sys.stderr, flush=True)
- try:
- from pyannote.audio import Pipeline
+ hf_token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN") or None
- self._pipeline = Pipeline.from_pretrained(
- "pyannote/speaker-diarization-3.1",
- use_auth_token=False,
- )
- except Exception:
- # Fall back to a simpler approach if the model isn't available
- # pyannote requires HuggingFace token for some models
- # Try the community model first
+ models = [
+ "pyannote/speaker-diarization-3.1",
+ "pyannote/speaker-diarization",
+ ]
+
+ last_error: Exception | None = None
+        # Try each candidate model in order; the first one that loads is cached
+        # on self._pipeline and returned.
+        for model_name in models:
             try:
                 from pyannote.audio import Pipeline
-                self._pipeline = Pipeline.from_pretrained(
-                    "pyannote/speaker-diarization",
-                    use_auth_token=False,
-                )
+                pipeline = Pipeline.from_pretrained(model_name, token=hf_token)
+                # from_pretrained can signal a gated/private repo or a bad token
+                # by returning None instead of raising. Treat that as a failure
+                # so None is never cached or reported as "loaded"; raising here
+                # goes through the handler below and moves on to the next model.
+                if pipeline is None:
+                    raise RuntimeError(
+                        f"Pipeline.from_pretrained returned None for {model_name} "
+                        "(model may be gated/private or the token missing/invalid)"
+                    )
+                self._pipeline = pipeline
+                print(f"[sidecar] Loaded diarization model: {model_name}", file=sys.stderr, flush=True)
+                return self._pipeline
             except Exception as e:
+                # Keep the most recent failure so the final RuntimeError can chain it.
+                last_error = e
                 print(
-                    f"[sidecar] Warning: Could not load pyannote pipeline: {e}",
+                    f"[sidecar] Warning: Could not load {model_name}: {e}",
                     file=sys.stderr,
                     flush=True,
                 )
- raise RuntimeError(
- "pyannote.audio pipeline not available. "
- "You may need to accept the model license at "
- "https://huggingface.co/pyannote/speaker-diarization-3.1 "
- "and set a HF_TOKEN environment variable."
- ) from e
- return self._pipeline
+ raise RuntimeError(
+ "pyannote.audio pipeline not available. "
+ "You may need to accept the model license at "
+ "https://huggingface.co/pyannote/speaker-diarization-3.1 "
+ "and set a HF_TOKEN environment variable."
+ ) from last_error
def diarize(
self,
diff --git a/src/lib/components/ProgressOverlay.svelte b/src/lib/components/ProgressOverlay.svelte
index acb2725..d88a147 100644
--- a/src/lib/components/ProgressOverlay.svelte
+++ b/src/lib/components/ProgressOverlay.svelte
@@ -12,11 +12,15 @@
{#if visible}
-
{stage}
-
-
+
+
+
{stage || 'Processing...'}
-
{percent}% — {message}
+
+
{percent}% — {message || 'Please wait...'}
+
This may take several minutes for large files
{/if}
@@ -25,34 +29,63 @@
.overlay {
position: fixed;
inset: 0;
- background: rgba(0, 0, 0, 0.7);
+ background: rgba(0, 0, 0, 0.8);
display: flex;
align-items: center;
justify-content: center;
- z-index: 1000;
+ z-index: 9999;
}
.progress-card {
background: #16213e;
- padding: 2rem;
+ padding: 2rem 2.5rem;
border-radius: 12px;
- min-width: 400px;
+ min-width: 420px;
color: #e0e0e0;
+ border: 1px solid #2a3a5e;
+ box-shadow: 0 8px 32px rgba(0, 0, 0, 0.5);
+ }
+ .spinner-row {
+ display: flex;
+ align-items: center;
+ gap: 0.75rem;
+ margin-bottom: 1.25rem;
+ }
+ .spinner {
+ width: 20px;
+ height: 20px;
+ border: 3px solid #2a3a5e;
+ border-top-color: #e94560;
+ border-radius: 50%;
+ animation: spin 0.8s linear infinite;
+ }
+ @keyframes spin {
+ to { transform: rotate(360deg); }
+ }
+ h3 {
+ margin: 0;
+ text-transform: capitalize;
+ font-size: 1.1rem;
}
- h3 { margin: 0 0 1rem; text-transform: capitalize; }
.bar-track {
- height: 8px;
+ height: 10px;
background: #0f3460;
- border-radius: 4px;
+ border-radius: 5px;
overflow: hidden;
}
.bar-fill {
height: 100%;
- background: #e94560;
+ background: linear-gradient(90deg, #e94560, #ff6b81);
transition: width 0.3s;
+ border-radius: 5px;
}
- p {
+ .status-text {
+ margin: 0.75rem 0 0;
+ font-size: 0.9rem;
+ color: #b0b0b0;
+ }
+ .hint-text {
margin: 0.5rem 0 0;
- font-size: 0.875rem;
- color: #999;
+ font-size: 0.75rem;
+ color: #666;
}
diff --git a/src/lib/components/WaveformPlayer.svelte b/src/lib/components/WaveformPlayer.svelte
index 18bb373..10c230c 100644
--- a/src/lib/components/WaveformPlayer.svelte
+++ b/src/lib/components/WaveformPlayer.svelte
@@ -78,6 +78,7 @@
/** Seek to a specific time in milliseconds. Called from transcript click-to-seek. */
export function seekTo(timeMs: number) {
+ console.log('[voice-to-notes] seekTo called:', timeMs, 'ms, wavesurfer:', !!wavesurfer, 'duration:', wavesurfer?.getDuration());
if (wavesurfer) {
wavesurfer.setTime(timeMs / 1000);
if (!wavesurfer.isPlaying()) {
diff --git a/src/routes/+page.svelte b/src/routes/+page.svelte
index 2260060..061e480 100644
--- a/src/routes/+page.svelte
+++ b/src/routes/+page.svelte
@@ -68,6 +68,7 @@
const speakerColors = ['#e94560', '#4ecdc4', '#ffe66d', '#a8e6cf', '#ff8b94', '#c7ceea', '#ffd93d', '#6bcb77'];
function handleWordClick(timeMs: number) {
+ console.log('[voice-to-notes] Word clicked, seeking to', timeMs, 'ms');
waveformPlayer?.seekTo(timeMs);
}
@@ -232,8 +233,12 @@