diff --git a/python/voice_to_notes/services/diarize.py b/python/voice_to_notes/services/diarize.py index 201ca9c..77079c3 100644 --- a/python/voice_to_notes/services/diarize.py +++ b/python/voice_to_notes/services/diarize.py @@ -40,40 +40,39 @@ class DiarizeService: if self._pipeline is not None: return self._pipeline + import os + print("[sidecar] Loading pyannote diarization pipeline...", file=sys.stderr, flush=True) - try: - from pyannote.audio import Pipeline + hf_token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN") or None - self._pipeline = Pipeline.from_pretrained( - "pyannote/speaker-diarization-3.1", - use_auth_token=False, - ) - except Exception: - # Fall back to a simpler approach if the model isn't available - # pyannote requires HuggingFace token for some models - # Try the community model first + models = [ + "pyannote/speaker-diarization-3.1", + "pyannote/speaker-diarization", + ] + + last_error: Exception | None = None + for model_name in models: try: from pyannote.audio import Pipeline - self._pipeline = Pipeline.from_pretrained( - "pyannote/speaker-diarization", - use_auth_token=False, - ) + self._pipeline = Pipeline.from_pretrained(model_name, token=hf_token) + print(f"[sidecar] Loaded diarization model: {model_name}", file=sys.stderr, flush=True) + return self._pipeline except Exception as e: + last_error = e print( - f"[sidecar] Warning: Could not load pyannote pipeline: {e}", + f"[sidecar] Warning: Could not load {model_name}: {e}", file=sys.stderr, flush=True, ) - raise RuntimeError( - "pyannote.audio pipeline not available. " - "You may need to accept the model license at " - "https://huggingface.co/pyannote/speaker-diarization-3.1 " - "and set a HF_TOKEN environment variable." - ) from e - return self._pipeline + raise RuntimeError( + "pyannote.audio pipeline not available. " + "You may need to accept the model license at " + "https://huggingface.co/pyannote/speaker-diarization-3.1 " + "and set a HF_TOKEN environment variable." + ) from last_error def diarize( self, diff --git a/src/lib/components/ProgressOverlay.svelte b/src/lib/components/ProgressOverlay.svelte index acb2725..d88a147 100644 --- a/src/lib/components/ProgressOverlay.svelte +++ b/src/lib/components/ProgressOverlay.svelte @@ -12,11 +12,15 @@ {#if visible}
-

{stage}

-
-
+
+
+

{stage || 'Processing...'}

-

{percent}% — {message}

+
+
+
+

{percent}% — {message || 'Please wait...'}

+

This may take several minutes for large files

{/if} @@ -25,34 +29,63 @@ .overlay { position: fixed; inset: 0; - background: rgba(0, 0, 0, 0.7); + background: rgba(0, 0, 0, 0.8); display: flex; align-items: center; justify-content: center; - z-index: 1000; + z-index: 9999; } .progress-card { background: #16213e; - padding: 2rem; + padding: 2rem 2.5rem; border-radius: 12px; - min-width: 400px; + min-width: 420px; color: #e0e0e0; + border: 1px solid #2a3a5e; + box-shadow: 0 8px 32px rgba(0, 0, 0, 0.5); + } + .spinner-row { + display: flex; + align-items: center; + gap: 0.75rem; + margin-bottom: 1.25rem; + } + .spinner { + width: 20px; + height: 20px; + border: 3px solid #2a3a5e; + border-top-color: #e94560; + border-radius: 50%; + animation: spin 0.8s linear infinite; + } + @keyframes spin { + to { transform: rotate(360deg); } + } + h3 { + margin: 0; + text-transform: capitalize; + font-size: 1.1rem; } - h3 { margin: 0 0 1rem; text-transform: capitalize; } .bar-track { - height: 8px; + height: 10px; background: #0f3460; - border-radius: 4px; + border-radius: 5px; overflow: hidden; } .bar-fill { height: 100%; - background: #e94560; + background: linear-gradient(90deg, #e94560, #ff6b81); transition: width 0.3s; + border-radius: 5px; } - p { + .status-text { + margin: 0.75rem 0 0; + font-size: 0.9rem; + color: #b0b0b0; + } + .hint-text { margin: 0.5rem 0 0; - font-size: 0.875rem; - color: #999; + font-size: 0.75rem; + color: #666; } diff --git a/src/lib/components/WaveformPlayer.svelte b/src/lib/components/WaveformPlayer.svelte index 18bb373..10c230c 100644 --- a/src/lib/components/WaveformPlayer.svelte +++ b/src/lib/components/WaveformPlayer.svelte @@ -78,6 +78,7 @@ /** Seek to a specific time in milliseconds. Called from transcript click-to-seek. */ export function seekTo(timeMs: number) { + console.log('[voice-to-notes] seekTo called:', timeMs, 'ms, wavesurfer:', !!wavesurfer, 'duration:', wavesurfer?.getDuration()); if (wavesurfer) { wavesurfer.setTime(timeMs / 1000); if (!wavesurfer.isPlaying()) { diff --git a/src/routes/+page.svelte b/src/routes/+page.svelte index 2260060..061e480 100644 --- a/src/routes/+page.svelte +++ b/src/routes/+page.svelte @@ -68,6 +68,7 @@ const speakerColors = ['#e94560', '#4ecdc4', '#ffe66d', '#a8e6cf', '#ff8b94', '#c7ceea', '#ffd93d', '#6bcb77']; function handleWordClick(timeMs: number) { + console.log('[voice-to-notes] Word clicked, seeking to', timeMs, 'ms'); waveformPlayer?.seekTo(timeMs); } @@ -232,8 +233,12 @@

Voice to Notes

-