Improve import UX: progress overlay, pyannote fix, debug logging

- Enhanced ProgressOverlay with spinner, better styling, and z-index 9999
- Import button shows "Processing..." with pulse animation while transcribing
- Fix pyannote API: use token= instead of deprecated use_auth_token=
- Read HF_TOKEN from environment for pyannote model download
- Add console logging for click-to-seek debugging
- Add color-scheme: dark for native form controls

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-26 17:43:49 -08:00
parent 669d88f143
commit 87b3ad94f9
4 changed files with 87 additions and 40 deletions
--- a/python/voice_to_notes/services/diarize.py
+++ b/python/voice_to_notes/services/diarize.py
@@ -40,40 +40,39 @@ class DiarizeService:
        if self._pipeline is not None:
            return self._pipeline
        import os
        print("[sidecar] Loading pyannote diarization pipeline...", file=sys.stderr, flush=True)
-        try:
+        hf_token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN") or None
            from pyannote.audio import Pipeline
-            self._pipeline = Pipeline.from_pretrained(
+        models = [
-                "pyannote/speaker-diarization-3.1",
+            "pyannote/speaker-diarization-3.1",
-                use_auth_token=False,
+            "pyannote/speaker-diarization",
-            )
+        ]
-        except Exception:
+
-            # Fall back to a simpler approach if the model isn't available
+        last_error: Exception | None = None
-            # pyannote requires HuggingFace token for some models
+        for model_name in models:
            # Try the community model first
            try:
                from pyannote.audio import Pipeline
-                self._pipeline = Pipeline.from_pretrained(
+                self._pipeline = Pipeline.from_pretrained(model_name, token=hf_token)
-                    "pyannote/speaker-diarization",
+                print(f"[sidecar] Loaded diarization model: {model_name}", file=sys.stderr, flush=True)
-                    use_auth_token=False,
+                return self._pipeline
                )
            except Exception as e:
                last_error = e
                print(
-                    f"[sidecar] Warning: Could not load pyannote pipeline: {e}",
+                    f"[sidecar] Warning: Could not load {model_name}: {e}",
                    file=sys.stderr,
                    flush=True,
                )
                raise RuntimeError(
                    "pyannote.audio pipeline not available. "
                    "You may need to accept the model license at "
                    "https://huggingface.co/pyannote/speaker-diarization-3.1 "
                    "and set a HF_TOKEN environment variable."
                ) from e
-        return self._pipeline
+        raise RuntimeError(
            "pyannote.audio pipeline not available. "
            "You may need to accept the model license at "
            "https://huggingface.co/pyannote/speaker-diarization-3.1 "
            "and set a HF_TOKEN environment variable."
        ) from last_error
    def diarize(
        self,
--- a/src/lib/components/ProgressOverlay.svelte
+++ b/src/lib/components/ProgressOverlay.svelte
@@ -12,11 +12,15 @@
 {#if visible}
  <div class="overlay">
    <div class="progress-card">
-      <h3>{stage}</h3>
+      <div class="spinner-row">
-      <div class="bar-track">
+        <div class="spinner"></div>
-        <div class="bar-fill" style="width: {percent}%"></div>
+        <h3>{stage || 'Processing...'}</h3>
      </div>
-      <p>{percent}% — {message}</p>
+      <div class="bar-track">
        <div class="bar-fill" style="width: {Math.max(percent, 2)}%"></div>
      </div>
      <p class="status-text">{percent}% — {message || 'Please wait...'}</p>
      <p class="hint-text">This may take several minutes for large files</p>
    </div>
  </div>
 {/if}
@@ -25,34 +29,63 @@
  .overlay {
    position: fixed;
    inset: 0;
-    background: rgba(0, 0, 0, 0.7);
+    background: rgba(0, 0, 0, 0.8);
    display: flex;
    align-items: center;
    justify-content: center;
-    z-index: 1000;
+    z-index: 9999;
  }
  .progress-card {
    background: #16213e;
-    padding: 2rem;
+    padding: 2rem 2.5rem;
    border-radius: 12px;
-    min-width: 400px;
+    min-width: 420px;
    color: #e0e0e0;
    border: 1px solid #2a3a5e;
    box-shadow: 0 8px 32px rgba(0, 0, 0, 0.5);
  }
  .spinner-row {
    display: flex;
    align-items: center;
    gap: 0.75rem;
    margin-bottom: 1.25rem;
  }
  .spinner {
    width: 20px;
    height: 20px;
    border: 3px solid #2a3a5e;
    border-top-color: #e94560;
    border-radius: 50%;
    animation: spin 0.8s linear infinite;
  }
  @keyframes spin {
    to { transform: rotate(360deg); }
  }
  h3 {
    margin: 0;
    text-transform: capitalize;
    font-size: 1.1rem;
  }
  h3 { margin: 0 0 1rem; text-transform: capitalize; }
  .bar-track {
-    height: 8px;
+    height: 10px;
    background: #0f3460;
-    border-radius: 4px;
+    border-radius: 5px;
    overflow: hidden;
  }
  .bar-fill {
    height: 100%;
-    background: #e94560;
+    background: linear-gradient(90deg, #e94560, #ff6b81);
    transition: width 0.3s;
    border-radius: 5px;
  }
-  p {
+  .status-text {
    margin: 0.75rem 0 0;
    font-size: 0.9rem;
    color: #b0b0b0;
  }
  .hint-text {
    margin: 0.5rem 0 0;
-    font-size: 0.875rem;
+    font-size: 0.75rem;
-    color: #999;
+    color: #666;
  }
 </style>
--- a/src/lib/components/WaveformPlayer.svelte
+++ b/src/lib/components/WaveformPlayer.svelte
@@ -78,6 +78,7 @@
  /** Seek to a specific time in milliseconds. Called from transcript click-to-seek. */
  export function seekTo(timeMs: number) {
    console.log('[voice-to-notes] seekTo called:', timeMs, 'ms, wavesurfer:', !!wavesurfer, 'duration:', wavesurfer?.getDuration());
    if (wavesurfer) {
      wavesurfer.setTime(timeMs / 1000);
      if (!wavesurfer.isPlaying()) {
--- a/src/routes/+page.svelte
+++ b/src/routes/+page.svelte
@@ -68,6 +68,7 @@
  const speakerColors = ['#e94560', '#4ecdc4', '#ffe66d', '#a8e6cf', '#ff8b94', '#c7ceea', '#ffd93d', '#6bcb77'];
  function handleWordClick(timeMs: number) {
    console.log('[voice-to-notes] Word clicked, seeking to', timeMs, 'ms');
    waveformPlayer?.seekTo(timeMs);
  }
@@ -232,8 +233,12 @@
 <div class="app-header">
  <h1>Voice to Notes</h1>
  <div class="header-actions">
-    <button class="import-btn" onclick={handleFileImport}>
+    <button class="import-btn" onclick={handleFileImport} disabled={isTranscribing}>
-      Import Audio/Video
+      {#if isTranscribing}
        Processing...
      {:else}
        Import Audio/Video
      {/if}
    </button>
    <button class="settings-btn" onclick={() => showSettings = true} title="Settings">
      Settings
@@ -303,9 +308,18 @@
    font-size: 0.875rem;
    font-weight: 500;
  }
-  .import-btn:hover {
+  .import-btn:hover:not(:disabled) {
    background: #d63851;
  }
  .import-btn:disabled {
    opacity: 0.7;
    cursor: not-allowed;
    animation: pulse 1.5s ease-in-out infinite;
  }
  @keyframes pulse {
    0%, 100% { opacity: 0.7; }
    50% { opacity: 1; }
  }
  .header-actions {
    display: flex;
    gap: 0.5rem;