Improve import UX: progress overlay, pyannote fix, debug logging

- Enhance ProgressOverlay with spinner, better styling, and z-index 9999
- Show "Processing..." with a pulse animation on the Import button while transcribing
- Fix pyannote API: use token= instead of deprecated use_auth_token=
- Read HF_TOKEN from environment for pyannote model download
- Add console logging for click-to-seek debugging
- Add color-scheme: dark for native form controls

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-26 17:43:49 -08:00
parent 669d88f143
commit 87b3ad94f9
4 changed files with 87 additions and 40 deletions

View File

@@ -40,40 +40,39 @@ class DiarizeService:
if self._pipeline is not None: if self._pipeline is not None:
return self._pipeline return self._pipeline
import os
print("[sidecar] Loading pyannote diarization pipeline...", file=sys.stderr, flush=True) print("[sidecar] Loading pyannote diarization pipeline...", file=sys.stderr, flush=True)
try: hf_token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN") or None
from pyannote.audio import Pipeline
self._pipeline = Pipeline.from_pretrained( models = [
"pyannote/speaker-diarization-3.1", "pyannote/speaker-diarization-3.1",
use_auth_token=False, "pyannote/speaker-diarization",
) ]
except Exception:
# Fall back to a simpler approach if the model isn't available last_error: Exception | None = None
# pyannote requires HuggingFace token for some models for model_name in models:
# Try the community model first
try: try:
from pyannote.audio import Pipeline from pyannote.audio import Pipeline
self._pipeline = Pipeline.from_pretrained( self._pipeline = Pipeline.from_pretrained(model_name, token=hf_token)
"pyannote/speaker-diarization", print(f"[sidecar] Loaded diarization model: {model_name}", file=sys.stderr, flush=True)
use_auth_token=False, return self._pipeline
)
except Exception as e: except Exception as e:
last_error = e
print( print(
f"[sidecar] Warning: Could not load pyannote pipeline: {e}", f"[sidecar] Warning: Could not load {model_name}: {e}",
file=sys.stderr, file=sys.stderr,
flush=True, flush=True,
) )
raise RuntimeError(
"pyannote.audio pipeline not available. "
"You may need to accept the model license at "
"https://huggingface.co/pyannote/speaker-diarization-3.1 "
"and set a HF_TOKEN environment variable."
) from e
return self._pipeline raise RuntimeError(
"pyannote.audio pipeline not available. "
"You may need to accept the model license at "
"https://huggingface.co/pyannote/speaker-diarization-3.1 "
"and set a HF_TOKEN environment variable."
) from last_error
def diarize( def diarize(
self, self,

View File

@@ -12,11 +12,15 @@
{#if visible} {#if visible}
<div class="overlay"> <div class="overlay">
<div class="progress-card"> <div class="progress-card">
<h3>{stage}</h3> <div class="spinner-row">
<div class="bar-track"> <div class="spinner"></div>
<div class="bar-fill" style="width: {percent}%"></div> <h3>{stage || 'Processing...'}</h3>
</div> </div>
<p>{percent}% — {message}</p> <div class="bar-track">
<div class="bar-fill" style="width: {Math.max(percent, 2)}%"></div>
</div>
<p class="status-text">{percent}% — {message || 'Please wait...'}</p>
<p class="hint-text">This may take several minutes for large files</p>
</div> </div>
</div> </div>
{/if} {/if}
@@ -25,34 +29,63 @@
.overlay { .overlay {
position: fixed; position: fixed;
inset: 0; inset: 0;
background: rgba(0, 0, 0, 0.7); background: rgba(0, 0, 0, 0.8);
display: flex; display: flex;
align-items: center; align-items: center;
justify-content: center; justify-content: center;
z-index: 1000; z-index: 9999;
} }
.progress-card { .progress-card {
background: #16213e; background: #16213e;
padding: 2rem; padding: 2rem 2.5rem;
border-radius: 12px; border-radius: 12px;
min-width: 400px; min-width: 420px;
color: #e0e0e0; color: #e0e0e0;
border: 1px solid #2a3a5e;
box-shadow: 0 8px 32px rgba(0, 0, 0, 0.5);
}
.spinner-row {
display: flex;
align-items: center;
gap: 0.75rem;
margin-bottom: 1.25rem;
}
.spinner {
width: 20px;
height: 20px;
border: 3px solid #2a3a5e;
border-top-color: #e94560;
border-radius: 50%;
animation: spin 0.8s linear infinite;
}
@keyframes spin {
to { transform: rotate(360deg); }
}
h3 {
margin: 0;
text-transform: capitalize;
font-size: 1.1rem;
} }
h3 { margin: 0 0 1rem; text-transform: capitalize; }
.bar-track { .bar-track {
height: 8px; height: 10px;
background: #0f3460; background: #0f3460;
border-radius: 4px; border-radius: 5px;
overflow: hidden; overflow: hidden;
} }
.bar-fill { .bar-fill {
height: 100%; height: 100%;
background: #e94560; background: linear-gradient(90deg, #e94560, #ff6b81);
transition: width 0.3s; transition: width 0.3s;
border-radius: 5px;
} }
p { .status-text {
margin: 0.75rem 0 0;
font-size: 0.9rem;
color: #b0b0b0;
}
.hint-text {
margin: 0.5rem 0 0; margin: 0.5rem 0 0;
font-size: 0.875rem; font-size: 0.75rem;
color: #999; color: #666;
} }
</style> </style>

View File

@@ -78,6 +78,7 @@
/** Seek to a specific time in milliseconds. Called from transcript click-to-seek. */ /** Seek to a specific time in milliseconds. Called from transcript click-to-seek. */
export function seekTo(timeMs: number) { export function seekTo(timeMs: number) {
console.log('[voice-to-notes] seekTo called:', timeMs, 'ms, wavesurfer:', !!wavesurfer, 'duration:', wavesurfer?.getDuration());
if (wavesurfer) { if (wavesurfer) {
wavesurfer.setTime(timeMs / 1000); wavesurfer.setTime(timeMs / 1000);
if (!wavesurfer.isPlaying()) { if (!wavesurfer.isPlaying()) {

View File

@@ -68,6 +68,7 @@
const speakerColors = ['#e94560', '#4ecdc4', '#ffe66d', '#a8e6cf', '#ff8b94', '#c7ceea', '#ffd93d', '#6bcb77']; const speakerColors = ['#e94560', '#4ecdc4', '#ffe66d', '#a8e6cf', '#ff8b94', '#c7ceea', '#ffd93d', '#6bcb77'];
function handleWordClick(timeMs: number) { function handleWordClick(timeMs: number) {
console.log('[voice-to-notes] Word clicked, seeking to', timeMs, 'ms');
waveformPlayer?.seekTo(timeMs); waveformPlayer?.seekTo(timeMs);
} }
@@ -232,8 +233,12 @@
<div class="app-header"> <div class="app-header">
<h1>Voice to Notes</h1> <h1>Voice to Notes</h1>
<div class="header-actions"> <div class="header-actions">
<button class="import-btn" onclick={handleFileImport}> <button class="import-btn" onclick={handleFileImport} disabled={isTranscribing}>
Import Audio/Video {#if isTranscribing}
Processing...
{:else}
Import Audio/Video
{/if}
</button> </button>
<button class="settings-btn" onclick={() => showSettings = true} title="Settings"> <button class="settings-btn" onclick={() => showSettings = true} title="Settings">
Settings Settings
@@ -303,9 +308,18 @@
font-size: 0.875rem; font-size: 0.875rem;
font-weight: 500; font-weight: 500;
} }
.import-btn:hover { .import-btn:hover:not(:disabled) {
background: #d63851; background: #d63851;
} }
.import-btn:disabled {
opacity: 0.7;
cursor: not-allowed;
animation: pulse 1.5s ease-in-out infinite;
}
@keyframes pulse {
0%, 100% { opacity: 0.7; }
50% { opacity: 1; }
}
.header-actions { .header-actions {
display: flex; display: flex;
gap: 0.5rem; gap: 0.5rem;