From ed626b8ba06404aea66510ac2edad17e44ffd874 Mon Sep 17 00:00:00 2001 From: Josh Knapp Date: Thu, 26 Feb 2026 18:02:48 -0800 Subject: [PATCH] Fix progress overlay, play-from-position, layout cutoff, speaker info - Replace progress bar with task checklist showing pipeline steps (load model, transcribe, load diarization, identify speakers, merge) - Fix WaveformPlayer: track ready state, disable controls until loaded, play from current position instead of resetting to start - Fix workspace height calc to prevent bottom content cutoff - Show HF_TOKEN setup hint in SpeakerManager when no speakers detected - Add console logging for progress events to aid debugging Co-Authored-By: Claude Opus 4.6 --- python/voice_to_notes/services/pipeline.py | 2 +- src/lib/components/ProgressOverlay.svelte | 102 +++++++++++++++++---- src/lib/components/SpeakerManager.svelte | 19 +++- src/lib/components/WaveformPlayer.svelte | 50 +++++++--- src/routes/+page.svelte | 4 +- 5 files changed, 141 insertions(+), 36 deletions(-) diff --git a/python/voice_to_notes/services/pipeline.py b/python/voice_to_notes/services/pipeline.py index fe4bf04..59f8186 100644 --- a/python/voice_to_notes/services/pipeline.py +++ b/python/voice_to_notes/services/pipeline.py @@ -140,7 +140,7 @@ class PipelineService: # Step 3: Merge (or skip if diarization failed) if diarization is not None: write_message( - progress_message(request_id, 90, "pipeline", "Merging transcript with speakers...") + progress_message(request_id, 90, "merging", "Merging transcript with speakers...") ) result = self._merge_results(transcription, diarization.speaker_segments) result.speakers = diarization.speakers diff --git a/src/lib/components/ProgressOverlay.svelte b/src/lib/components/ProgressOverlay.svelte index d88a147..2b7f40c 100644 --- a/src/lib/components/ProgressOverlay.svelte +++ b/src/lib/components/ProgressOverlay.svelte @@ -7,6 +7,38 @@ } let { visible = false, percent = 0, stage = '', message = '' }: Props = $props(); + + // Map internal stage names to user-friendly labels + const stageLabels: Record = { + 'pipeline': 'Pipeline', + 'loading_model': 'Loading Model', + 'transcribing': 'Transcribing', + 'loading_diarization': 'Loading Diarization', + 'diarizing': 'Speaker Detection', + 'done': 'Complete', + }; + + // Pipeline steps for the task list + const pipelineSteps = [ + { key: 'loading_model', label: 'Load transcription model' }, + { key: 'transcribing', label: 'Transcribe audio' }, + { key: 'loading_diarization', label: 'Load speaker detection model' }, + { key: 'diarizing', label: 'Identify speakers' }, + { key: 'merging', label: 'Merge results' }, + ]; + + function getStepStatus(stepKey: string, currentStage: string): 'pending' | 'active' | 'done' { + const stepOrder = pipelineSteps.map(s => s.key); + const currentIdx = stepOrder.indexOf(currentStage); + const stepIdx = stepOrder.indexOf(stepKey); + + if (currentStage === 'done') return 'done'; + if (stepIdx < currentIdx) return 'done'; + if (stepIdx === currentIdx) return 'active'; + return 'pending'; + } + + let displayStage = $derived(stageLabels[stage] || stage || 'Processing...'); {#if visible} @@ -14,12 +46,28 @@
-

{stage || 'Processing...'}

+

{displayStage}

-
-
+ +
+ {#each pipelineSteps as step} + {@const status = getStepStatus(step.key, stage)} +
+ + {#if status === 'done'} + ✓ + {:else if status === 'active'} + ⟳ + {:else} + · + {/if} + + {step.label} +
+ {/each}
-

{percent}% — {message || 'Please wait...'}

+ +

{message || 'Please wait...'}

This may take several minutes for large files

@@ -39,7 +87,8 @@ background: #16213e; padding: 2rem 2.5rem; border-radius: 12px; - min-width: 420px; + min-width: 380px; + max-width: 440px; color: #e0e0e0; border: 1px solid #2a3a5e; box-shadow: 0 8px 32px rgba(0, 0, 0, 0.5); @@ -57,35 +106,52 @@ border-top-color: #e94560; border-radius: 50%; animation: spin 0.8s linear infinite; + flex-shrink: 0; } @keyframes spin { to { transform: rotate(360deg); } } h3 { margin: 0; - text-transform: capitalize; font-size: 1.1rem; } - .bar-track { - height: 10px; - background: #0f3460; - border-radius: 5px; - overflow: hidden; + .steps { + display: flex; + flex-direction: column; + gap: 0.4rem; + margin-bottom: 1rem; } - .bar-fill { - height: 100%; - background: linear-gradient(90deg, #e94560, #ff6b81); - transition: width 0.3s; - border-radius: 5px; + .step { + display: flex; + align-items: center; + gap: 0.5rem; + font-size: 0.85rem; + color: #555; + } + .step-done { + color: #4ecdc4; + } + .step-active { + color: #e0e0e0; + font-weight: 500; + } + .step-icon { + width: 1.2rem; + text-align: center; + flex-shrink: 0; + } + .step-active .step-icon { + animation: spin 1.5s linear infinite; + display: inline-block; } .status-text { margin: 0.75rem 0 0; - font-size: 0.9rem; + font-size: 0.85rem; color: #b0b0b0; } .hint-text { margin: 0.5rem 0 0; font-size: 0.75rem; - color: #666; + color: #555; } diff --git a/src/lib/components/SpeakerManager.svelte b/src/lib/components/SpeakerManager.svelte index c5a925b..a3c773a 100644 --- a/src/lib/components/SpeakerManager.svelte +++ b/src/lib/components/SpeakerManager.svelte @@ -34,7 +34,11 @@

Speakers

{#if $speakers.length === 0} -

No speakers detected yet

+

No speakers detected

+

+ Speaker detection requires a HuggingFace token. + Set the HF_TOKEN environment variable and restart. +

{:else}
    {#each $speakers as speaker (speaker.id)} @@ -78,6 +82,19 @@ .empty-hint { color: #666; font-size: 0.875rem; + margin-bottom: 0.25rem; + } + .setup-hint { + color: #555; + font-size: 0.75rem; + line-height: 1.4; + } + .setup-hint code { + background: rgba(233, 69, 96, 0.15); + color: #e94560; + padding: 0.1rem 0.3rem; + border-radius: 3px; + font-size: 0.7rem; } .speaker-list { list-style: none; diff --git a/src/lib/components/WaveformPlayer.svelte b/src/lib/components/WaveformPlayer.svelte index 10c230c..ae45220 100644 --- a/src/lib/components/WaveformPlayer.svelte +++ b/src/lib/components/WaveformPlayer.svelte @@ -12,6 +12,7 @@ let container: HTMLDivElement; let wavesurfer: WaveSurfer | null = $state(null); + let isReady = $state(false); let currentTime = $state('0:00'); let totalTime = $state('0:00'); @@ -39,6 +40,7 @@ }); wavesurfer.on('ready', () => { + isReady = true; const dur = wavesurfer!.getDuration(); durationMs.set(Math.round(dur * 1000)); totalTime = formatTime(dur); @@ -48,6 +50,10 @@ wavesurfer.on('pause', () => isPlaying.set(false)); wavesurfer.on('finish', () => isPlaying.set(false)); + wavesurfer.on('loading', () => { + isReady = false; + }); + if (audioUrl) { wavesurfer.load(audioUrl); } @@ -57,20 +63,21 @@ wavesurfer?.destroy(); }); - /** Toggle play/pause. Exposed for keyboard shortcuts. */ + /** Toggle play/pause from current position. Exposed for keyboard shortcuts. */ export function togglePlayPause() { - wavesurfer?.playPause(); + if (!wavesurfer || !isReady) return; + wavesurfer.playPause(); } function skipBack() { - if (wavesurfer) { + if (wavesurfer && isReady) { const time = Math.max(0, wavesurfer.getCurrentTime() - 5); wavesurfer.setTime(time); } } function skipForward() { - if (wavesurfer) { + if (wavesurfer && isReady) { const time = Math.min(wavesurfer.getDuration(), wavesurfer.getCurrentTime() + 5); wavesurfer.setTime(time); } @@ -78,17 +85,20 @@ /** Seek to a specific time in milliseconds. Called from transcript click-to-seek. */ export function seekTo(timeMs: number) { - console.log('[voice-to-notes] seekTo called:', timeMs, 'ms, wavesurfer:', !!wavesurfer, 'duration:', wavesurfer?.getDuration()); - if (wavesurfer) { - wavesurfer.setTime(timeMs / 1000); - if (!wavesurfer.isPlaying()) { - wavesurfer.play(); - } + if (!wavesurfer || !isReady) { + console.warn('[voice-to-notes] seekTo ignored — audio not ready yet'); + return; + } + const timeSec = timeMs / 1000; + wavesurfer.setTime(timeSec); + if (!wavesurfer.isPlaying()) { + wavesurfer.play(); } } /** Load a new audio file. */ export function loadAudio(url: string) { + isReady = false; wavesurfer?.load(url); } @@ -96,11 +106,17 @@
    - - + - + {currentTime} / {totalTime}
    @@ -130,9 +146,13 @@ cursor: pointer; font-size: 1rem; } - .control-btn:hover { + .control-btn:hover:not(:disabled) { background: #1a4a7a; } + .control-btn:disabled { + opacity: 0.4; + cursor: not-allowed; + } .play-btn { padding: 0.4rem 1rem; font-size: 1.2rem; diff --git a/src/routes/+page.svelte b/src/routes/+page.svelte index d1e1eba..8b88227 100644 --- a/src/routes/+page.svelte +++ b/src/routes/+page.svelte @@ -102,6 +102,7 @@ stage: string; message: string; }>('pipeline-progress', (event) => { + console.log('[voice-to-notes] Progress event:', event.payload); const { percent, stage, message } = event.payload; if (typeof percent === 'number') transcriptionProgress = percent; if (typeof stage === 'string') transcriptionStage = stage; @@ -387,7 +388,8 @@ display: flex; gap: 1rem; padding: 1rem; - height: calc(100vh - 3.5rem); + height: calc(100vh - 3rem); + overflow: hidden; background: #0a0a23; } .main-content {