From 02c70f90c889fe929c6da932146cd03fcca366f0 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 22 Mar 2026 20:04:07 -0700 Subject: [PATCH] Extract audio from video files before loading Video files (MP4, MKV, etc.) are now processed with ffmpeg to extract audio to a temp WAV file before loading into wavesurfer. This prevents the WebView crash caused by trying to fetch multi-GB files into memory. - New extract_audio Tauri command uses ffmpeg (sidecar-bundled or system) - Frontend detects video extensions and extracts audio automatically - User-friendly error if ffmpeg is not installed with install instructions - Reverted wavesurfer MediaElement approach in favor of clean extraction - Added FFmpeg install guide to USER_GUIDE.md Co-Authored-By: Claude Opus 4.6 --- docs/USER_GUIDE.md | 41 +++++++++- src-tauri/src/commands/media.rs | 95 ++++++++++++++++++++++++ src-tauri/src/commands/mod.rs | 1 + src-tauri/src/lib.rs | 2 + src/lib/components/WaveformPlayer.svelte | 6 ++ src/routes/+page.svelte | 31 +++++++- 6 files changed, 172 insertions(+), 4 deletions(-) create mode 100644 src-tauri/src/commands/media.rs diff --git a/docs/USER_GUIDE.md b/docs/USER_GUIDE.md index bee5eb0..f553709 100644 --- a/docs/USER_GUIDE.md +++ b/docs/USER_GUIDE.md @@ -26,10 +26,13 @@ The sidecar only needs to be downloaded once. Updates are detected automatically ## Basic Workflow -### 1. Import Audio +### 1. Import Audio or Video - Click **Import Audio** or press **Ctrl+O** (Cmd+O on Mac) -- Supported formats: MP3, WAV, FLAC, OGG, M4A, AAC, WMA, MP4, MKV, AVI, MOV, WebM +- **Audio formats:** MP3, WAV, FLAC, OGG, M4A, AAC, WMA +- **Video formats:** MP4, MKV, AVI, MOV, WebM — audio is automatically extracted + +> **Note:** Video file import requires [FFmpeg](#installing-ffmpeg) to be installed on your system. ### 2. Transcribe @@ -181,8 +184,42 @@ If you prefer cloud-based AI: --- +## Installing FFmpeg + +FFmpeg is required for importing video files (MP4, MKV, AVI, etc.). 
It's used to extract the audio track before transcription. + +**Windows:** +``` +winget install ffmpeg +``` +Or download from [ffmpeg.org/download.html](https://ffmpeg.org/download.html) and add to your PATH. + +**macOS:** +``` +brew install ffmpeg +``` + +**Linux (Debian/Ubuntu):** +``` +sudo apt install ffmpeg +``` + +**Linux (Fedora/RHEL):** +``` +sudo dnf install ffmpeg +``` + +After installing, restart Voice to Notes. FFmpeg is not needed for audio-only files (MP3, WAV, FLAC, etc.). + +--- + ## Troubleshooting +### Video import fails / "FFmpeg not found" +- Install FFmpeg using the instructions above +- Make sure `ffmpeg` is in your system PATH +- Restart Voice to Notes after installing + ### Transcription is slow - Use a smaller model (tiny or base) - If you have an NVIDIA GPU, select CUDA in Settings > Transcription > Device diff --git a/src-tauri/src/commands/media.rs b/src-tauri/src/commands/media.rs new file mode 100644 index 0000000..2ac353e --- /dev/null +++ b/src-tauri/src/commands/media.rs @@ -0,0 +1,95 @@ +use std::path::PathBuf; +use std::process::Command; + +/// Extract audio from a video file to a WAV file using ffmpeg. +/// Returns the path to the extracted audio file. +#[tauri::command] +pub fn extract_audio(file_path: String) -> Result<String, String> { + let input = PathBuf::from(&file_path); + if !input.exists() { + return Err(format!("File not found: {}", file_path)); + } + + // Output to a temp WAV file next to the original or in temp dir + let stem = input.file_stem().unwrap_or_default().to_string_lossy(); + let output = std::env::temp_dir().join(format!("{stem}_audio.wav")); + + eprintln!( + "[media] Extracting audio: {} -> {}", + input.display(), + output.display() + ); + + // Find ffmpeg — check sidecar extract dir first, then system PATH + let ffmpeg = find_ffmpeg().ok_or("ffmpeg not found. 
Install ffmpeg or ensure it's in PATH.")?; + + let status = Command::new(&ffmpeg) + .args([ + "-y", // Overwrite output + "-i", + &file_path, + "-vn", // No video + "-acodec", + "pcm_s16le", // WAV PCM 16-bit + "-ar", + "16000", // 16kHz (optimal for whisper) + "-ac", + "1", // Mono + ]) + .arg(output.to_str().unwrap()) + .stdout(std::process::Stdio::null()) + .stderr(std::process::Stdio::piped()) + .status() + .map_err(|e| format!("Failed to run ffmpeg: {e}"))?; + + if !status.success() { + return Err(format!("ffmpeg exited with status {status}")); + } + + if !output.exists() { + return Err("ffmpeg completed but output file not found".to_string()); + } + + eprintln!("[media] Audio extracted successfully"); + Ok(output.to_string_lossy().to_string()) +} + +/// Find ffmpeg binary — check sidecar directory first, then system PATH. +fn find_ffmpeg() -> Option<String> { + // Check sidecar extract dir (ffmpeg is bundled with the sidecar) + if let Some(data_dir) = crate::sidecar::DATA_DIR.get() { + // Read sidecar version to find the right directory + let version_file = data_dir.join("sidecar-version.txt"); + if let Ok(version) = std::fs::read_to_string(&version_file) { + let version = version.trim(); + let sidecar_dir = data_dir.join(format!("sidecar-{version}")); + let ffmpeg_name = if cfg!(target_os = "windows") { + "ffmpeg.exe" + } else { + "ffmpeg" + }; + let ffmpeg_path = sidecar_dir.join(ffmpeg_name); + if ffmpeg_path.exists() { + return Some(ffmpeg_path.to_string_lossy().to_string()); + } + } + } + + // Fall back to system PATH + let ffmpeg_name = if cfg!(target_os = "windows") { + "ffmpeg.exe" + } else { + "ffmpeg" + }; + if Command::new(ffmpeg_name) + .arg("-version") + .stdout(std::process::Stdio::null()) + .stderr(std::process::Stdio::null()) + .status() + .is_ok() + { + return Some(ffmpeg_name.to_string()); + } + + None +} diff --git a/src-tauri/src/commands/mod.rs b/src-tauri/src/commands/mod.rs index 3b987a3..a7f1070 100644 --- a/src-tauri/src/commands/mod.rs +++ 
b/src-tauri/src/commands/mod.rs @@ -1,5 +1,6 @@ pub mod ai; pub mod export; +pub mod media; pub mod project; pub mod settings; pub mod sidecar; diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs index 3ab7897..5854092 100644 --- a/src-tauri/src/lib.rs +++ b/src-tauri/src/lib.rs @@ -9,6 +9,7 @@ use tauri::Manager; use commands::ai::{ai_chat, ai_configure, ai_list_providers}; use commands::export::export_transcript; +use commands::media::extract_audio; use commands::project::{ create_project, delete_project, get_project, list_projects, load_project_file, load_project_transcript, save_project_file, save_project_transcript, update_segment, @@ -73,6 +74,7 @@ pub fn run() { check_sidecar_update, log_frontend, toggle_devtools, + extract_audio, ]) .run(tauri::generate_context!()) .expect("error while running tauri application"); diff --git a/src/lib/components/WaveformPlayer.svelte b/src/lib/components/WaveformPlayer.svelte index 4ffeda9..c068352 100644 --- a/src/lib/components/WaveformPlayer.svelte +++ b/src/lib/components/WaveformPlayer.svelte @@ -57,6 +57,12 @@ isReady = false; }); + wavesurfer.on('error', (err: Error) => { + console.error('[voice-to-notes] WaveSurfer error:', err); + isLoading = false; + loadError = 'Failed to load audio'; + }); + if (audioUrl) { loadAudio(audioUrl); } diff --git a/src/routes/+page.svelte b/src/routes/+page.svelte index 298332e..0522aa9 100644 --- a/src/routes/+page.svelte +++ b/src/routes/+page.svelte @@ -254,6 +254,8 @@ // Changes persist when user saves the project file. } + const VIDEO_EXTENSIONS = ['mp4', 'mkv', 'avi', 'mov', 'webm']; + async function handleFileImport() { const filePath = await open({ multiple: false, @@ -265,9 +267,34 @@ }); if (!filePath) return; - // Track the original file path and convert to asset URL for wavesurfer + // For video files, extract audio first using ffmpeg + const ext = filePath.split('.').pop()?.toLowerCase() ?? 
''; + let audioPath = filePath; + if (VIDEO_EXTENSIONS.includes(ext)) { + try { + audioPath = await invoke<string>('extract_audio', { filePath }); + } catch (err) { + console.error('[voice-to-notes] Failed to extract audio:', err); + const msg = String(err); + if (msg.includes('ffmpeg not found')) { + alert( + 'FFmpeg is required to import video files.\n\n' + + 'Install FFmpeg:\n' + + ' Windows: winget install ffmpeg\n' + + ' macOS: brew install ffmpeg\n' + + ' Linux: sudo apt install ffmpeg\n\n' + + 'Then restart Voice to Notes and try again.' + ); + } else { + alert(`Failed to extract audio from video: ${msg}`); + } + return; + } + } + + // Track the original file path (video or audio) for the sidecar audioFilePath = filePath; - audioUrl = convertFileSrc(filePath); + audioUrl = convertFileSrc(audioPath); waveformPlayer?.loadAudio(audioUrl); // Clear previous results