Extract audio from video files before loading

Video files (MP4, MKV, etc.) are now processed with ffmpeg to extract audio to a temp WAV file before loading into wavesurfer. This prevents the WebView crash caused by trying to fetch multi-GB files into memory.

- New extract_audio Tauri command uses ffmpeg (sidecar-bundled or system)
- Frontend detects video extensions and extracts audio automatically
- User-friendly error if ffmpeg is not installed with install instructions
- Reverted wavesurfer MediaElement approach in favor of clean extraction
- Added FFmpeg install guide to USER_GUIDE.md

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-22 20:04:07 -07:00
parent 66db827f17
commit 02c70f90c8
6 changed files with 172 additions and 4 deletions
@@ -26,10 +26,13 @@ The sidecar only needs to be downloaded once. Updates are detected automatically

 ## Basic Workflow

-### 1. Import Audio
+### 1. Import Audio or Video

 - Click **Import Audio** or press **Ctrl+O** (Cmd+O on Mac)
- Supported formats: MP3, WAV, FLAC, OGG, M4A, AAC, WMA, MP4, MKV, AVI, MOV, WebM
+- **Audio formats:** MP3, WAV, FLAC, OGG, M4A, AAC, WMA
+- **Video formats:** MP4, MKV, AVI, MOV, WebM — audio is automatically extracted
+
+> **Note:** Video file import requires [FFmpeg](#installing-ffmpeg) to be installed on your system.

 ### 2. Transcribe

@@ -181,8 +184,42 @@ If you prefer cloud-based AI:

 ---

+## Installing FFmpeg
+
+FFmpeg is required for importing video files (MP4, MKV, AVI, etc.). It's used to extract the audio track before transcription.
+
+**Windows:**
+```
+winget install ffmpeg
+```
+Or download from [ffmpeg.org/download.html](https://ffmpeg.org/download.html) and add to your PATH.
+
+**macOS:**
+```
+brew install ffmpeg
+```
+
+**Linux (Debian/Ubuntu):**
+```
+sudo apt install ffmpeg
+```
+
+**Linux (Fedora/RHEL):**
+```
+sudo dnf install ffmpeg
+```
+
+After installing, restart Voice to Notes. FFmpeg is not needed for audio-only files (MP3, WAV, FLAC, etc.).
+
+---
+
 ## Troubleshooting

+### Video import fails / "FFmpeg not found"
+- Install FFmpeg using the instructions above
+- Make sure `ffmpeg` is in your system PATH
+- Restart Voice to Notes after installing
+
 ### Transcription is slow
 - Use a smaller model (tiny or base)
 - If you have an NVIDIA GPU, select CUDA in Settings > Transcription > Device
@@ -0,0 +1,95 @@
+use std::path::PathBuf;
+use std::process::Command;
+
+/// Extract the audio track of a video file into a WAV file using ffmpeg.
+///
+/// The WAV is written to the system temp directory as `<stem>_audio.wav`
+/// (16 kHz, mono, 16-bit PCM). An existing file with that name is overwritten,
+/// so two inputs sharing a file stem share one output path.
+///
+/// Returns the path of the extracted audio file.
+///
+/// # Errors
+///
+/// Returns a message when the input does not exist, no ffmpeg binary can be
+/// found, ffmpeg cannot be started or exits unsuccessfully, or ffmpeg reports
+/// success without producing the output file.
+#[tauri::command]
+pub fn extract_audio(file_path: String) -> Result<String, String> {
+    let input = PathBuf::from(&file_path);
+    if !input.exists() {
+        return Err(format!("File not found: {}", file_path));
+    }
+
+    // The output always goes to the system temp dir, named after the input's stem.
+    let stem = input.file_stem().unwrap_or_default().to_string_lossy();
+    let output = std::env::temp_dir().join(format!("{stem}_audio.wav"));
+
+    eprintln!(
+        "[media] Extracting audio: {} -> {}",
+        input.display(),
+        output.display()
+    );
+
+    // Find ffmpeg — check sidecar extract dir first, then system PATH.
+    // The frontend matches on the "ffmpeg not found" text, so keep it stable.
+    let ffmpeg = find_ffmpeg().ok_or("ffmpeg not found. Install ffmpeg or ensure it's in PATH.")?;
+
+    // `output()` drains stderr while ffmpeg runs. Waiting with `status()` on a
+    // piped-but-unread stderr can block ffmpeg once the pipe buffer fills.
+    // Paths are passed as OS strings so a non-UTF-8 path cannot cause a panic.
+    let result = Command::new(&ffmpeg)
+        .args([
+            "-hide_banner",
+            "-loglevel",
+            "error",          // Only errors on stderr, so the captured text stays small
+            "-y",             // Overwrite output
+            "-i",
+        ])
+        .arg(&input)
+        .args([
+            "-vn",            // No video
+            "-acodec",
+            "pcm_s16le",      // WAV PCM 16-bit
+            "-ar",
+            "16000",          // 16kHz (optimal for whisper)
+            "-ac",
+            "1",              // Mono
+        ])
+        .arg(&output)
+        .stdout(std::process::Stdio::null())
+        .output()
+        .map_err(|e| format!("Failed to run ffmpeg: {e}"))?;
+
+    if !result.status.success() {
+        // Surface ffmpeg's last diagnostic line so the user sees why it failed.
+        let stderr = String::from_utf8_lossy(&result.stderr);
+        let detail = stderr
+            .rsplit(|c: char| c == '\n' || c == '\r')
+            .map(str::trim)
+            .find(|line| !line.is_empty());
+        let status = result.status;
+        return Err(match detail {
+            Some(detail) => format!("ffmpeg exited with status {status}: {detail}"),
+            None => format!("ffmpeg exited with status {status}"),
+        });
+    }
+
+    if !output.exists() {
+        return Err("ffmpeg completed but output file not found".to_string());
+    }
+
+    eprintln!("[media] Audio extracted successfully");
+    Ok(output.to_string_lossy().to_string())
+}
+
+/// Locate an ffmpeg executable to run.
+///
+/// Looks first in the versioned sidecar directory under the app data dir
+/// (`sidecar-<version>/`, with the version read from `sidecar-version.txt`).
+/// Otherwise it probes the bare binary name, which is resolved via the system
+/// PATH. Returns `None` when neither location yields a usable binary.
+fn find_ffmpeg() -> Option<String> {
+    let exe_name = if cfg!(target_os = "windows") {
+        "ffmpeg.exe"
+    } else {
+        "ffmpeg"
+    };
+
+    // Bundled copy: <data_dir>/sidecar-<version>/<exe_name>
+    let bundled = crate::sidecar::DATA_DIR.get().and_then(|data_dir| {
+        let raw_version = std::fs::read_to_string(data_dir.join("sidecar-version.txt")).ok()?;
+        let candidate = data_dir
+            .join(format!("sidecar-{}", raw_version.trim()))
+            .join(exe_name);
+        candidate
+            .exists()
+            .then(|| candidate.to_string_lossy().to_string())
+    });
+    if bundled.is_some() {
+        return bundled;
+    }
+
+    // System copy: being able to spawn `<exe_name> -version` is taken as proof
+    // that the binary is reachable; its exit code is not inspected.
+    Command::new(exe_name)
+        .arg("-version")
+        .stdout(std::process::Stdio::null())
+        .stderr(std::process::Stdio::null())
+        .status()
+        .ok()
+        .map(|_| exe_name.to_string())
+}
@@ -1,5 +1,6 @@
 pub mod ai;
 pub mod export;
+pub mod media;
 pub mod project;
 pub mod settings;
 pub mod sidecar;
@@ -9,6 +9,7 @@ use tauri::Manager;

 use commands::ai::{ai_chat, ai_configure, ai_list_providers};
 use commands::export::export_transcript;
+use commands::media::extract_audio;
 use commands::project::{
    create_project, delete_project, get_project, list_projects, load_project_file,
    load_project_transcript, save_project_file, save_project_transcript, update_segment,
@@ -73,6 +74,7 @@ pub fn run() {
            check_sidecar_update,
            log_frontend,
            toggle_devtools,
+            extract_audio,
        ])
        .run(tauri::generate_context!())
        .expect("error while running tauri application");
@@ -57,6 +57,12 @@
      isReady = false;
    });

+    wavesurfer.on('error', (err: Error) => {
+      console.error('[voice-to-notes] WaveSurfer error:', err);
+      isLoading = false;
+      loadError = 'Failed to load audio';
+    });
+
    if (audioUrl) {
      loadAudio(audioUrl);
    }
@@ -254,6 +254,8 @@
    // Changes persist when user saves the project file.
  }

+  const VIDEO_EXTENSIONS = ['mp4', 'mkv', 'avi', 'mov', 'webm'];
+
  async function handleFileImport() {
    const filePath = await open({
      multiple: false,
@@ -265,9 +267,34 @@
    });
    if (!filePath) return;

-    // Track the original file path and convert to asset URL for wavesurfer
+    // For video files, extract audio first using ffmpeg
+    const ext = filePath.split('.').pop()?.toLowerCase() ?? '';
+    let audioPath = filePath;
+    if (VIDEO_EXTENSIONS.includes(ext)) {
+      try {
+        audioPath = await invoke<string>('extract_audio', { filePath });
+      } catch (err) {
+        console.error('[voice-to-notes] Failed to extract audio:', err);
+        const msg = String(err);
+        if (msg.includes('ffmpeg not found')) {
+          alert(
+            'FFmpeg is required to import video files.\n\n' +
+            'Install FFmpeg:\n' +
+            '  Windows: winget install ffmpeg\n' +
+            '  macOS: brew install ffmpeg\n' +
+            '  Linux: sudo apt install ffmpeg\n\n' +
+            'Then restart Voice to Notes and try again.'
+          );
+        } else {
+          alert(`Failed to extract audio from video: ${msg}`);
+        }
+        return;
+      }
+    }
+
+    // Track the original file path (video or audio) for the sidecar
    audioFilePath = filePath;
-    audioUrl = convertFileSrc(filePath);
+    audioUrl = convertFileSrc(audioPath);
    waveformPlayer?.loadAudio(audioUrl);

    // Clear previous results