Extract audio from video files before loading
Video files (MP4, MKV, etc.) are now processed with ffmpeg to extract audio to a temp WAV file before loading into wavesurfer. This prevents the WebView crash caused by trying to fetch multi-GB files into memory. - New extract_audio Tauri command uses ffmpeg (sidecar-bundled or system) - Frontend detects video extensions and extracts audio automatically - User-friendly error if ffmpeg is not installed with install instructions - Reverted wavesurfer MediaElement approach in favor of clean extraction - Added FFmpeg install guide to USER_GUIDE.md Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -26,10 +26,13 @@ The sidecar only needs to be downloaded once. Updates are detected automatically
|
|||||||
|
|
||||||
## Basic Workflow
|
## Basic Workflow
|
||||||
|
|
||||||
### 1. Import Audio
|
### 1. Import Audio or Video
|
||||||
|
|
||||||
- Click **Import Audio** or press **Ctrl+O** (Cmd+O on Mac)
|
- Click **Import Audio** or press **Ctrl+O** (Cmd+O on Mac)
|
||||||
- Supported formats: MP3, WAV, FLAC, OGG, M4A, AAC, WMA, MP4, MKV, AVI, MOV, WebM
|
- **Audio formats:** MP3, WAV, FLAC, OGG, M4A, AAC, WMA
|
||||||
|
- **Video formats:** MP4, MKV, AVI, MOV, WebM — audio is automatically extracted
|
||||||
|
|
||||||
|
> **Note:** Video file import requires [FFmpeg](#installing-ffmpeg) to be installed on your system.
|
||||||
|
|
||||||
### 2. Transcribe
|
### 2. Transcribe
|
||||||
|
|
||||||
@@ -181,8 +184,42 @@ If you prefer cloud-based AI:
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
## Installing FFmpeg
|
||||||
|
|
||||||
|
FFmpeg is required for importing video files (MP4, MKV, AVI, etc.). It's used to extract the audio track before transcription.
|
||||||
|
|
||||||
|
**Windows:**
|
||||||
|
```
|
||||||
|
winget install ffmpeg
|
||||||
|
```
|
||||||
|
Or download from [ffmpeg.org/download.html](https://ffmpeg.org/download.html) and add to your PATH.
|
||||||
|
|
||||||
|
**macOS:**
|
||||||
|
```
|
||||||
|
brew install ffmpeg
|
||||||
|
```
|
||||||
|
|
||||||
|
**Linux (Debian/Ubuntu):**
|
||||||
|
```
|
||||||
|
sudo apt install ffmpeg
|
||||||
|
```
|
||||||
|
|
||||||
|
**Linux (Fedora/RHEL):**
|
||||||
|
```
|
||||||
|
sudo dnf install ffmpeg
|
||||||
|
```
|
||||||
|
|
||||||
|
After installing, restart Voice to Notes. FFmpeg is not needed for audio-only files (MP3, WAV, FLAC, etc.).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
## Troubleshooting
|
## Troubleshooting
|
||||||
|
|
||||||
|
### Video import fails / "FFmpeg not found"
|
||||||
|
- Install FFmpeg using the instructions above
|
||||||
|
- Make sure `ffmpeg` is in your system PATH
|
||||||
|
- Restart Voice to Notes after installing
|
||||||
|
|
||||||
### Transcription is slow
|
### Transcription is slow
|
||||||
- Use a smaller model (tiny or base)
|
- Use a smaller model (tiny or base)
|
||||||
- If you have an NVIDIA GPU, select CUDA in Settings > Transcription > Device
|
- If you have an NVIDIA GPU, select CUDA in Settings > Transcription > Device
|
||||||
|
|||||||
95
src-tauri/src/commands/media.rs
Normal file
95
src-tauri/src/commands/media.rs
Normal file
@@ -0,0 +1,95 @@
|
|||||||
|
use std::path::PathBuf;
|
||||||
|
use std::process::Command;
|
||||||
|
|
||||||
|
/// Extract audio from a video file to a WAV file using ffmpeg.
|
||||||
|
/// Returns the path to the extracted audio file.
|
||||||
|
#[tauri::command]
|
||||||
|
pub fn extract_audio(file_path: String) -> Result<String, String> {
|
||||||
|
let input = PathBuf::from(&file_path);
|
||||||
|
if !input.exists() {
|
||||||
|
return Err(format!("File not found: {}", file_path));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Output to a temp WAV file next to the original or in temp dir
|
||||||
|
let stem = input.file_stem().unwrap_or_default().to_string_lossy();
|
||||||
|
let output = std::env::temp_dir().join(format!("{stem}_audio.wav"));
|
||||||
|
|
||||||
|
eprintln!(
|
||||||
|
"[media] Extracting audio: {} -> {}",
|
||||||
|
input.display(),
|
||||||
|
output.display()
|
||||||
|
);
|
||||||
|
|
||||||
|
// Find ffmpeg — check sidecar extract dir first, then system PATH
|
||||||
|
let ffmpeg = find_ffmpeg().ok_or("ffmpeg not found. Install ffmpeg or ensure it's in PATH.")?;
|
||||||
|
|
||||||
|
let status = Command::new(&ffmpeg)
|
||||||
|
.args([
|
||||||
|
"-y", // Overwrite output
|
||||||
|
"-i",
|
||||||
|
&file_path,
|
||||||
|
"-vn", // No video
|
||||||
|
"-acodec",
|
||||||
|
"pcm_s16le", // WAV PCM 16-bit
|
||||||
|
"-ar",
|
||||||
|
"16000", // 16kHz (optimal for whisper)
|
||||||
|
"-ac",
|
||||||
|
"1", // Mono
|
||||||
|
])
|
||||||
|
.arg(output.to_str().unwrap())
|
||||||
|
.stdout(std::process::Stdio::null())
|
||||||
|
.stderr(std::process::Stdio::piped())
|
||||||
|
.status()
|
||||||
|
.map_err(|e| format!("Failed to run ffmpeg: {e}"))?;
|
||||||
|
|
||||||
|
if !status.success() {
|
||||||
|
return Err(format!("ffmpeg exited with status {status}"));
|
||||||
|
}
|
||||||
|
|
||||||
|
if !output.exists() {
|
||||||
|
return Err("ffmpeg completed but output file not found".to_string());
|
||||||
|
}
|
||||||
|
|
||||||
|
eprintln!("[media] Audio extracted successfully");
|
||||||
|
Ok(output.to_string_lossy().to_string())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Find ffmpeg binary — check sidecar directory first, then system PATH.
|
||||||
|
fn find_ffmpeg() -> Option<String> {
|
||||||
|
// Check sidecar extract dir (ffmpeg is bundled with the sidecar)
|
||||||
|
if let Some(data_dir) = crate::sidecar::DATA_DIR.get() {
|
||||||
|
// Read sidecar version to find the right directory
|
||||||
|
let version_file = data_dir.join("sidecar-version.txt");
|
||||||
|
if let Ok(version) = std::fs::read_to_string(&version_file) {
|
||||||
|
let version = version.trim();
|
||||||
|
let sidecar_dir = data_dir.join(format!("sidecar-{version}"));
|
||||||
|
let ffmpeg_name = if cfg!(target_os = "windows") {
|
||||||
|
"ffmpeg.exe"
|
||||||
|
} else {
|
||||||
|
"ffmpeg"
|
||||||
|
};
|
||||||
|
let ffmpeg_path = sidecar_dir.join(ffmpeg_name);
|
||||||
|
if ffmpeg_path.exists() {
|
||||||
|
return Some(ffmpeg_path.to_string_lossy().to_string());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fall back to system PATH
|
||||||
|
let ffmpeg_name = if cfg!(target_os = "windows") {
|
||||||
|
"ffmpeg.exe"
|
||||||
|
} else {
|
||||||
|
"ffmpeg"
|
||||||
|
};
|
||||||
|
if Command::new(ffmpeg_name)
|
||||||
|
.arg("-version")
|
||||||
|
.stdout(std::process::Stdio::null())
|
||||||
|
.stderr(std::process::Stdio::null())
|
||||||
|
.status()
|
||||||
|
.is_ok()
|
||||||
|
{
|
||||||
|
return Some(ffmpeg_name.to_string());
|
||||||
|
}
|
||||||
|
|
||||||
|
None
|
||||||
|
}
|
||||||
@@ -1,5 +1,6 @@
|
|||||||
pub mod ai;
|
pub mod ai;
|
||||||
pub mod export;
|
pub mod export;
|
||||||
|
pub mod media;
|
||||||
pub mod project;
|
pub mod project;
|
||||||
pub mod settings;
|
pub mod settings;
|
||||||
pub mod sidecar;
|
pub mod sidecar;
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ use tauri::Manager;
|
|||||||
|
|
||||||
use commands::ai::{ai_chat, ai_configure, ai_list_providers};
|
use commands::ai::{ai_chat, ai_configure, ai_list_providers};
|
||||||
use commands::export::export_transcript;
|
use commands::export::export_transcript;
|
||||||
|
use commands::media::extract_audio;
|
||||||
use commands::project::{
|
use commands::project::{
|
||||||
create_project, delete_project, get_project, list_projects, load_project_file,
|
create_project, delete_project, get_project, list_projects, load_project_file,
|
||||||
load_project_transcript, save_project_file, save_project_transcript, update_segment,
|
load_project_transcript, save_project_file, save_project_transcript, update_segment,
|
||||||
@@ -73,6 +74,7 @@ pub fn run() {
|
|||||||
check_sidecar_update,
|
check_sidecar_update,
|
||||||
log_frontend,
|
log_frontend,
|
||||||
toggle_devtools,
|
toggle_devtools,
|
||||||
|
extract_audio,
|
||||||
])
|
])
|
||||||
.run(tauri::generate_context!())
|
.run(tauri::generate_context!())
|
||||||
.expect("error while running tauri application");
|
.expect("error while running tauri application");
|
||||||
|
|||||||
@@ -57,6 +57,12 @@
|
|||||||
isReady = false;
|
isReady = false;
|
||||||
});
|
});
|
||||||
|
|
||||||
|
wavesurfer.on('error', (err: Error) => {
|
||||||
|
console.error('[voice-to-notes] WaveSurfer error:', err);
|
||||||
|
isLoading = false;
|
||||||
|
loadError = 'Failed to load audio';
|
||||||
|
});
|
||||||
|
|
||||||
if (audioUrl) {
|
if (audioUrl) {
|
||||||
loadAudio(audioUrl);
|
loadAudio(audioUrl);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -254,6 +254,8 @@
|
|||||||
// Changes persist when user saves the project file.
|
// Changes persist when user saves the project file.
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const VIDEO_EXTENSIONS = ['mp4', 'mkv', 'avi', 'mov', 'webm'];
|
||||||
|
|
||||||
async function handleFileImport() {
|
async function handleFileImport() {
|
||||||
const filePath = await open({
|
const filePath = await open({
|
||||||
multiple: false,
|
multiple: false,
|
||||||
@@ -265,9 +267,34 @@
|
|||||||
});
|
});
|
||||||
if (!filePath) return;
|
if (!filePath) return;
|
||||||
|
|
||||||
// Track the original file path and convert to asset URL for wavesurfer
|
// For video files, extract audio first using ffmpeg
|
||||||
|
const ext = filePath.split('.').pop()?.toLowerCase() ?? '';
|
||||||
|
let audioPath = filePath;
|
||||||
|
if (VIDEO_EXTENSIONS.includes(ext)) {
|
||||||
|
try {
|
||||||
|
audioPath = await invoke<string>('extract_audio', { filePath });
|
||||||
|
} catch (err) {
|
||||||
|
console.error('[voice-to-notes] Failed to extract audio:', err);
|
||||||
|
const msg = String(err);
|
||||||
|
if (msg.includes('ffmpeg not found')) {
|
||||||
|
alert(
|
||||||
|
'FFmpeg is required to import video files.\n\n' +
|
||||||
|
'Install FFmpeg:\n' +
|
||||||
|
' Windows: winget install ffmpeg\n' +
|
||||||
|
' macOS: brew install ffmpeg\n' +
|
||||||
|
' Linux: sudo apt install ffmpeg\n\n' +
|
||||||
|
'Then restart Voice to Notes and try again.'
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
alert(`Failed to extract audio from video: ${msg}`);
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Track the original file path (video or audio) for the sidecar
|
||||||
audioFilePath = filePath;
|
audioFilePath = filePath;
|
||||||
audioUrl = convertFileSrc(filePath);
|
audioUrl = convertFileSrc(audioPath);
|
||||||
waveformPlayer?.loadAudio(audioUrl);
|
waveformPlayer?.loadAudio(audioUrl);
|
||||||
|
|
||||||
// Clear previous results
|
// Clear previous results
|
||||||
|
|||||||
Reference in New Issue
Block a user