Compare commits
31 Commits
sidecar-v1
...
v0.2.43
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4f44bdd037 | ||
|
|
32bfbd3791 | ||
|
|
2bfb1b276e | ||
|
|
908762073f | ||
|
|
2011015c9a | ||
|
|
fc5cfc4374 | ||
|
|
ac0fe3b4c7 | ||
|
|
e05f9afaff | ||
|
|
548d260061 | ||
|
|
168a43e0e1 | ||
|
|
543decd769 | ||
|
|
e05f88eecf | ||
|
|
fee1255cac | ||
|
|
2e9f2519b1 | ||
|
|
82bfcfb793 | ||
|
|
73eab2e80c | ||
|
|
33ca3e4a28 | ||
|
|
e65d8b0510 | ||
|
|
a7364f2e50 | ||
|
|
809acfc781 | ||
|
|
96e9a6d38b | ||
|
|
ddfbd65478 | ||
|
|
e80ee3a18f | ||
|
|
806586ae3d | ||
|
|
999bdaa671 | ||
|
|
b1d46fd42e | ||
|
|
818cbfa69c | ||
|
|
aa319eb823 | ||
|
|
8faa336cbc | ||
|
|
02c70f90c8 | ||
|
|
66db827f17 |
@@ -26,10 +26,13 @@ The sidecar only needs to be downloaded once. Updates are detected automatically
|
|||||||
|
|
||||||
## Basic Workflow
|
## Basic Workflow
|
||||||
|
|
||||||
### 1. Import Audio
|
### 1. Import Audio or Video
|
||||||
|
|
||||||
- Click **Import Audio** or press **Ctrl+O** (Cmd+O on Mac)
|
- Click **Import Audio** or press **Ctrl+O** (Cmd+O on Mac)
|
||||||
- Supported formats: MP3, WAV, FLAC, OGG, M4A, AAC, WMA, MP4, MKV, AVI, MOV, WebM
|
- **Audio formats:** MP3, WAV, FLAC, OGG, M4A, AAC, WMA
|
||||||
|
- **Video formats:** MP4, MKV, AVI, MOV, WebM — audio is automatically extracted
|
||||||
|
|
||||||
|
> **Note:** Video file import requires [FFmpeg](#installing-ffmpeg) to be installed on your system.
|
||||||
|
|
||||||
### 2. Transcribe
|
### 2. Transcribe
|
||||||
|
|
||||||
@@ -181,8 +184,42 @@ If you prefer cloud-based AI:
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
## Installing FFmpeg
|
||||||
|
|
||||||
|
FFmpeg is required for importing video files (MP4, MKV, AVI, etc.). It's used to extract the audio track before transcription.
|
||||||
|
|
||||||
|
**Windows:**
|
||||||
|
```
|
||||||
|
winget install ffmpeg
|
||||||
|
```
|
||||||
|
Or download from [ffmpeg.org/download.html](https://ffmpeg.org/download.html) and add to your PATH.
|
||||||
|
|
||||||
|
**macOS:**
|
||||||
|
```
|
||||||
|
brew install ffmpeg
|
||||||
|
```
|
||||||
|
|
||||||
|
**Linux (Debian/Ubuntu):**
|
||||||
|
```
|
||||||
|
sudo apt install ffmpeg
|
||||||
|
```
|
||||||
|
|
||||||
|
**Linux (Fedora/RHEL):**
|
||||||
|
```
|
||||||
|
sudo dnf install ffmpeg
|
||||||
|
```
|
||||||
|
|
||||||
|
After installing, restart Voice to Notes. FFmpeg is not needed for audio-only files (MP3, WAV, FLAC, etc.).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
## Troubleshooting
|
## Troubleshooting
|
||||||
|
|
||||||
|
### Video import fails / "FFmpeg not found"
|
||||||
|
- Install FFmpeg using the instructions above
|
||||||
|
- Make sure `ffmpeg` is in your system PATH
|
||||||
|
- Restart Voice to Notes after installing
|
||||||
|
|
||||||
### Transcription is slow
|
### Transcription is slow
|
||||||
- Use a smaller model (tiny or base)
|
- Use a smaller model (tiny or base)
|
||||||
- If you have an NVIDIA GPU, select CUDA in Settings > Transcription > Device
|
- If you have an NVIDIA GPU, select CUDA in Settings > Transcription > Device
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "voice-to-notes",
|
"name": "voice-to-notes",
|
||||||
"version": "0.2.28",
|
"version": "0.2.43",
|
||||||
"description": "Desktop app for transcribing audio/video with speaker identification",
|
"description": "Desktop app for transcribing audio/video with speaker identification",
|
||||||
"type": "module",
|
"type": "module",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "voice-to-notes"
|
name = "voice-to-notes"
|
||||||
version = "1.0.11"
|
version = "1.0.13"
|
||||||
description = "Python sidecar for Voice to Notes — transcription, diarization, and AI services"
|
description = "Python sidecar for Voice to Notes — transcription, diarization, and AI services"
|
||||||
requires-python = ">=3.11"
|
requires-python = ">=3.11"
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
|
|||||||
@@ -41,14 +41,23 @@ def _patch_pyannote_audio() -> None:
|
|||||||
import torch
|
import torch
|
||||||
from pyannote.audio.core.io import Audio
|
from pyannote.audio.core.io import Audio
|
||||||
|
|
||||||
|
# Cache loaded audio to avoid re-reading the entire file for every crop call.
|
||||||
|
# For a 3-hour file, crop is called 1000+ times — without caching, each call
|
||||||
|
# reads ~345MB from disk.
|
||||||
|
_audio_cache: dict[str, tuple] = {}
|
||||||
|
|
||||||
def _sf_load(audio_path: str) -> tuple:
|
def _sf_load(audio_path: str) -> tuple:
|
||||||
"""Load audio via soundfile, return (channels, samples) tensor + sample_rate."""
|
"""Load audio via soundfile with caching."""
|
||||||
data, sample_rate = sf.read(str(audio_path), dtype="float32")
|
key = str(audio_path)
|
||||||
|
if key in _audio_cache:
|
||||||
|
return _audio_cache[key]
|
||||||
|
data, sample_rate = sf.read(key, dtype="float32")
|
||||||
waveform = torch.from_numpy(np.array(data))
|
waveform = torch.from_numpy(np.array(data))
|
||||||
if waveform.ndim == 1:
|
if waveform.ndim == 1:
|
||||||
waveform = waveform.unsqueeze(0)
|
waveform = waveform.unsqueeze(0)
|
||||||
else:
|
else:
|
||||||
waveform = waveform.T
|
waveform = waveform.T
|
||||||
|
_audio_cache[key] = (waveform, sample_rate)
|
||||||
return waveform, sample_rate
|
return waveform, sample_rate
|
||||||
|
|
||||||
def _soundfile_call(self, file: dict) -> tuple:
|
def _soundfile_call(self, file: dict) -> tuple:
|
||||||
@@ -56,7 +65,7 @@ def _patch_pyannote_audio() -> None:
|
|||||||
return _sf_load(file["audio"])
|
return _sf_load(file["audio"])
|
||||||
|
|
||||||
def _soundfile_crop(self, file: dict, segment, **kwargs) -> tuple:
|
def _soundfile_crop(self, file: dict, segment, **kwargs) -> tuple:
|
||||||
"""Replacement for Audio.crop — load full file then slice.
|
"""Replacement for Audio.crop — load file once (cached) then slice.
|
||||||
|
|
||||||
Pads short segments with zeros to match the expected duration,
|
Pads short segments with zeros to match the expected duration,
|
||||||
which pyannote requires for batched embedding extraction.
|
which pyannote requires for batched embedding extraction.
|
||||||
@@ -279,13 +288,20 @@ class DiarizeService:
|
|||||||
thread.start()
|
thread.start()
|
||||||
|
|
||||||
elapsed = 0.0
|
elapsed = 0.0
|
||||||
estimated_total = max(audio_duration_sec * 0.5, 30.0) if audio_duration_sec else 120.0
|
estimated_total = max(audio_duration_sec * 0.8, 30.0) if audio_duration_sec else 120.0
|
||||||
while not done_event.wait(timeout=2.0):
|
duration_str = ""
|
||||||
elapsed += 2.0
|
if audio_duration_sec and audio_duration_sec > 600:
|
||||||
|
mins = int(audio_duration_sec / 60)
|
||||||
|
duration_str = f" ({mins}min audio, this may take a while)"
|
||||||
|
while not done_event.wait(timeout=5.0):
|
||||||
|
elapsed += 5.0
|
||||||
pct = min(20 + int((elapsed / estimated_total) * 65), 85)
|
pct = min(20 + int((elapsed / estimated_total) * 65), 85)
|
||||||
|
elapsed_min = int(elapsed / 60)
|
||||||
|
elapsed_sec = int(elapsed % 60)
|
||||||
|
time_str = f"{elapsed_min}m{elapsed_sec:02d}s" if elapsed_min > 0 else f"{int(elapsed)}s"
|
||||||
write_message(progress_message(
|
write_message(progress_message(
|
||||||
request_id, pct, "diarizing",
|
request_id, pct, "diarizing",
|
||||||
f"Analyzing speakers ({int(elapsed)}s elapsed)..."))
|
f"Analyzing speakers ({time_str} elapsed){duration_str}"))
|
||||||
|
|
||||||
thread.join()
|
thread.join()
|
||||||
|
|
||||||
|
|||||||
@@ -113,17 +113,22 @@ class TranscribeService:
|
|||||||
compute_type: str = "int8",
|
compute_type: str = "int8",
|
||||||
language: str | None = None,
|
language: str | None = None,
|
||||||
on_segment: Callable[[SegmentResult, int], None] | None = None,
|
on_segment: Callable[[SegmentResult, int], None] | None = None,
|
||||||
|
chunk_label: str | None = None,
|
||||||
) -> TranscriptionResult:
|
) -> TranscriptionResult:
|
||||||
"""Transcribe an audio file with word-level timestamps.
|
"""Transcribe an audio file with word-level timestamps.
|
||||||
|
|
||||||
Sends progress messages via IPC during processing.
|
Sends progress messages via IPC during processing.
|
||||||
|
If chunk_label is set (e.g. "chunk 3/12"), messages are prefixed with it.
|
||||||
"""
|
"""
|
||||||
# Stage: loading model
|
prefix = f"{chunk_label}: " if chunk_label else ""
|
||||||
write_message(progress_message(request_id, 0, "loading_model", f"Loading {model_name}..."))
|
|
||||||
|
# Stage: loading model (skip for chunks after the first — model already loaded)
|
||||||
|
if not chunk_label:
|
||||||
|
write_message(progress_message(request_id, 0, "loading_model", f"Loading {model_name}..."))
|
||||||
model = self._ensure_model(model_name, device, compute_type)
|
model = self._ensure_model(model_name, device, compute_type)
|
||||||
|
|
||||||
# Stage: transcribing
|
# Stage: transcribing
|
||||||
write_message(progress_message(request_id, 10, "transcribing", "Starting transcription..."))
|
write_message(progress_message(request_id, 10, "transcribing", f"{prefix}Starting transcription..."))
|
||||||
|
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
segments_iter, info = model.transcribe(
|
segments_iter, info = model.transcribe(
|
||||||
@@ -176,7 +181,7 @@ class TranscribeService:
|
|||||||
request_id,
|
request_id,
|
||||||
progress_pct,
|
progress_pct,
|
||||||
"transcribing",
|
"transcribing",
|
||||||
f"Transcribing segment {segment_count} ({progress_pct}% of audio)...",
|
f"{prefix}Transcribing segment {segment_count} ({progress_pct}% of audio)...",
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -271,6 +276,7 @@ class TranscribeService:
|
|||||||
chunk_result = self.transcribe(
|
chunk_result = self.transcribe(
|
||||||
request_id, tmp.name, model_name, device,
|
request_id, tmp.name, model_name, device,
|
||||||
compute_type, language, on_segment=chunk_on_segment,
|
compute_type, language, on_segment=chunk_on_segment,
|
||||||
|
chunk_label=f"Chunk {chunk_idx + 1}/{num_chunks}",
|
||||||
)
|
)
|
||||||
|
|
||||||
# Offset timestamps and merge
|
# Offset timestamps and merge
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "voice-to-notes"
|
name = "voice-to-notes"
|
||||||
version = "0.2.28"
|
version = "0.2.43"
|
||||||
description = "Voice to Notes — desktop transcription with speaker identification"
|
description = "Voice to Notes — desktop transcription with speaker identification"
|
||||||
authors = ["Voice to Notes Contributors"]
|
authors = ["Voice to Notes Contributors"]
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
|
|||||||
124
src-tauri/src/commands/media.rs
Normal file
124
src-tauri/src/commands/media.rs
Normal file
@@ -0,0 +1,124 @@
|
|||||||
|
use std::path::PathBuf;
|
||||||
|
use std::process::Command;
|
||||||
|
|
||||||
|
#[cfg(target_os = "windows")]
|
||||||
|
use std::os::windows::process::CommandExt;
|
||||||
|
|
||||||
|
/// Extract audio from a video file to a WAV file using ffmpeg.
|
||||||
|
/// Returns the path to the extracted audio file.
|
||||||
|
#[tauri::command]
|
||||||
|
pub fn extract_audio(file_path: String, output_path: Option<String>) -> Result<String, String> {
|
||||||
|
let input = PathBuf::from(&file_path);
|
||||||
|
if !input.exists() {
|
||||||
|
return Err(format!("File not found: {}", file_path));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Use provided output path, or fall back to a temp WAV file
|
||||||
|
let stem = input.file_stem().unwrap_or_default().to_string_lossy();
|
||||||
|
let output = match output_path {
|
||||||
|
Some(ref p) => PathBuf::from(p),
|
||||||
|
None => std::env::temp_dir().join(format!("{stem}_audio.wav")),
|
||||||
|
};
|
||||||
|
|
||||||
|
eprintln!(
|
||||||
|
"[media] Extracting audio: {} -> {}",
|
||||||
|
input.display(),
|
||||||
|
output.display()
|
||||||
|
);
|
||||||
|
|
||||||
|
// Find ffmpeg — check sidecar extract dir first, then system PATH
|
||||||
|
let ffmpeg = find_ffmpeg().ok_or("ffmpeg not found. Install ffmpeg or ensure it's in PATH.")?;
|
||||||
|
|
||||||
|
let mut cmd = Command::new(&ffmpeg);
|
||||||
|
cmd.args([
|
||||||
|
"-y", // Overwrite output
|
||||||
|
"-i",
|
||||||
|
&file_path,
|
||||||
|
"-vn", // No video
|
||||||
|
"-acodec",
|
||||||
|
"pcm_s16le", // WAV PCM 16-bit
|
||||||
|
"-ar",
|
||||||
|
"22050", // 22kHz mono for better playback quality
|
||||||
|
"-ac",
|
||||||
|
"1", // Mono
|
||||||
|
])
|
||||||
|
.arg(output.to_str().unwrap())
|
||||||
|
.stdout(std::process::Stdio::null())
|
||||||
|
.stderr(std::process::Stdio::piped());
|
||||||
|
|
||||||
|
// Hide the console window on Windows (CREATE_NO_WINDOW = 0x08000000)
|
||||||
|
#[cfg(target_os = "windows")]
|
||||||
|
cmd.creation_flags(0x08000000);
|
||||||
|
|
||||||
|
let status = cmd
|
||||||
|
.status()
|
||||||
|
.map_err(|e| format!("Failed to run ffmpeg: {e}"))?;
|
||||||
|
|
||||||
|
if !status.success() {
|
||||||
|
return Err(format!("ffmpeg exited with status {status}"));
|
||||||
|
}
|
||||||
|
|
||||||
|
if !output.exists() {
|
||||||
|
return Err("ffmpeg completed but output file not found".to_string());
|
||||||
|
}
|
||||||
|
|
||||||
|
eprintln!("[media] Audio extracted successfully");
|
||||||
|
Ok(output.to_string_lossy().to_string())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tauri::command]
|
||||||
|
pub fn check_file_exists(path: String) -> bool {
|
||||||
|
std::path::Path::new(&path).exists()
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tauri::command]
|
||||||
|
pub fn copy_file(src: String, dst: String) -> Result<(), String> {
|
||||||
|
std::fs::copy(&src, &dst).map_err(|e| format!("Failed to copy file: {e}"))?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tauri::command]
|
||||||
|
pub fn create_dir(path: String) -> Result<(), String> {
|
||||||
|
std::fs::create_dir_all(&path).map_err(|e| format!("Failed to create directory: {e}"))?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Find ffmpeg binary — check sidecar directory first, then system PATH.
|
||||||
|
fn find_ffmpeg() -> Option<String> {
|
||||||
|
// Check sidecar extract dir (ffmpeg is bundled with the sidecar)
|
||||||
|
if let Some(data_dir) = crate::sidecar::DATA_DIR.get() {
|
||||||
|
// Read sidecar version to find the right directory
|
||||||
|
let version_file = data_dir.join("sidecar-version.txt");
|
||||||
|
if let Ok(version) = std::fs::read_to_string(&version_file) {
|
||||||
|
let version = version.trim();
|
||||||
|
let sidecar_dir = data_dir.join(format!("sidecar-{version}"));
|
||||||
|
let ffmpeg_name = if cfg!(target_os = "windows") {
|
||||||
|
"ffmpeg.exe"
|
||||||
|
} else {
|
||||||
|
"ffmpeg"
|
||||||
|
};
|
||||||
|
let ffmpeg_path = sidecar_dir.join(ffmpeg_name);
|
||||||
|
if ffmpeg_path.exists() {
|
||||||
|
return Some(ffmpeg_path.to_string_lossy().to_string());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fall back to system PATH
|
||||||
|
let ffmpeg_name = if cfg!(target_os = "windows") {
|
||||||
|
"ffmpeg.exe"
|
||||||
|
} else {
|
||||||
|
"ffmpeg"
|
||||||
|
};
|
||||||
|
if Command::new(ffmpeg_name)
|
||||||
|
.arg("-version")
|
||||||
|
.stdout(std::process::Stdio::null())
|
||||||
|
.stderr(std::process::Stdio::null())
|
||||||
|
.status()
|
||||||
|
.is_ok()
|
||||||
|
{
|
||||||
|
return Some(ffmpeg_name.to_string());
|
||||||
|
}
|
||||||
|
|
||||||
|
None
|
||||||
|
}
|
||||||
@@ -1,5 +1,6 @@
|
|||||||
pub mod ai;
|
pub mod ai;
|
||||||
pub mod export;
|
pub mod export;
|
||||||
|
pub mod media;
|
||||||
pub mod project;
|
pub mod project;
|
||||||
pub mod settings;
|
pub mod settings;
|
||||||
pub mod sidecar;
|
pub mod sidecar;
|
||||||
|
|||||||
@@ -12,7 +12,12 @@ use crate::state::AppState;
|
|||||||
pub struct ProjectFile {
|
pub struct ProjectFile {
|
||||||
pub version: u32,
|
pub version: u32,
|
||||||
pub name: String,
|
pub name: String,
|
||||||
pub audio_file: String,
|
#[serde(default)]
|
||||||
|
pub audio_file: Option<String>,
|
||||||
|
#[serde(default)]
|
||||||
|
pub source_file: Option<String>,
|
||||||
|
#[serde(default)]
|
||||||
|
pub audio_wav: Option<String>,
|
||||||
pub created_at: String,
|
pub created_at: String,
|
||||||
pub segments: Vec<ProjectFileSegment>,
|
pub segments: Vec<ProjectFileSegment>,
|
||||||
pub speakers: Vec<ProjectFileSpeaker>,
|
pub speakers: Vec<ProjectFileSpeaker>,
|
||||||
|
|||||||
@@ -197,15 +197,21 @@ pub async fn download_sidecar(app: AppHandle, variant: String) -> Result<(), Str
|
|||||||
let extract_dir = data_dir.join(format!("sidecar-{}", sidecar_version));
|
let extract_dir = data_dir.join(format!("sidecar-{}", sidecar_version));
|
||||||
SidecarManager::extract_zip(&zip_path, &extract_dir)?;
|
SidecarManager::extract_zip(&zip_path, &extract_dir)?;
|
||||||
|
|
||||||
// Make the binary executable on Unix
|
// Make all binaries executable on Unix (sidecar, ffmpeg, ffprobe, etc.)
|
||||||
#[cfg(unix)]
|
#[cfg(unix)]
|
||||||
{
|
{
|
||||||
use std::os::unix::fs::PermissionsExt;
|
use std::os::unix::fs::PermissionsExt;
|
||||||
let binary_path = extract_dir.join("voice-to-notes-sidecar");
|
if let Ok(entries) = std::fs::read_dir(&extract_dir) {
|
||||||
if let Ok(meta) = std::fs::metadata(&binary_path) {
|
for entry in entries.flatten() {
|
||||||
let mut perms = meta.permissions();
|
let path = entry.path();
|
||||||
perms.set_mode(0o755);
|
if path.is_file() {
|
||||||
let _ = std::fs::set_permissions(&binary_path, perms);
|
if let Ok(meta) = std::fs::metadata(&path) {
|
||||||
|
let mut perms = meta.permissions();
|
||||||
|
perms.set_mode(0o755);
|
||||||
|
let _ = std::fs::set_permissions(&path, perms);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ use tauri::Manager;
|
|||||||
|
|
||||||
use commands::ai::{ai_chat, ai_configure, ai_list_providers};
|
use commands::ai::{ai_chat, ai_configure, ai_list_providers};
|
||||||
use commands::export::export_transcript;
|
use commands::export::export_transcript;
|
||||||
|
use commands::media::{check_file_exists, copy_file, create_dir, extract_audio};
|
||||||
use commands::project::{
|
use commands::project::{
|
||||||
create_project, delete_project, get_project, list_projects, load_project_file,
|
create_project, delete_project, get_project, list_projects, load_project_file,
|
||||||
load_project_transcript, save_project_file, save_project_transcript, update_segment,
|
load_project_transcript, save_project_file, save_project_transcript, update_segment,
|
||||||
@@ -73,6 +74,10 @@ pub fn run() {
|
|||||||
check_sidecar_update,
|
check_sidecar_update,
|
||||||
log_frontend,
|
log_frontend,
|
||||||
toggle_devtools,
|
toggle_devtools,
|
||||||
|
extract_audio,
|
||||||
|
check_file_exists,
|
||||||
|
copy_file,
|
||||||
|
create_dir,
|
||||||
])
|
])
|
||||||
.run(tauri::generate_context!())
|
.run(tauri::generate_context!())
|
||||||
.expect("error while running tauri application");
|
.expect("error while running tauri application");
|
||||||
|
|||||||
@@ -113,14 +113,21 @@ impl SidecarManager {
|
|||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Make executable on Unix
|
// Make all binaries executable on Unix (sidecar, ffmpeg, ffprobe, etc.)
|
||||||
#[cfg(unix)]
|
#[cfg(unix)]
|
||||||
{
|
{
|
||||||
use std::os::unix::fs::PermissionsExt;
|
use std::os::unix::fs::PermissionsExt;
|
||||||
if let Ok(meta) = std::fs::metadata(&binary_path) {
|
if let Ok(entries) = std::fs::read_dir(&extract_dir) {
|
||||||
let mut perms = meta.permissions();
|
for entry in entries.flatten() {
|
||||||
perms.set_mode(0o755);
|
let path = entry.path();
|
||||||
let _ = std::fs::set_permissions(&binary_path, perms);
|
if path.is_file() {
|
||||||
|
if let Ok(meta) = std::fs::metadata(&path) {
|
||||||
|
let mut perms = meta.permissions();
|
||||||
|
perms.set_mode(0o755);
|
||||||
|
let _ = std::fs::set_permissions(&path, perms);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"$schema": "https://schema.tauri.app/config/2",
|
"$schema": "https://schema.tauri.app/config/2",
|
||||||
"productName": "Voice to Notes",
|
"productName": "Voice to Notes",
|
||||||
"version": "0.2.28",
|
"version": "0.2.43",
|
||||||
"identifier": "com.voicetonotes.app",
|
"identifier": "com.voicetonotes.app",
|
||||||
"build": {
|
"build": {
|
||||||
"beforeDevCommand": "npm run dev",
|
"beforeDevCommand": "npm run dev",
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
<script lang="ts">
|
<script lang="ts">
|
||||||
import { invoke } from '@tauri-apps/api/core';
|
import { invoke } from '@tauri-apps/api/core';
|
||||||
import { segments, speakers } from '$lib/stores/transcript';
|
import { segments, speakers } from '$lib/stores/transcript';
|
||||||
import { settings } from '$lib/stores/settings';
|
import { settings, configureAIProvider } from '$lib/stores/settings';
|
||||||
|
|
||||||
interface ChatMessage {
|
interface ChatMessage {
|
||||||
role: 'user' | 'assistant';
|
role: 'user' | 'assistant';
|
||||||
@@ -45,22 +45,12 @@
|
|||||||
}));
|
}));
|
||||||
|
|
||||||
// Ensure the provider is configured with current credentials before chatting
|
// Ensure the provider is configured with current credentials before chatting
|
||||||
const s = $settings;
|
await configureAIProvider($settings);
|
||||||
const configMap: Record<string, Record<string, string>> = {
|
|
||||||
openai: { api_key: s.openai_api_key, model: s.openai_model },
|
|
||||||
anthropic: { api_key: s.anthropic_api_key, model: s.anthropic_model },
|
|
||||||
litellm: { api_key: s.litellm_api_key, api_base: s.litellm_api_base, model: s.litellm_model },
|
|
||||||
local: { model: s.local_model_path, base_url: 'http://localhost:8080' },
|
|
||||||
};
|
|
||||||
const config = configMap[s.ai_provider];
|
|
||||||
if (config) {
|
|
||||||
await invoke('ai_configure', { provider: s.ai_provider, config });
|
|
||||||
}
|
|
||||||
|
|
||||||
const result = await invoke<{ response: string }>('ai_chat', {
|
const result = await invoke<{ response: string }>('ai_chat', {
|
||||||
messages: chatMessages,
|
messages: chatMessages,
|
||||||
transcriptContext: getTranscriptContext(),
|
transcriptContext: getTranscriptContext(),
|
||||||
provider: s.ai_provider,
|
provider: $settings.ai_provider,
|
||||||
});
|
});
|
||||||
|
|
||||||
messages = [...messages, { role: 'assistant', content: result.response }];
|
messages = [...messages, { role: 'assistant', content: result.response }];
|
||||||
|
|||||||
@@ -4,9 +4,25 @@
|
|||||||
percent?: number;
|
percent?: number;
|
||||||
stage?: string;
|
stage?: string;
|
||||||
message?: string;
|
message?: string;
|
||||||
|
onCancel?: () => void;
|
||||||
}
|
}
|
||||||
|
|
||||||
let { visible = false, percent = 0, stage = '', message = '' }: Props = $props();
|
let { visible = false, percent = 0, stage = '', message = '', onCancel }: Props = $props();
|
||||||
|
|
||||||
|
let showConfirm = $state(false);
|
||||||
|
|
||||||
|
function handleCancelClick() {
|
||||||
|
showConfirm = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
function confirmCancel() {
|
||||||
|
showConfirm = false;
|
||||||
|
onCancel?.();
|
||||||
|
}
|
||||||
|
|
||||||
|
function dismissCancel() {
|
||||||
|
showConfirm = false;
|
||||||
|
}
|
||||||
|
|
||||||
// Pipeline steps in order
|
// Pipeline steps in order
|
||||||
const pipelineSteps = [
|
const pipelineSteps = [
|
||||||
@@ -89,6 +105,20 @@
|
|||||||
|
|
||||||
<p class="status-text">{message || 'Please wait...'}</p>
|
<p class="status-text">{message || 'Please wait...'}</p>
|
||||||
<p class="hint-text">This may take several minutes for large files</p>
|
<p class="hint-text">This may take several minutes for large files</p>
|
||||||
|
|
||||||
|
{#if onCancel && !showConfirm}
|
||||||
|
<button class="cancel-btn" onclick={handleCancelClick}>Cancel</button>
|
||||||
|
{/if}
|
||||||
|
|
||||||
|
{#if showConfirm}
|
||||||
|
<div class="confirm-box">
|
||||||
|
<p class="confirm-text">Processing is incomplete. If you cancel now, the transcription will need to be started over.</p>
|
||||||
|
<div class="confirm-actions">
|
||||||
|
<button class="confirm-keep" onclick={dismissCancel}>Continue Processing</button>
|
||||||
|
<button class="confirm-cancel" onclick={confirmCancel}>Cancel Processing</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{/if}
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
{/if}
|
{/if}
|
||||||
@@ -174,4 +204,62 @@
|
|||||||
font-size: 0.75rem;
|
font-size: 0.75rem;
|
||||||
color: #555;
|
color: #555;
|
||||||
}
|
}
|
||||||
|
.cancel-btn {
|
||||||
|
margin-top: 1.25rem;
|
||||||
|
width: 100%;
|
||||||
|
padding: 0.5rem;
|
||||||
|
background: none;
|
||||||
|
border: 1px solid #4a5568;
|
||||||
|
color: #999;
|
||||||
|
border-radius: 6px;
|
||||||
|
cursor: pointer;
|
||||||
|
font-size: 0.85rem;
|
||||||
|
}
|
||||||
|
.cancel-btn:hover {
|
||||||
|
color: #e0e0e0;
|
||||||
|
border-color: #e94560;
|
||||||
|
}
|
||||||
|
.confirm-box {
|
||||||
|
margin-top: 1.25rem;
|
||||||
|
padding: 0.75rem;
|
||||||
|
background: rgba(233, 69, 96, 0.08);
|
||||||
|
border: 1px solid #e94560;
|
||||||
|
border-radius: 6px;
|
||||||
|
}
|
||||||
|
.confirm-text {
|
||||||
|
margin: 0 0 0.75rem;
|
||||||
|
font-size: 0.8rem;
|
||||||
|
color: #e0e0e0;
|
||||||
|
line-height: 1.4;
|
||||||
|
}
|
||||||
|
.confirm-actions {
|
||||||
|
display: flex;
|
||||||
|
gap: 0.5rem;
|
||||||
|
}
|
||||||
|
.confirm-keep {
|
||||||
|
flex: 1;
|
||||||
|
padding: 0.4rem;
|
||||||
|
background: #0f3460;
|
||||||
|
border: 1px solid #4a5568;
|
||||||
|
color: #e0e0e0;
|
||||||
|
border-radius: 4px;
|
||||||
|
cursor: pointer;
|
||||||
|
font-size: 0.8rem;
|
||||||
|
}
|
||||||
|
.confirm-keep:hover {
|
||||||
|
background: #1a4a7a;
|
||||||
|
}
|
||||||
|
.confirm-cancel {
|
||||||
|
flex: 1;
|
||||||
|
padding: 0.4rem;
|
||||||
|
background: #e94560;
|
||||||
|
border: none;
|
||||||
|
color: white;
|
||||||
|
border-radius: 4px;
|
||||||
|
cursor: pointer;
|
||||||
|
font-size: 0.8rem;
|
||||||
|
}
|
||||||
|
.confirm-cancel:hover {
|
||||||
|
background: #d63851;
|
||||||
|
}
|
||||||
</style>
|
</style>
|
||||||
|
|||||||
@@ -57,6 +57,12 @@
|
|||||||
isReady = false;
|
isReady = false;
|
||||||
});
|
});
|
||||||
|
|
||||||
|
wavesurfer.on('error', (err: Error) => {
|
||||||
|
console.error('[voice-to-notes] WaveSurfer error:', err);
|
||||||
|
isLoading = false;
|
||||||
|
loadError = 'Failed to load audio';
|
||||||
|
});
|
||||||
|
|
||||||
if (audioUrl) {
|
if (audioUrl) {
|
||||||
loadAudio(audioUrl);
|
loadAudio(audioUrl);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -52,23 +52,27 @@ export async function loadSettings(): Promise<void> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function saveSettings(s: AppSettings): Promise<void> {
|
export async function configureAIProvider(s: AppSettings): Promise<void> {
|
||||||
settings.set(s);
|
|
||||||
await invoke('save_settings', { settings: s });
|
|
||||||
|
|
||||||
// Configure the AI provider in the Python sidecar
|
|
||||||
const configMap: Record<string, Record<string, string>> = {
|
const configMap: Record<string, Record<string, string>> = {
|
||||||
openai: { api_key: s.openai_api_key, model: s.openai_model },
|
openai: { api_key: s.openai_api_key, model: s.openai_model },
|
||||||
anthropic: { api_key: s.anthropic_api_key, model: s.anthropic_model },
|
anthropic: { api_key: s.anthropic_api_key, model: s.anthropic_model },
|
||||||
litellm: { api_key: s.litellm_api_key, api_base: s.litellm_api_base, model: s.litellm_model },
|
litellm: { api_key: s.litellm_api_key, api_base: s.litellm_api_base, model: s.litellm_model },
|
||||||
local: { model: s.ollama_model, base_url: s.ollama_url + '/v1' },
|
local: { model: s.ollama_model, base_url: s.ollama_url.replace(/\/+$/, '') + '/v1' },
|
||||||
};
|
};
|
||||||
const config = configMap[s.ai_provider];
|
const config = configMap[s.ai_provider];
|
||||||
if (config) {
|
if (config) {
|
||||||
try {
|
try {
|
||||||
await invoke('ai_configure', { provider: s.ai_provider, config });
|
await invoke('ai_configure', { provider: s.ai_provider, config });
|
||||||
} catch {
|
} catch {
|
||||||
// Sidecar may not be running yet — provider will be configured on first use
|
// Sidecar may not be running yet
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export async function saveSettings(s: AppSettings): Promise<void> {
|
||||||
|
settings.set(s);
|
||||||
|
await invoke('save_settings', { settings: s });
|
||||||
|
|
||||||
|
// Configure the AI provider in the Python sidecar
|
||||||
|
await configureAIProvider(s);
|
||||||
|
}
|
||||||
|
|||||||
@@ -10,7 +10,7 @@
|
|||||||
import SettingsModal from '$lib/components/SettingsModal.svelte';
|
import SettingsModal from '$lib/components/SettingsModal.svelte';
|
||||||
import SidecarSetup from '$lib/components/SidecarSetup.svelte';
|
import SidecarSetup from '$lib/components/SidecarSetup.svelte';
|
||||||
import { segments, speakers } from '$lib/stores/transcript';
|
import { segments, speakers } from '$lib/stores/transcript';
|
||||||
import { settings, loadSettings } from '$lib/stores/settings';
|
import { settings, loadSettings, configureAIProvider } from '$lib/stores/settings';
|
||||||
import type { Segment, Speaker } from '$lib/types/transcript';
|
import type { Segment, Speaker } from '$lib/types/transcript';
|
||||||
import { onMount, tick } from 'svelte';
|
import { onMount, tick } from 'svelte';
|
||||||
|
|
||||||
@@ -31,7 +31,9 @@
|
|||||||
// Project management state
|
// Project management state
|
||||||
let currentProjectPath = $state<string | null>(null);
|
let currentProjectPath = $state<string | null>(null);
|
||||||
let currentProjectName = $state('');
|
let currentProjectName = $state('');
|
||||||
|
let projectIsV2 = $state(false);
|
||||||
let audioFilePath = $state('');
|
let audioFilePath = $state('');
|
||||||
|
let audioWavPath = $state('');
|
||||||
|
|
||||||
async function checkSidecar() {
|
async function checkSidecar() {
|
||||||
try {
|
try {
|
||||||
@@ -54,6 +56,7 @@
|
|||||||
|
|
||||||
function handleSidecarSetupComplete() {
|
function handleSidecarSetupComplete() {
|
||||||
sidecarReady = true;
|
sidecarReady = true;
|
||||||
|
configureAIProvider($settings);
|
||||||
checkSidecarUpdate();
|
checkSidecarUpdate();
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -71,6 +74,7 @@
|
|||||||
});
|
});
|
||||||
checkSidecar().then(() => {
|
checkSidecar().then(() => {
|
||||||
if (sidecarReady) {
|
if (sidecarReady) {
|
||||||
|
configureAIProvider($settings);
|
||||||
checkSidecarUpdate();
|
checkSidecarUpdate();
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
@@ -117,25 +121,32 @@
|
|||||||
};
|
};
|
||||||
});
|
});
|
||||||
let isTranscribing = $state(false);
|
let isTranscribing = $state(false);
|
||||||
|
let transcriptionCancelled = $state(false);
|
||||||
let transcriptionProgress = $state(0);
|
let transcriptionProgress = $state(0);
|
||||||
let transcriptionStage = $state('');
|
let transcriptionStage = $state('');
|
||||||
let transcriptionMessage = $state('');
|
let transcriptionMessage = $state('');
|
||||||
|
let extractingAudio = $state(false);
|
||||||
|
|
||||||
|
function handleCancelProcessing() {
|
||||||
|
transcriptionCancelled = true;
|
||||||
|
isTranscribing = false;
|
||||||
|
transcriptionProgress = 0;
|
||||||
|
transcriptionStage = '';
|
||||||
|
transcriptionMessage = '';
|
||||||
|
// Clear any partial results
|
||||||
|
segments.set([]);
|
||||||
|
speakers.set([]);
|
||||||
|
}
|
||||||
|
|
||||||
// Speaker color palette for auto-assignment
|
// Speaker color palette for auto-assignment
|
||||||
const speakerColors = ['#e94560', '#4ecdc4', '#ffe66d', '#a8e6cf', '#ff8b94', '#c7ceea', '#ffd93d', '#6bcb77'];
|
const speakerColors = ['#e94560', '#4ecdc4', '#ffe66d', '#a8e6cf', '#ff8b94', '#c7ceea', '#ffd93d', '#6bcb77'];
|
||||||
|
|
||||||
async function saveProject() {
|
function buildProjectData(projectName: string) {
|
||||||
const defaultName = currentProjectName || 'Untitled';
|
return {
|
||||||
const outputPath = await save({
|
version: 2,
|
||||||
defaultPath: `${defaultName}.vtn`,
|
name: projectName,
|
||||||
filters: [{ name: 'Voice to Notes Project', extensions: ['vtn'] }],
|
source_file: audioFilePath,
|
||||||
});
|
audio_wav: 'audio.wav',
|
||||||
if (!outputPath) return;
|
|
||||||
|
|
||||||
const projectData = {
|
|
||||||
version: 1,
|
|
||||||
name: outputPath.split(/[\\/]/).pop()?.replace('.vtn', '') || defaultName,
|
|
||||||
audio_file: audioFilePath,
|
|
||||||
created_at: new Date().toISOString(),
|
created_at: new Date().toISOString(),
|
||||||
segments: $segments.map(seg => {
|
segments: $segments.map(seg => {
|
||||||
const speaker = $speakers.find(s => s.id === seg.speaker_id);
|
const speaker = $speakers.find(s => s.id === seg.speaker_id);
|
||||||
@@ -159,17 +170,75 @@
|
|||||||
color: s.color || '#e94560',
|
color: s.color || '#e94560',
|
||||||
})),
|
})),
|
||||||
};
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Save to a specific folder — creates .vtn + audio.wav inside it. */
|
||||||
|
async function saveToFolder(folderPath: string): Promise<boolean> {
|
||||||
|
const projectName = folderPath.split(/[\\/]/).pop() || currentProjectName || 'Untitled';
|
||||||
|
const vtnPath = `${folderPath}/${projectName}.vtn`;
|
||||||
|
const wavPath = `${folderPath}/audio.wav`;
|
||||||
|
const projectData = buildProjectData(projectName);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
await invoke('save_project_file', { path: outputPath, project: projectData });
|
await invoke('create_dir', { path: folderPath });
|
||||||
currentProjectPath = outputPath;
|
if (audioWavPath && audioWavPath !== wavPath) {
|
||||||
currentProjectName = projectData.name;
|
await invoke('copy_file', { src: audioWavPath, dst: wavPath });
|
||||||
|
audioWavPath = wavPath;
|
||||||
|
}
|
||||||
|
await invoke('save_project_file', { path: vtnPath, project: projectData });
|
||||||
|
currentProjectPath = vtnPath;
|
||||||
|
currentProjectName = projectName;
|
||||||
|
projectIsV2 = true;
|
||||||
|
return true;
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
console.error('Failed to save project:', err);
|
console.error('Failed to save project:', err);
|
||||||
alert(`Failed to save: ${err}`);
|
alert(`Failed to save: ${err}`);
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async function saveProject() {
|
||||||
|
// Already saved as v2 folder — save in place
|
||||||
|
if (currentProjectPath && projectIsV2) {
|
||||||
|
const folderPath = currentProjectPath.replace(/[\\/][^\\/]+$/, '');
|
||||||
|
await saveToFolder(folderPath);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// V1 project opened — migrate to folder structure
|
||||||
|
if (currentProjectPath && !projectIsV2) {
|
||||||
|
const oldVtnDir = currentProjectPath.replace(/[\\/][^\\/]+$/, '');
|
||||||
|
const projectName = currentProjectPath.split(/[\\/]/).pop()?.replace(/\.vtn$/i, '') || 'Untitled';
|
||||||
|
const folderPath = `${oldVtnDir}/${projectName}`;
|
||||||
|
const success = await saveToFolder(folderPath);
|
||||||
|
if (success) {
|
||||||
|
// Optionally remove the old .vtn file
|
||||||
|
try {
|
||||||
|
// Leave old file — user can delete manually
|
||||||
|
} catch {}
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Never saved — pick a folder
|
||||||
|
await saveProjectAs();
|
||||||
|
}
|
||||||
|
|
||||||
|
async function saveProjectAs() {
|
||||||
|
// Use save dialog so the user can type a new project name.
|
||||||
|
// The chosen path is treated as the project folder (created if needed).
|
||||||
|
const defaultName = currentProjectName || 'Untitled';
|
||||||
|
const chosenPath = await save({
|
||||||
|
defaultPath: defaultName,
|
||||||
|
title: 'Save Project — enter a project name',
|
||||||
|
});
|
||||||
|
if (!chosenPath) return;
|
||||||
|
|
||||||
|
// Strip any file extension the user may have typed (e.g. ".vtn")
|
||||||
|
const folderPath = chosenPath.replace(/\.[^.\\/]+$/, '');
|
||||||
|
await saveToFolder(folderPath);
|
||||||
|
}
|
||||||
|
|
||||||
async function openProject() {
|
async function openProject() {
|
||||||
const filePath = await open({
|
const filePath = await open({
|
||||||
filters: [{ name: 'Voice to Notes Project', extensions: ['vtn'] }],
|
filters: [{ name: 'Voice to Notes Project', extensions: ['vtn'] }],
|
||||||
@@ -179,9 +248,11 @@
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
const project = await invoke<{
|
const project = await invoke<{
|
||||||
version: number;
|
version?: number;
|
||||||
name: string;
|
name: string;
|
||||||
audio_file: string;
|
audio_file?: string;
|
||||||
|
source_file?: string;
|
||||||
|
audio_wav?: string;
|
||||||
segments: Array<{
|
segments: Array<{
|
||||||
text: string;
|
text: string;
|
||||||
start_ms: number;
|
start_ms: number;
|
||||||
@@ -231,10 +302,135 @@
|
|||||||
}));
|
}));
|
||||||
segments.set(newSegments);
|
segments.set(newSegments);
|
||||||
|
|
||||||
// Load audio
|
// Determine the directory the .vtn file is in
|
||||||
audioFilePath = project.audio_file;
|
const vtnDir = (filePath as string).replace(/[\\/][^\\/]+$/, '');
|
||||||
audioUrl = convertFileSrc(project.audio_file);
|
const version = project.version ?? 1;
|
||||||
waveformPlayer?.loadAudio(audioUrl);
|
projectIsV2 = version >= 2;
|
||||||
|
|
||||||
|
// Resolve audio for wavesurfer playback
|
||||||
|
if (version >= 2) {
|
||||||
|
// Version 2: audio_wav is relative to the .vtn directory, source_file is the original import path
|
||||||
|
audioFilePath = project.source_file || '';
|
||||||
|
const wavRelative = project.audio_wav || 'audio.wav';
|
||||||
|
const resolvedWav = `${vtnDir}/${wavRelative}`;
|
||||||
|
|
||||||
|
const wavExists = await invoke<boolean>('check_file_exists', { path: resolvedWav });
|
||||||
|
if (wavExists) {
|
||||||
|
audioWavPath = resolvedWav;
|
||||||
|
audioUrl = convertFileSrc(resolvedWav);
|
||||||
|
waveformPlayer?.loadAudio(audioUrl);
|
||||||
|
} else {
|
||||||
|
// WAV missing — try re-extracting from the original source file
|
||||||
|
const sourceExists = audioFilePath ? await invoke<boolean>('check_file_exists', { path: audioFilePath }) : false;
|
||||||
|
if (sourceExists) {
|
||||||
|
extractingAudio = true;
|
||||||
|
await tick();
|
||||||
|
try {
|
||||||
|
const outputPath = `${vtnDir}/${wavRelative}`;
|
||||||
|
const wavPath = await invoke<string>('extract_audio', { filePath: audioFilePath, outputPath });
|
||||||
|
audioWavPath = wavPath;
|
||||||
|
audioUrl = convertFileSrc(wavPath);
|
||||||
|
waveformPlayer?.loadAudio(audioUrl);
|
||||||
|
} catch (err) {
|
||||||
|
console.error('Failed to re-extract audio:', err);
|
||||||
|
alert(`Failed to re-extract audio: ${err}`);
|
||||||
|
} finally {
|
||||||
|
extractingAudio = false;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Both missing — ask user to locate the file
|
||||||
|
const shouldRelink = confirm(
|
||||||
|
'The audio file for this project could not be found.\n\n' +
|
||||||
|
`Original source: ${audioFilePath || '(unknown)'}\n\n` +
|
||||||
|
'Would you like to locate the file?'
|
||||||
|
);
|
||||||
|
if (shouldRelink) {
|
||||||
|
const newPath = await open({
|
||||||
|
multiple: false,
|
||||||
|
filters: [{
|
||||||
|
name: 'Audio/Video',
|
||||||
|
extensions: ['mp3', 'wav', 'flac', 'ogg', 'm4a', 'aac', 'wma',
|
||||||
|
'mp4', 'mkv', 'avi', 'mov', 'webm'],
|
||||||
|
}],
|
||||||
|
});
|
||||||
|
if (newPath) {
|
||||||
|
audioFilePath = newPath;
|
||||||
|
extractingAudio = true;
|
||||||
|
await tick();
|
||||||
|
try {
|
||||||
|
const outputPath = `${vtnDir}/${wavRelative}`;
|
||||||
|
const wavPath = await invoke<string>('extract_audio', { filePath: newPath, outputPath });
|
||||||
|
audioWavPath = wavPath;
|
||||||
|
audioUrl = convertFileSrc(wavPath);
|
||||||
|
waveformPlayer?.loadAudio(audioUrl);
|
||||||
|
} catch (err) {
|
||||||
|
console.error('Failed to extract audio from re-linked file:', err);
|
||||||
|
alert(`Failed to extract audio: ${err}`);
|
||||||
|
} finally {
|
||||||
|
extractingAudio = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Version 1 (legacy): audio_file is the source path
|
||||||
|
const sourceFile = project.audio_file || '';
|
||||||
|
audioFilePath = sourceFile;
|
||||||
|
|
||||||
|
const sourceExists = sourceFile ? await invoke<boolean>('check_file_exists', { path: sourceFile }) : false;
|
||||||
|
if (sourceExists) {
|
||||||
|
// Extract WAV next to the .vtn file for playback
|
||||||
|
extractingAudio = true;
|
||||||
|
await tick();
|
||||||
|
try {
|
||||||
|
const outputPath = `${vtnDir}/audio.wav`;
|
||||||
|
const wavPath = await invoke<string>('extract_audio', { filePath: sourceFile, outputPath });
|
||||||
|
audioWavPath = wavPath;
|
||||||
|
audioUrl = convertFileSrc(wavPath);
|
||||||
|
waveformPlayer?.loadAudio(audioUrl);
|
||||||
|
} catch (err) {
|
||||||
|
console.error('Failed to extract audio:', err);
|
||||||
|
alert(`Failed to extract audio: ${err}`);
|
||||||
|
} finally {
|
||||||
|
extractingAudio = false;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Source missing — ask user to locate the file
|
||||||
|
const shouldRelink = confirm(
|
||||||
|
'The audio file for this project could not be found.\n\n' +
|
||||||
|
`Original path: ${sourceFile || '(unknown)'}\n\n` +
|
||||||
|
'Would you like to locate the file?'
|
||||||
|
);
|
||||||
|
if (shouldRelink) {
|
||||||
|
const newPath = await open({
|
||||||
|
multiple: false,
|
||||||
|
filters: [{
|
||||||
|
name: 'Audio/Video',
|
||||||
|
extensions: ['mp3', 'wav', 'flac', 'ogg', 'm4a', 'aac', 'wma',
|
||||||
|
'mp4', 'mkv', 'avi', 'mov', 'webm'],
|
||||||
|
}],
|
||||||
|
});
|
||||||
|
if (newPath) {
|
||||||
|
audioFilePath = newPath;
|
||||||
|
extractingAudio = true;
|
||||||
|
await tick();
|
||||||
|
try {
|
||||||
|
const outputPath = `${vtnDir}/audio.wav`;
|
||||||
|
const wavPath = await invoke<string>('extract_audio', { filePath: newPath, outputPath });
|
||||||
|
audioWavPath = wavPath;
|
||||||
|
audioUrl = convertFileSrc(wavPath);
|
||||||
|
waveformPlayer?.loadAudio(audioUrl);
|
||||||
|
} catch (err) {
|
||||||
|
console.error('Failed to extract audio from re-linked file:', err);
|
||||||
|
alert(`Failed to extract audio: ${err}`);
|
||||||
|
} finally {
|
||||||
|
extractingAudio = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
currentProjectPath = filePath as string;
|
currentProjectPath = filePath as string;
|
||||||
currentProjectName = project.name;
|
currentProjectName = project.name;
|
||||||
@@ -265,9 +461,35 @@
|
|||||||
});
|
});
|
||||||
if (!filePath) return;
|
if (!filePath) return;
|
||||||
|
|
||||||
// Track the original file path and convert to asset URL for wavesurfer
|
// Always extract audio to WAV for wavesurfer playback
|
||||||
|
extractingAudio = true;
|
||||||
|
await tick();
|
||||||
|
try {
|
||||||
|
const wavPath = await invoke<string>('extract_audio', { filePath });
|
||||||
|
audioWavPath = wavPath;
|
||||||
|
} catch (err) {
|
||||||
|
console.error('[voice-to-notes] Failed to extract audio:', err);
|
||||||
|
const msg = String(err);
|
||||||
|
if (msg.includes('ffmpeg not found')) {
|
||||||
|
alert(
|
||||||
|
'FFmpeg is required to extract audio.\n\n' +
|
||||||
|
'Install FFmpeg:\n' +
|
||||||
|
' Windows: winget install ffmpeg\n' +
|
||||||
|
' macOS: brew install ffmpeg\n' +
|
||||||
|
' Linux: sudo apt install ffmpeg\n\n' +
|
||||||
|
'Then restart Voice to Notes and try again.'
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
alert(`Failed to extract audio: ${msg}`);
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
} finally {
|
||||||
|
extractingAudio = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Track the original file path for the sidecar (it does its own conversion)
|
||||||
audioFilePath = filePath;
|
audioFilePath = filePath;
|
||||||
audioUrl = convertFileSrc(filePath);
|
audioUrl = convertFileSrc(audioWavPath);
|
||||||
waveformPlayer?.loadAudio(audioUrl);
|
waveformPlayer?.loadAudio(audioUrl);
|
||||||
|
|
||||||
// Clear previous results
|
// Clear previous results
|
||||||
@@ -276,6 +498,7 @@
|
|||||||
|
|
||||||
// Start pipeline (transcription + diarization)
|
// Start pipeline (transcription + diarization)
|
||||||
isTranscribing = true;
|
isTranscribing = true;
|
||||||
|
transcriptionCancelled = false;
|
||||||
transcriptionProgress = 0;
|
transcriptionProgress = 0;
|
||||||
transcriptionStage = 'Starting...';
|
transcriptionStage = 'Starting...';
|
||||||
transcriptionMessage = 'Initializing pipeline...';
|
transcriptionMessage = 'Initializing pipeline...';
|
||||||
@@ -386,6 +609,9 @@
|
|||||||
numSpeakers: $settings.num_speakers && $settings.num_speakers > 0 ? $settings.num_speakers : undefined,
|
numSpeakers: $settings.num_speakers && $settings.num_speakers > 0 ? $settings.num_speakers : undefined,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// If cancelled while processing, discard results
|
||||||
|
if (transcriptionCancelled) return;
|
||||||
|
|
||||||
// Create speaker entries from pipeline result
|
// Create speaker entries from pipeline result
|
||||||
const newSpeakers: Speaker[] = (result.speakers || []).map((label, idx) => ({
|
const newSpeakers: Speaker[] = (result.speakers || []).map((label, idx) => ({
|
||||||
id: `speaker-${idx}`,
|
id: `speaker-${idx}`,
|
||||||
@@ -524,7 +750,10 @@
|
|||||||
</button>
|
</button>
|
||||||
{#if $segments.length > 0}
|
{#if $segments.length > 0}
|
||||||
<button class="settings-btn" onclick={saveProject}>
|
<button class="settings-btn" onclick={saveProject}>
|
||||||
Save Project
|
Save
|
||||||
|
</button>
|
||||||
|
<button class="settings-btn" onclick={saveProjectAs}>
|
||||||
|
Save As
|
||||||
</button>
|
</button>
|
||||||
{/if}
|
{/if}
|
||||||
<button class="import-btn" onclick={handleFileImport} disabled={isTranscribing}>
|
<button class="import-btn" onclick={handleFileImport} disabled={isTranscribing}>
|
||||||
@@ -573,8 +802,18 @@
|
|||||||
percent={transcriptionProgress}
|
percent={transcriptionProgress}
|
||||||
stage={transcriptionStage}
|
stage={transcriptionStage}
|
||||||
message={transcriptionMessage}
|
message={transcriptionMessage}
|
||||||
|
onCancel={handleCancelProcessing}
|
||||||
/>
|
/>
|
||||||
|
|
||||||
|
{#if extractingAudio}
|
||||||
|
<div class="extraction-overlay">
|
||||||
|
<div class="extraction-card">
|
||||||
|
<div class="extraction-spinner"></div>
|
||||||
|
<p>Extracting audio...</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{/if}
|
||||||
|
|
||||||
<SettingsModal
|
<SettingsModal
|
||||||
visible={showSettings}
|
visible={showSettings}
|
||||||
onClose={() => showSettings = false}
|
onClose={() => showSettings = false}
|
||||||
@@ -781,4 +1020,39 @@
|
|||||||
.update-dismiss:hover {
|
.update-dismiss:hover {
|
||||||
color: #e0e0e0;
|
color: #e0e0e0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Audio extraction overlay */
|
||||||
|
.extraction-overlay {
|
||||||
|
position: fixed;
|
||||||
|
inset: 0;
|
||||||
|
background: rgba(0, 0, 0, 0.8);
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
justify-content: center;
|
||||||
|
z-index: 9999;
|
||||||
|
}
|
||||||
|
.extraction-card {
|
||||||
|
background: #16213e;
|
||||||
|
padding: 2rem 2.5rem;
|
||||||
|
border-radius: 12px;
|
||||||
|
color: #e0e0e0;
|
||||||
|
border: 1px solid #2a3a5e;
|
||||||
|
box-shadow: 0 8px 32px rgba(0, 0, 0, 0.5);
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
align-items: center;
|
||||||
|
gap: 1rem;
|
||||||
|
}
|
||||||
|
.extraction-card p {
|
||||||
|
margin: 0;
|
||||||
|
font-size: 1rem;
|
||||||
|
}
|
||||||
|
.extraction-spinner {
|
||||||
|
width: 32px;
|
||||||
|
height: 32px;
|
||||||
|
border: 3px solid #2a3a5e;
|
||||||
|
border-top-color: #e94560;
|
||||||
|
border-radius: 50%;
|
||||||
|
animation: spin 0.8s linear infinite;
|
||||||
|
}
|
||||||
</style>
|
</style>
|
||||||
|
|||||||
Reference in New Issue
Block a user