diff --git a/src-tauri/Cargo.lock b/src-tauri/Cargo.lock index dc597cd..36705ca 100644 --- a/src-tauri/Cargo.lock +++ b/src-tauri/Cargo.lock @@ -1498,6 +1498,12 @@ dependencies = [ "pin-project-lite", ] +[[package]] +name = "http-range" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21dec9db110f5f872ed9699c3ecf50cf16f423502706ba5c72462e28d3157573" + [[package]] name = "httparse" version = "1.10.1" @@ -3595,6 +3601,7 @@ dependencies = [ "gtk", "heck 0.5.0", "http", + "http-range", "jni", "libc", "log", diff --git a/src-tauri/Cargo.toml b/src-tauri/Cargo.toml index bc0d0ee..174dde3 100644 --- a/src-tauri/Cargo.toml +++ b/src-tauri/Cargo.toml @@ -14,7 +14,7 @@ crate-type = ["staticlib", "cdylib", "rlib"] tauri-build = { version = "2", features = [] } [dependencies] -tauri = { version = "2", features = [] } +tauri = { version = "2", features = ["protocol-asset"] } tauri-plugin-opener = "2" serde = { version = "1", features = ["derive"] } serde_json = "1" diff --git a/src-tauri/src/commands/ai.rs b/src-tauri/src/commands/ai.rs index fa2c283..b474702 100644 --- a/src-tauri/src/commands/ai.rs +++ b/src-tauri/src/commands/ai.rs @@ -25,7 +25,7 @@ pub fn ai_chat( let manager = get_sidecar()?; let request_id = uuid::Uuid::new_v4().to_string(); - let mut payload = json!({ + let payload = json!({ "action": "chat", "messages": messages, "transcript_context": transcript_context.unwrap_or_default(), diff --git a/src-tauri/src/commands/settings.rs b/src-tauri/src/commands/settings.rs index 9391fa1..e750807 100644 --- a/src-tauri/src/commands/settings.rs +++ b/src-tauri/src/commands/settings.rs @@ -1,2 +1,34 @@ -// Settings commands — app preferences, model selection, AI provider config -// TODO: Implement when settings UI is built +use serde_json::{json, Value}; +use std::fs; +use std::path::PathBuf; + +use crate::llama::LlamaManager; + +fn settings_path() -> PathBuf { + LlamaManager::data_dir().join("settings.json") +} + +/// Load 
app settings from disk.
+///
+/// Returns an empty JSON object when the settings file is missing, cannot be
+/// read, or does not contain valid JSON.
+#[tauri::command]
+pub fn load_settings() -> Value {
+    // Reading directly also covers the "file does not exist" case, so no
+    // separate `exists()` probe is needed.
+    fs::read_to_string(settings_path())
+        .ok()
+        .and_then(|content| serde_json::from_str(&content).ok())
+        .unwrap_or_else(|| json!({}))
+}
+
+/// Save app settings to disk.
+///
+/// The value is pretty-printed as JSON, written to a temporary sibling file
+/// and then renamed over `settings.json`, so an interrupted write cannot leave
+/// a truncated file that `load_settings` would silently replace with `{}`.
+///
+/// # Errors
+///
+/// Returns a message if the settings directory cannot be created, the value
+/// cannot be serialized, or the file cannot be written.
+#[tauri::command]
+pub fn save_settings(settings: Value) -> Result<(), String> {
+    let path = settings_path();
+    if let Some(parent) = path.parent() {
+        fs::create_dir_all(parent).map_err(|e| format!("Cannot create settings dir: {e}"))?;
+    }
+    let json = serde_json::to_string_pretty(&settings).map_err(|e| e.to_string())?;
+
+    // Write-then-rename keeps the previous settings intact if the write fails.
+    let tmp_path = path.with_extension("json.tmp");
+    fs::write(&tmp_path, json).map_err(|e| format!("Cannot write settings: {e}"))?;
+    fs::rename(&tmp_path, &path).map_err(|e| format!("Cannot write settings: {e}"))?;
+    Ok(())
+}
diff --git a/src-tauri/src/commands/system.rs b/src-tauri/src/commands/system.rs
index 1081721..daed78d 100644
--- a/src-tauri/src/commands/system.rs
+++ b/src-tauri/src/commands/system.rs
@@ -1,2 +1,64 @@
-// System commands — hardware detection, llama-server lifecycle
-// TODO: Implement hardware detection and llama-server management
+use serde_json::{json, Value};
+
+use crate::llama::{LlamaConfig, LlamaManager, LlamaStatus};
+
+use std::path::PathBuf;
+use std::sync::OnceLock;
+
+/// Global llama manager — persists across command invocations.
+///
+/// NOTE: Rust does not run destructors for `static` items, so the manager's
+/// `Drop` impl will not stop the server when the process exits; `llama_stop`
+/// has to be called explicitly for that.
+fn llama_manager() -> &'static LlamaManager {
+    static INSTANCE: OnceLock<LlamaManager> = OnceLock::new();
+    INSTANCE.get_or_init(LlamaManager::new)
+}
+
+/// Start the local llama-server with a GGUF model.
+///
+/// Every option that is `None` falls back to the matching field of
+/// `LlamaConfig::default()`.
+///
+/// Declared with `command(async)` so Tauri runs it off the main thread:
+/// `LlamaManager::start` blocks until the server is ready, which can take up
+/// to 60 seconds, and a plain synchronous command would freeze the UI.
+///
+/// # Errors
+///
+/// Returns the error message produced by `LlamaManager::start`.
+#[tauri::command(async)]
+pub fn llama_start(
+    model_path: String,
+    binary_path: Option<String>,
+    port: Option<u16>,
+    n_gpu_layers: Option<i32>,
+    context_size: Option<u32>,
+    threads: Option<u32>,
+) -> Result<LlamaStatus, String> {
+    // Single source of truth for the fallbacks instead of repeating literals.
+    let defaults = LlamaConfig::default();
+    let config = LlamaConfig {
+        binary_path: binary_path.map_or(defaults.binary_path, PathBuf::from),
+        model_path: PathBuf::from(model_path),
+        port: port.unwrap_or(defaults.port),
+        n_gpu_layers: n_gpu_layers.unwrap_or(defaults.n_gpu_layers),
+        context_size: context_size.unwrap_or(defaults.context_size),
+        threads: threads.unwrap_or(defaults.threads),
+    };
+
+    llama_manager().start(&config)
+}
+
+/// Stop the local llama-server.
+///
+/// Succeeds without doing anything when no server is running.
+#[tauri::command]
+pub fn llama_stop() -> Result<(), String> {
+    llama_manager().stop()
+}
+
+/// Get the status of the local llama-server.
+#[tauri::command]
+pub fn llama_status() -> LlamaStatus {
+    llama_manager().status()
+}
+
+/// List available GGUF models in the models directory.
+///
+/// The result is a JSON object with two keys: `models` (the entries returned
+/// by `LlamaManager::list_models`) and `models_dir` (the directory that was
+/// scanned, lossily converted to a string).
+#[tauri::command]
+pub fn llama_list_models() -> Value {
+    let models = LlamaManager::list_models();
+    json!({
+        "models": models,
+        "models_dir": LlamaManager::models_dir().to_string_lossy(),
+    })
+}
+
+/// Get the app data directory path.
+#[tauri::command] +pub fn get_data_dir() -> String { + LlamaManager::data_dir().to_string_lossy().to_string() +} diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs index cd5b641..e26668f 100644 --- a/src-tauri/src/lib.rs +++ b/src-tauri/src/lib.rs @@ -1,11 +1,14 @@ pub mod commands; pub mod db; +pub mod llama; pub mod sidecar; pub mod state; use commands::ai::{ai_chat, ai_configure, ai_list_providers}; use commands::export::export_transcript; use commands::project::{create_project, get_project, list_projects}; +use commands::settings::{load_settings, save_settings}; +use commands::system::{get_data_dir, llama_list_models, llama_start, llama_status, llama_stop}; use commands::transcribe::{run_pipeline, transcribe_file}; #[cfg_attr(mobile, tauri::mobile_entry_point)] @@ -23,6 +26,13 @@ pub fn run() { ai_chat, ai_list_providers, ai_configure, + llama_start, + llama_stop, + llama_status, + llama_list_models, + get_data_dir, + load_settings, + save_settings, ]) .run(tauri::generate_context!()) .expect("error while running tauri application"); diff --git a/src-tauri/src/llama/mod.rs b/src-tauri/src/llama/mod.rs new file mode 100644 index 0000000..4cebccf --- /dev/null +++ b/src-tauri/src/llama/mod.rs @@ -0,0 +1,307 @@ +//! Llama-server lifecycle management. +//! +//! Manages a bundled llama-server (llama.cpp) binary that exposes an +//! OpenAI-compatible API on localhost. The Rust backend handles: +//! - Finding or downloading the llama-server binary +//! - Spawning the process with a GGUF model file +//! - Port allocation and health checking +//! - Clean shutdown on app exit + +use std::net::TcpListener; +use std::path::PathBuf; +use std::process::{Child, Command, Stdio}; +use std::sync::Mutex; +use std::time::{Duration, Instant}; + +use serde::{Deserialize, Serialize}; + +/// Configuration for the llama-server instance. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct LlamaConfig { + /// Path to the llama-server binary. 
+    pub binary_path: PathBuf,
+    /// Path to the GGUF model file.
+    pub model_path: PathBuf,
+    /// Port to listen on (0 = auto-assign).
+    pub port: u16,
+    /// Number of GPU layers to offload (-1 = all, 0 = CPU only).
+    pub n_gpu_layers: i32,
+    /// Context window size.
+    pub context_size: u32,
+    /// Number of threads for CPU inference.
+    pub threads: u32,
+}
+
+impl Default for LlamaConfig {
+    /// CPU-only defaults: `llama-server` as the binary, no model, an
+    /// auto-assigned port, a 4096-token context and 4 threads.
+    fn default() -> Self {
+        Self {
+            binary_path: PathBuf::from("llama-server"),
+            model_path: PathBuf::new(),
+            port: 0,
+            n_gpu_layers: 0,
+            context_size: 4096,
+            threads: 4,
+        }
+    }
+}
+
+/// Status of the llama-server.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct LlamaStatus {
+    /// Whether a server process is currently tracked by the manager.
+    pub running: bool,
+    /// Port the server was started on.
+    pub port: u16,
+    /// Name of the loaded model (the model file name without its extension).
+    pub model: String,
+    /// Base URL of the server (`http://127.0.0.1:<port>`), or an empty string
+    /// when the server is not running.
+    pub url: String,
+}
+
+/// Manages the llama-server process lifecycle.
+pub struct LlamaManager {
+    /// Handle of the spawned server, `None` while no server is tracked.
+    process: Mutex<Option<Child>>,
+    /// Port of the most recently started server.
+    port: Mutex<u16>,
+    /// Despite the name this holds the model's file stem, not its full path.
+    model_path: Mutex<String>,
+}
+
+impl LlamaManager {
+    /// Create a manager with no server running.
+    pub fn new() -> Self {
+        Self {
+            process: Mutex::new(None),
+            port: Mutex::new(0),
+            model_path: Mutex::new(String::new()),
+        }
+    }
+
+    /// Get the data directory for Voice to Notes.
+    ///
+    /// This is `.voicetonotes` under `$HOME`, falling back to `%USERPROFILE%`
+    /// and finally to the current directory when neither variable is set.
+    pub fn data_dir() -> PathBuf {
+        let home = std::env::var("HOME")
+            .or_else(|_| std::env::var("USERPROFILE"))
+            .unwrap_or_else(|_| ".".to_string());
+        PathBuf::from(home).join(".voicetonotes")
+    }
+
+    /// Get the models directory.
+    pub fn models_dir() -> PathBuf {
+        Self::data_dir().join("models")
+    }
+
+    /// Find an available port for the server.
+    ///
+    /// Binds `127.0.0.1:0` so the OS picks a free port, then reads it back.
+    /// The listener is dropped on return, so the port is only *likely* to
+    /// still be free when llama-server binds it; another process can claim it
+    /// in between.
+    fn find_available_port() -> Result<u16, String> {
+        let listener =
+            TcpListener::bind("127.0.0.1:0").map_err(|e| format!("Cannot bind port: {e}"))?;
+        let port = listener
+            .local_addr()
+            .map_err(|e| format!("Cannot get port: {e}"))?
+            .port();
+        Ok(port)
+    }
+
+    /// Start the llama-server with the given configuration.
+    ///
+    /// If a server is already running, its status is returned and `config`
+    /// is ignored. A previously started server that has since exited is
+    /// forgotten and a new one is spawned. The call blocks until the server
+    /// is ready or `wait_for_ready` gives up.
+    ///
+    /// # Errors
+    ///
+    /// Returns a message when the binary or model file is missing, no port
+    /// can be allocated, the process cannot be spawned, or the server does
+    /// not become ready. In the last case the spawned process is stopped.
+    pub fn start(&self, config: &LlamaConfig) -> Result<LlamaStatus, String> {
+        // Hold the process lock across check, spawn and store so two
+        // concurrent callers cannot both spawn a server.
+        let mut proc = self.process.lock().map_err(|e| e.to_string())?;
+
+        if let Some(child) = proc.as_mut() {
+            // Only a child that has definitely exited is replaced.
+            let exited = matches!(child.try_wait(), Ok(Some(_)));
+            if !exited {
+                let port = *self.port.lock().map_err(|e| e.to_string())?;
+                let model = self.model_path.lock().map_err(|e| e.to_string())?.clone();
+                return Ok(LlamaStatus {
+                    running: true,
+                    port,
+                    model,
+                    url: format!("http://127.0.0.1:{port}"),
+                });
+            }
+            *proc = None;
+        }
+
+        // Validate paths. A bare command name such as the default
+        // "llama-server" is resolved through PATH by `Command`, so an
+        // existence check relative to the working directory would wrongly
+        // reject it; spawn reports the error in that case.
+        let is_bare_name = config
+            .binary_path
+            .parent()
+            .map_or(true, |dir| dir.as_os_str().is_empty());
+        if !is_bare_name && !config.binary_path.exists() {
+            return Err(format!(
+                "llama-server binary not found at: {}",
+                config.binary_path.display()
+            ));
+        }
+        if !config.model_path.exists() {
+            return Err(format!(
+                "Model file not found at: {}",
+                config.model_path.display()
+            ));
+        }
+
+        // Determine port
+        let port = if config.port == 0 {
+            Self::find_available_port()?
+        } else {
+            config.port
+        };
+
+        // Build command. Output is discarded: nothing reads the child's
+        // stdout/stderr, and an unread pipe would block the server once the
+        // pipe buffer fills up.
+        let mut cmd = Command::new(&config.binary_path);
+        cmd.arg("--model")
+            .arg(&config.model_path)
+            .arg("--port")
+            .arg(port.to_string())
+            .arg("--ctx-size")
+            .arg(config.context_size.to_string())
+            .arg("--threads")
+            .arg(config.threads.to_string())
+            .arg("--n-gpu-layers")
+            .arg(config.n_gpu_layers.to_string())
+            .stdout(Stdio::null())
+            .stderr(Stdio::null());
+
+        let child = cmd
+            .spawn()
+            .map_err(|e| format!("Failed to start llama-server: {e}"))?;
+
+        // Store state
+        let model_name = config
+            .model_path
+            .file_stem()
+            .map(|s| s.to_string_lossy().to_string())
+            .unwrap_or_default();
+
+        *proc = Some(child);
+        *self.port.lock().map_err(|e| e.to_string())? = port;
+        *self.model_path.lock().map_err(|e| e.to_string())? = model_name.clone();
+
+        // `wait_for_ready` and `stop` take the process lock themselves.
+        drop(proc);
+
+        // Wait for the server to be ready; never leave a half-started child
+        // behind when that fails.
+        if let Err(err) = self.wait_for_ready(port) {
+            let _ = self.stop();
+            return Err(err);
+        }
+
+        Ok(LlamaStatus {
+            running: true,
+            port,
+            model: model_name,
+            url: format!("http://127.0.0.1:{port}"),
+        })
+    }
+
+    /// Wait for the llama-server health
endpoint to respond.
+    ///
+    /// Polls every 500 ms for up to 60 seconds. Each round first checks that
+    /// the child process is still tracked and alive, then probes
+    /// `GET /health` on `127.0.0.1:<port>`.
+    ///
+    /// # Errors
+    ///
+    /// Returns a message when the timeout elapses (the process is stopped),
+    /// the process exits or is stopped by another caller, or its state
+    /// cannot be queried.
+    fn wait_for_ready(&self, port: u16) -> Result<(), String> {
+        let start = Instant::now();
+        let timeout = Duration::from_secs(60); // Models can take time to load
+        // Built once; no per-iteration parsing and no `unwrap`.
+        let addr = std::net::SocketAddr::from(([127, 0, 0, 1], port));
+
+        loop {
+            if start.elapsed() > timeout {
+                // Kill the process since it didn't start in time
+                self.stop().ok();
+                return Err("llama-server did not start within 60 seconds".to_string());
+            }
+
+            // Check if process is still alive. The guard is scoped so the
+            // lock is not held while probing or sleeping.
+            {
+                let mut proc = self.process.lock().map_err(|e| e.to_string())?;
+                match proc.as_mut().map(|child| child.try_wait()) {
+                    Some(Ok(Some(status))) => {
+                        *proc = None;
+                        return Err(format!("llama-server exited with status: {status}"));
+                    }
+                    Some(Ok(None)) => {} // Still running
+                    Some(Err(e)) => {
+                        return Err(format!("Error checking process: {e}"));
+                    }
+                    // `stop` ran concurrently; polling on would only time out.
+                    None => {
+                        return Err("llama-server was stopped before it became ready".to_string());
+                    }
+                }
+            }
+
+            if Self::probe_health(&addr) {
+                return Ok(());
+            }
+            std::thread::sleep(Duration::from_millis(500));
+        }
+    }
+
+    /// Send one `GET /health` request and report whether the server is ready.
+    ///
+    /// Per the llama.cpp server README, `/health` answers 503 while the model
+    /// is still loading and 200 once it is ready. Any other complete HTTP
+    /// status (for example 404 from a build without `/health`) is treated as
+    /// ready, because the server is answering requests. Connection failures,
+    /// timeouts and incomplete responses count as not ready.
+    fn probe_health(addr: &std::net::SocketAddr) -> bool {
+        use std::io::{Read, Write};
+
+        let io_timeout = Duration::from_millis(500);
+        let mut stream = match std::net::TcpStream::connect_timeout(addr, io_timeout) {
+            Ok(stream) => stream,
+            Err(_) => return false,
+        };
+        // Best effort: without these a stalled server could block the probe.
+        let _ = stream.set_read_timeout(Some(io_timeout));
+        let _ = stream.set_write_timeout(Some(io_timeout));
+
+        let request = format!("GET /health HTTP/1.1\r\nHost: {addr}\r\nConnection: close\r\n\r\n");
+        if stream.write_all(request.as_bytes()).is_err() {
+            return false;
+        }
+
+        // Only the status line ("HTTP/1.1 200 OK") is needed.
+        let mut head = [0u8; 64];
+        let len = match stream.read(&mut head) {
+            Ok(len) => len,
+            Err(_) => return false,
+        };
+        let text = String::from_utf8_lossy(&head[..len]);
+        let mut parts = text.split_whitespace();
+        match (parts.next(), parts.next()) {
+            // Require a full three-digit code so a short read is retried.
+            (Some(proto), Some(code)) if proto.starts_with("HTTP/") => {
+                code.len() == 3 && code != "503"
+            }
+            _ => false,
+        }
+    }
+
+    /// Stop the llama-server process.
+    ///
+    /// Kills the tracked child, if any, and waits for it so it is reaped.
+    /// Errors from `kill`/`wait` are ignored; calling this with no server
+    /// running is a no-op.
+    pub fn stop(&self) -> Result<(), String> {
+        let mut proc = self.process.lock().map_err(|e| e.to_string())?;
+        if let Some(mut child) = proc.take() {
+            let _ = child.kill();
+            let _ = child.wait();
+        }
+        Ok(())
+    }
+
+    /// Get the current status.
+    ///
+    /// A tracked process that has already exited is forgotten and reported
+    /// as not running. `port` and `model` keep the values of the most recent
+    /// start; `url` is empty unless the server is running.
+    pub fn status(&self) -> LlamaStatus {
+        let running = self.process.lock().ok().map_or(false, |mut proc| {
+            let state = proc.as_mut().map(|child| child.try_wait());
+            match state {
+                Some(Ok(Some(_))) => {
+                    *proc = None;
+                    false
+                }
+                // Alive, or its state could not be queried: still tracked.
+                Some(_) => true,
+                None => false,
+            }
+        });
+        let port = self.port.lock().ok().map_or(0, |p| *p);
+        let model = self
+            .model_path
+            .lock()
+            .ok()
+            .map_or_else(String::new, |m| m.clone());
+
+        LlamaStatus {
+            running,
+            port,
+            model,
+            url: if running {
+                format!("http://127.0.0.1:{port}")
+            } else {
+                String::new()
+            },
+        }
+    }
+
+    /// List available GGUF model files in the models directory.
+ pub fn list_models() -> Vec { + let models_dir = Self::models_dir(); + if !models_dir.exists() { + return vec![]; + } + + let mut models = vec![]; + if let Ok(entries) = std::fs::read_dir(&models_dir) { + for entry in entries.flatten() { + let path = entry.path(); + if path.extension().map_or(false, |ext| ext == "gguf") { + let name = path + .file_stem() + .map(|s| s.to_string_lossy().to_string()) + .unwrap_or_default(); + let size_bytes = std::fs::metadata(&path).map(|m| m.len()).unwrap_or(0); + models.push(ModelInfo { + name, + path: path.to_string_lossy().to_string(), + size_mb: (size_bytes as f64 / 1_048_576.0).round() as u64, + }); + } + } + } + + models.sort_by(|a, b| a.name.cmp(&b.name)); + models + } +} + +impl Drop for LlamaManager { + fn drop(&mut self) { + let _ = self.stop(); + } +} + +/// Information about a GGUF model file. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ModelInfo { + pub name: String, + pub path: String, + pub size_mb: u64, +} diff --git a/src-tauri/tauri.conf.json b/src-tauri/tauri.conf.json index 618de91..5d02585 100644 --- a/src-tauri/tauri.conf.json +++ b/src-tauri/tauri.conf.json @@ -20,7 +20,11 @@ } ], "security": { - "csp": null + "csp": "default-src 'self'; img-src 'self' asset: https://asset.localhost; media-src 'self' asset: https://asset.localhost; style-src 'self' 'unsafe-inline'", + "assetProtocol": { + "enable": true, + "scope": ["**"] + } } }, "bundle": { @@ -32,6 +36,24 @@ "icons/128x128@2x.png", "icons/icon.icns", "icons/icon.ico" - ] + ], + "category": "Utility", + "shortDescription": "Transcribe audio/video with speaker identification", + "longDescription": "Voice to Notes is a desktop application that transcribes audio and video recordings with speaker identification, synchronized playback, and AI-powered analysis. 
Export to SRT, WebVTT, ASS captions, or plain text.", + "copyright": "Voice to Notes Contributors", + "license": "MIT", + "linux": { + "deb": { + "depends": ["python3", "python3-pip"] + }, + "appimage": { + "bundleMediaFramework": true + } + }, + "windows": { + "wix": { + "language": "en-US" + } + } } } diff --git a/src/lib/components/SettingsModal.svelte b/src/lib/components/SettingsModal.svelte new file mode 100644 index 0000000..659b9e7 --- /dev/null +++ b/src/lib/components/SettingsModal.svelte @@ -0,0 +1,287 @@ + + +{#if visible} + + +{/if} + + diff --git a/src/lib/components/WaveformPlayer.svelte b/src/lib/components/WaveformPlayer.svelte index 86221e8..18bb373 100644 --- a/src/lib/components/WaveformPlayer.svelte +++ b/src/lib/components/WaveformPlayer.svelte @@ -57,7 +57,8 @@ wavesurfer?.destroy(); }); - function togglePlayPause() { + /** Toggle play/pause. Exposed for keyboard shortcuts. */ + export function togglePlayPause() { wavesurfer?.playPause(); } diff --git a/src/lib/stores/settings.ts b/src/lib/stores/settings.ts new file mode 100644 index 0000000..32da0ee --- /dev/null +++ b/src/lib/stores/settings.ts @@ -0,0 +1,48 @@ +import { writable } from 'svelte/store'; +import { invoke } from '@tauri-apps/api/core'; + +export interface AppSettings { + ai_provider: string; + openai_api_key: string; + anthropic_api_key: string; + openai_model: string; + anthropic_model: string; + litellm_model: string; + local_model_path: string; + local_binary_path: string; + transcription_model: string; + transcription_device: string; + transcription_language: string; + skip_diarization: boolean; +} + +const defaults: AppSettings = { + ai_provider: 'local', + openai_api_key: '', + anthropic_api_key: '', + openai_model: 'gpt-4o-mini', + anthropic_model: 'claude-sonnet-4-6', + litellm_model: 'gpt-4o-mini', + local_model_path: '', + local_binary_path: 'llama-server', + transcription_model: 'base', + transcription_device: 'cpu', + transcription_language: '', + 
skip_diarization: false, +}; + +export const settings = writable({ ...defaults }); + +export async function loadSettings(): Promise { + try { + const saved = await invoke>('load_settings'); + settings.update(s => ({ ...s, ...saved } as AppSettings)); + } catch { + // Use defaults if settings can't be loaded + } +} + +export async function saveSettings(s: AppSettings): Promise { + settings.set(s); + await invoke('save_settings', { settings: s }); +} diff --git a/src/routes/+page.svelte b/src/routes/+page.svelte index 1c534bc..8e7d5a4 100644 --- a/src/routes/+page.svelte +++ b/src/routes/+page.svelte @@ -6,11 +6,58 @@ import SpeakerManager from '$lib/components/SpeakerManager.svelte'; import AIChatPanel from '$lib/components/AIChatPanel.svelte'; import ProgressOverlay from '$lib/components/ProgressOverlay.svelte'; + import SettingsModal from '$lib/components/SettingsModal.svelte'; import { segments, speakers } from '$lib/stores/transcript'; + import { settings, loadSettings } from '$lib/stores/settings'; import type { Segment, Speaker } from '$lib/types/transcript'; + import { onMount } from 'svelte'; let waveformPlayer: WaveformPlayer; let audioUrl = $state(''); + let showSettings = $state(false); + + onMount(() => { + loadSettings(); + + // Global keyboard shortcuts + function handleKeyDown(e: KeyboardEvent) { + // Don't trigger shortcuts when typing in inputs + const tag = (e.target as HTMLElement)?.tagName; + if (tag === 'INPUT' || tag === 'TEXTAREA' || tag === 'SELECT') return; + + if (e.key === ' ' && !e.ctrlKey && !e.metaKey) { + e.preventDefault(); + waveformPlayer?.togglePlayPause?.(); + } else if (e.key === 'o' && (e.ctrlKey || e.metaKey)) { + e.preventDefault(); + handleFileImport(); + } else if (e.key === ',' && (e.ctrlKey || e.metaKey)) { + e.preventDefault(); + showSettings = true; + } else if (e.key === 'Escape') { + showExportMenu = false; + showSettings = false; + } + } + + // Close export dropdown on outside click + function handleClickOutside(e: 
MouseEvent) { + if (showExportMenu) { + const target = e.target as HTMLElement; + if (!target.closest('.export-dropdown')) { + showExportMenu = false; + } + } + } + + document.addEventListener('keydown', handleKeyDown); + document.addEventListener('click', handleClickOutside); + + return () => { + document.removeEventListener('keydown', handleKeyDown); + document.removeEventListener('click', handleClickOutside); + }; + }); let isTranscribing = $state(false); let transcriptionProgress = $state(0); let transcriptionStage = $state(''); @@ -61,7 +108,13 @@ duration_ms: number; speakers: string[]; num_speakers: number; - }>('run_pipeline', { filePath }); + }>('run_pipeline', { + filePath, + model: $settings.transcription_model || undefined, + device: $settings.transcription_device || undefined, + language: $settings.transcription_language || undefined, + skipDiarization: $settings.skip_diarization || undefined, + }); // Create speaker entries from pipeline result const newSpeakers: Speaker[] = (result.speakers || []).map((label, idx) => ({ @@ -167,6 +220,9 @@ + {#if $segments.length > 0}