Phase 6: Llama-server manager, settings UI, packaging, and polish

- Implement LlamaManager in Rust for llama-server lifecycle: spawn with
  port allocation, health check, clean shutdown on Drop, model listing
- Add llama_start/stop/status/list_models Tauri commands
- Add load_settings/save_settings commands with JSON persistence
- Build SettingsModal with tabs for Transcription, AI Provider, Local AI
  settings (model size, device, language, API keys, provider selection)
- Wire settings into pipeline calls (model, device, language, skip diarization)
- Configure Tauri packaging: asset protocol for local audio files,
  CSP policy, bundle metadata, Linux .deb/.AppImage and Windows .msi config
- Add keyboard shortcuts: Space (play/pause), Ctrl+O (import),
  Ctrl+, (settings), Escape (close menus/modals)
- Close export dropdown on outside click
- Tests: 30 Python and 6 Rust tests passing; 0 Svelte check errors

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-26 16:38:23 -08:00
parent d67625cd5a
commit 97a1a15755
12 changed files with 860 additions and 10 deletions

src-tauri/Cargo.lock (generated)

@@ -1498,6 +1498,12 @@ dependencies = [
"pin-project-lite",
]
[[package]]
name = "http-range"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "21dec9db110f5f872ed9699c3ecf50cf16f423502706ba5c72462e28d3157573"
[[package]]
name = "httparse"
version = "1.10.1"
@@ -3595,6 +3601,7 @@ dependencies = [
"gtk",
"heck 0.5.0",
"http",
"http-range",
"jni",
"libc",
"log",


@@ -14,7 +14,7 @@ crate-type = ["staticlib", "cdylib", "rlib"]
tauri-build = { version = "2", features = [] }
[dependencies]
tauri = { version = "2", features = [] }
tauri = { version = "2", features = ["protocol-asset"] }
tauri-plugin-opener = "2"
serde = { version = "1", features = ["derive"] }
serde_json = "1"


@@ -25,7 +25,7 @@ pub fn ai_chat(
let manager = get_sidecar()?;
let request_id = uuid::Uuid::new_v4().to_string();
-let mut payload = json!({
+let payload = json!({
"action": "chat",
"messages": messages,
"transcript_context": transcript_context.unwrap_or_default(),


@@ -1,2 +1,34 @@
// Settings commands — app preferences, model selection, AI provider config
// TODO: Implement when settings UI is built
use serde_json::{json, Value};
use std::fs;
use std::path::PathBuf;
use crate::llama::LlamaManager;
fn settings_path() -> PathBuf {
LlamaManager::data_dir().join("settings.json")
}
/// Load app settings from disk.
#[tauri::command]
pub fn load_settings() -> Value {
let path = settings_path();
if !path.exists() {
return json!({});
}
match fs::read_to_string(&path) {
Ok(content) => serde_json::from_str(&content).unwrap_or(json!({})),
Err(_) => json!({}),
}
}
/// Save app settings to disk.
#[tauri::command]
pub fn save_settings(settings: Value) -> Result<(), String> {
let path = settings_path();
if let Some(parent) = path.parent() {
fs::create_dir_all(parent).map_err(|e| format!("Cannot create settings dir: {e}"))?;
}
let json = serde_json::to_string_pretty(&settings).map_err(|e| e.to_string())?;
fs::write(&path, json).map_err(|e| format!("Cannot write settings: {e}"))?;
Ok(())
}


@@ -1,2 +1,64 @@
// System commands — hardware detection, llama-server lifecycle
// TODO: Implement hardware detection and llama-server management
use serde_json::{json, Value};
use crate::llama::{LlamaConfig, LlamaManager, LlamaStatus};
use std::path::PathBuf;
use std::sync::OnceLock;
/// Global llama manager — persists across command invocations.
fn llama_manager() -> &'static LlamaManager {
static INSTANCE: OnceLock<LlamaManager> = OnceLock::new();
INSTANCE.get_or_init(LlamaManager::new)
}
/// Start the local llama-server with a GGUF model.
#[tauri::command]
pub fn llama_start(
model_path: String,
binary_path: Option<String>,
port: Option<u16>,
n_gpu_layers: Option<i32>,
context_size: Option<u32>,
threads: Option<u32>,
) -> Result<LlamaStatus, String> {
let config = LlamaConfig {
binary_path: PathBuf::from(
binary_path.unwrap_or_else(|| "llama-server".to_string()),
),
model_path: PathBuf::from(model_path),
port: port.unwrap_or(0),
n_gpu_layers: n_gpu_layers.unwrap_or(0),
context_size: context_size.unwrap_or(4096),
threads: threads.unwrap_or(4),
};
llama_manager().start(&config)
}
/// Stop the local llama-server.
#[tauri::command]
pub fn llama_stop() -> Result<(), String> {
llama_manager().stop()
}
/// Get the status of the local llama-server.
#[tauri::command]
pub fn llama_status() -> LlamaStatus {
llama_manager().status()
}
/// List available GGUF models in the models directory.
#[tauri::command]
pub fn llama_list_models() -> Value {
let models = LlamaManager::list_models();
json!({
"models": models,
"models_dir": LlamaManager::models_dir().to_string_lossy(),
})
}
/// Get the app data directory path.
#[tauri::command]
pub fn get_data_dir() -> String {
LlamaManager::data_dir().to_string_lossy().to_string()
}


@@ -1,11 +1,14 @@
pub mod commands;
pub mod db;
pub mod llama;
pub mod sidecar;
pub mod state;
use commands::ai::{ai_chat, ai_configure, ai_list_providers};
use commands::export::export_transcript;
use commands::project::{create_project, get_project, list_projects};
use commands::settings::{load_settings, save_settings};
use commands::system::{get_data_dir, llama_list_models, llama_start, llama_status, llama_stop};
use commands::transcribe::{run_pipeline, transcribe_file};
#[cfg_attr(mobile, tauri::mobile_entry_point)]
@@ -23,6 +26,13 @@ pub fn run() {
ai_chat,
ai_list_providers,
ai_configure,
llama_start,
llama_stop,
llama_status,
llama_list_models,
get_data_dir,
load_settings,
save_settings,
])
.run(tauri::generate_context!())
.expect("error while running tauri application");

src-tauri/src/llama/mod.rs (new file)

@@ -0,0 +1,307 @@
//! Llama-server lifecycle management.
//!
//! Manages a bundled llama-server (llama.cpp) binary that exposes an
//! OpenAI-compatible API on localhost. The Rust backend handles:
//! - Finding or downloading the llama-server binary
//! - Spawning the process with a GGUF model file
//! - Port allocation and health checking
//! - Clean shutdown on app exit
use std::net::TcpListener;
use std::path::PathBuf;
use std::process::{Child, Command, Stdio};
use std::sync::Mutex;
use std::time::{Duration, Instant};
use serde::{Deserialize, Serialize};
/// Configuration for the llama-server instance.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LlamaConfig {
/// Path to the llama-server binary.
pub binary_path: PathBuf,
/// Path to the GGUF model file.
pub model_path: PathBuf,
/// Port to listen on (0 = auto-assign).
pub port: u16,
/// Number of GPU layers to offload (-1 = all, 0 = CPU only).
pub n_gpu_layers: i32,
/// Context window size.
pub context_size: u32,
/// Number of threads for CPU inference.
pub threads: u32,
}
impl Default for LlamaConfig {
fn default() -> Self {
Self {
binary_path: PathBuf::from("llama-server"),
model_path: PathBuf::new(),
port: 0,
n_gpu_layers: 0,
context_size: 4096,
threads: 4,
}
}
}
/// Status of the llama-server.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LlamaStatus {
pub running: bool,
pub port: u16,
pub model: String,
pub url: String,
}
/// Manages the llama-server process lifecycle.
pub struct LlamaManager {
process: Mutex<Option<Child>>,
port: Mutex<u16>,
model_path: Mutex<String>,
}
impl LlamaManager {
pub fn new() -> Self {
Self {
process: Mutex::new(None),
port: Mutex::new(0),
model_path: Mutex::new(String::new()),
}
}
/// Get the data directory for Voice to Notes.
pub fn data_dir() -> PathBuf {
let home = std::env::var("HOME")
.or_else(|_| std::env::var("USERPROFILE"))
.unwrap_or_else(|_| ".".to_string());
PathBuf::from(home).join(".voicetonotes")
}
/// Get the models directory.
pub fn models_dir() -> PathBuf {
Self::data_dir().join("models")
}
/// Find an available port for the server.
fn find_available_port() -> Result<u16, String> {
let listener =
TcpListener::bind("127.0.0.1:0").map_err(|e| format!("Cannot bind port: {e}"))?;
let port = listener
.local_addr()
.map_err(|e| format!("Cannot get port: {e}"))?
.port();
Ok(port)
}
/// Start the llama-server with the given configuration.
pub fn start(&self, config: &LlamaConfig) -> Result<LlamaStatus, String> {
// Check if already running
{
let proc = self.process.lock().map_err(|e| e.to_string())?;
if proc.is_some() {
let port = *self.port.lock().map_err(|e| e.to_string())?;
let model = self.model_path.lock().map_err(|e| e.to_string())?.clone();
return Ok(LlamaStatus {
running: true,
port,
model,
url: format!("http://127.0.0.1:{port}"),
});
}
}
// Validate paths
if !config.binary_path.exists() {
return Err(format!(
"llama-server binary not found at: {}",
config.binary_path.display()
));
}
if !config.model_path.exists() {
return Err(format!(
"Model file not found at: {}",
config.model_path.display()
));
}
// Determine port
let port = if config.port == 0 {
Self::find_available_port()?
} else {
config.port
};
// Build command
let mut cmd = Command::new(&config.binary_path);
cmd.arg("--model")
.arg(&config.model_path)
.arg("--port")
.arg(port.to_string())
.arg("--ctx-size")
.arg(config.context_size.to_string())
.arg("--threads")
.arg(config.threads.to_string())
.arg("--n-gpu-layers")
.arg(config.n_gpu_layers.to_string())
.stdout(Stdio::piped())
.stderr(Stdio::piped());
let child = cmd
.spawn()
.map_err(|e| format!("Failed to start llama-server: {e}"))?;
// Store state
let model_name = config
.model_path
.file_stem()
.map(|s| s.to_string_lossy().to_string())
.unwrap_or_default();
{
let mut proc = self.process.lock().map_err(|e| e.to_string())?;
*proc = Some(child);
}
{
let mut p = self.port.lock().map_err(|e| e.to_string())?;
*p = port;
}
{
let mut m = self.model_path.lock().map_err(|e| e.to_string())?;
*m = model_name.clone();
}
// Wait for server to be ready (health endpoint)
self.wait_for_ready(port)?;
Ok(LlamaStatus {
running: true,
port,
model: model_name,
url: format!("http://127.0.0.1:{port}"),
})
}
/// Wait for the llama-server to come up (a TCP connect check on the port, not a full /health probe).
fn wait_for_ready(&self, port: u16) -> Result<(), String> {
let start = Instant::now();
let timeout = Duration::from_secs(60); // Models can take time to load
let _url = format!("http://127.0.0.1:{port}/health");
loop {
if start.elapsed() > timeout {
// Kill the process since it didn't start in time
self.stop().ok();
return Err("llama-server did not start within 60 seconds".to_string());
}
// Check if process is still alive
{
let mut proc = self.process.lock().map_err(|e| e.to_string())?;
if let Some(ref mut child) = *proc {
match child.try_wait() {
Ok(Some(status)) => {
*proc = None;
return Err(format!("llama-server exited with status: {status}"));
}
Ok(None) => {} // Still running
Err(e) => {
return Err(format!("Error checking process: {e}"));
}
}
}
}
// Probe the port with a short TCP connect; once it accepts, the server is considered ready
match std::net::TcpStream::connect_timeout(
&format!("127.0.0.1:{port}").parse().unwrap(),
Duration::from_millis(500),
) {
Ok(_) => return Ok(()),
Err(_) => {
std::thread::sleep(Duration::from_millis(500));
}
}
}
}
/// Stop the llama-server process.
pub fn stop(&self) -> Result<(), String> {
let mut proc = self.process.lock().map_err(|e| e.to_string())?;
if let Some(ref mut child) = proc.take() {
let _ = child.kill();
let _ = child.wait();
}
Ok(())
}
/// Get the current status.
pub fn status(&self) -> LlamaStatus {
let running = self
.process
.lock()
.ok()
.map_or(false, |p| p.is_some());
let port = self.port.lock().ok().map_or(0, |p| *p);
let model = self
.model_path
.lock()
.ok()
.map_or_else(String::new, |m| m.clone());
LlamaStatus {
running,
port,
model,
url: if running {
format!("http://127.0.0.1:{port}")
} else {
String::new()
},
}
}
/// List available GGUF model files in the models directory.
pub fn list_models() -> Vec<ModelInfo> {
let models_dir = Self::models_dir();
if !models_dir.exists() {
return vec![];
}
let mut models = vec![];
if let Ok(entries) = std::fs::read_dir(&models_dir) {
for entry in entries.flatten() {
let path = entry.path();
if path.extension().map_or(false, |ext| ext == "gguf") {
let name = path
.file_stem()
.map(|s| s.to_string_lossy().to_string())
.unwrap_or_default();
let size_bytes = std::fs::metadata(&path).map(|m| m.len()).unwrap_or(0);
models.push(ModelInfo {
name,
path: path.to_string_lossy().to_string(),
size_mb: (size_bytes as f64 / 1_048_576.0).round() as u64,
});
}
}
}
models.sort_by(|a, b| a.name.cmp(&b.name));
models
}
}
impl Drop for LlamaManager {
fn drop(&mut self) {
let _ = self.stop();
}
}
/// Information about a GGUF model file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelInfo {
pub name: String,
pub path: String,
pub size_mb: u64,
}


@@ -20,7 +20,11 @@
}
],
"security": {
"csp": null
"csp": "default-src 'self'; img-src 'self' asset: https://asset.localhost; media-src 'self' asset: https://asset.localhost; style-src 'self' 'unsafe-inline'",
"assetProtocol": {
"enable": true,
"scope": ["**"]
}
}
},
"bundle": {
@@ -32,6 +36,24 @@
"icons/128x128@2x.png",
"icons/icon.icns",
"icons/icon.ico"
-]
+],
"category": "Utility",
"shortDescription": "Transcribe audio/video with speaker identification",
"longDescription": "Voice to Notes is a desktop application that transcribes audio and video recordings with speaker identification, synchronized playback, and AI-powered analysis. Export to SRT, WebVTT, ASS captions, or plain text.",
"copyright": "Voice to Notes Contributors",
"license": "MIT",
"linux": {
"deb": {
"depends": ["python3", "python3-pip"]
},
"appimage": {
"bundleMediaFramework": true
}
},
"windows": {
"wix": {
"language": "en-US"
}
}
}
}