Phase 6: Llama-server manager, settings UI, packaging, and polish
- Implement LlamaManager in Rust for llama-server lifecycle: spawn with port allocation, health check, clean shutdown on Drop, model listing
- Add llama_start/stop/status/list_models Tauri commands (a wiring sketch follows the diff below)
- Add load_settings/save_settings commands with JSON persistence (see the sketch after this message)
- Build SettingsModal with tabs for Transcription, AI Provider, and Local AI settings (model size, device, language, API keys, provider selection)
- Wire settings into pipeline calls (model, device, language, skip diarization)
- Configure Tauri packaging: asset protocol for local audio files, CSP policy, bundle metadata, Linux .deb/.AppImage and Windows .msi config
- Add keyboard shortcuts: Space (play/pause), Ctrl+O (import), Ctrl+, (settings), Escape (close menus/modals)
- Close the export dropdown on outside click
- Tests: 30 Python and 6 Rust tests passing, 0 Svelte check errors

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
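The load_settings/save_settings commands themselves are not part of this diff. As a rough sketch of what the JSON persistence can look like, the snippet below assumes a hypothetical Settings struct (the field names are illustrative, not taken from the app) serialized with serde_json into the same ~/.voicetonotes directory that LlamaManager::data_dir() resolves:

use std::path::PathBuf;

use serde::{Deserialize, Serialize};

/// Hypothetical settings shape; field names are illustrative, not the app's.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Settings {
    pub whisper_model: String, // e.g. "small"
    pub device: String,        // e.g. "cpu" or "cuda"
    pub language: String,      // e.g. "auto"
    pub provider: String,      // "local" or a hosted API provider
}

fn settings_path() -> PathBuf {
    // Same resolution as LlamaManager::data_dir(): ~/.voicetonotes/settings.json
    let home = std::env::var("HOME")
        .or_else(|_| std::env::var("USERPROFILE"))
        .unwrap_or_else(|_| ".".to_string());
    PathBuf::from(home).join(".voicetonotes").join("settings.json")
}

#[tauri::command]
fn load_settings() -> Result<Settings, String> {
    // A missing file yields defaults, so a fresh install needs no first-run write.
    match std::fs::read_to_string(settings_path()) {
        Ok(json) => serde_json::from_str(&json).map_err(|e| e.to_string()),
        Err(_) => Ok(Settings::default()),
    }
}

#[tauri::command]
fn save_settings(settings: Settings) -> Result<(), String> {
    let path = settings_path();
    if let Some(dir) = path.parent() {
        std::fs::create_dir_all(dir).map_err(|e| e.to_string())?;
    }
    let json = serde_json::to_string_pretty(&settings).map_err(|e| e.to_string())?;
    std::fs::write(path, json).map_err(|e| e.to_string())
}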
src-tauri/src/llama/mod.rs (new file, 307 lines)
@@ -0,0 +1,307 @@
//! Llama-server lifecycle management.
//!
//! Manages a bundled llama-server (llama.cpp) binary that exposes an
//! OpenAI-compatible API on localhost. The Rust backend handles:
//! - Finding or downloading the llama-server binary
//! - Spawning the process with a GGUF model file
//! - Port allocation and health checking
//! - Clean shutdown on app exit

use std::net::TcpListener;
use std::path::PathBuf;
use std::process::{Child, Command, Stdio};
use std::sync::Mutex;
use std::time::{Duration, Instant};

use serde::{Deserialize, Serialize};

/// Configuration for the llama-server instance.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LlamaConfig {
    /// Path to the llama-server binary.
    pub binary_path: PathBuf,
    /// Path to the GGUF model file.
    pub model_path: PathBuf,
    /// Port to listen on (0 = auto-assign).
    pub port: u16,
    /// Number of GPU layers to offload (-1 = all, 0 = CPU only).
    pub n_gpu_layers: i32,
    /// Context window size.
    pub context_size: u32,
    /// Number of threads for CPU inference.
    pub threads: u32,
}

impl Default for LlamaConfig {
    fn default() -> Self {
        Self {
            binary_path: PathBuf::from("llama-server"),
            model_path: PathBuf::new(),
            port: 0,
            n_gpu_layers: 0,
            context_size: 4096,
            threads: 4,
        }
    }
}

/// Status of the llama-server.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LlamaStatus {
    pub running: bool,
    pub port: u16,
    pub model: String,
    pub url: String,
}

/// Manages the llama-server process lifecycle.
pub struct LlamaManager {
    process: Mutex<Option<Child>>,
    port: Mutex<u16>,
    model_path: Mutex<String>,
}

impl LlamaManager {
    pub fn new() -> Self {
        Self {
            process: Mutex::new(None),
            port: Mutex::new(0),
            model_path: Mutex::new(String::new()),
        }
    }

    /// Get the data directory for Voice to Notes.
    pub fn data_dir() -> PathBuf {
        let home = std::env::var("HOME")
            .or_else(|_| std::env::var("USERPROFILE"))
            .unwrap_or_else(|_| ".".to_string());
        PathBuf::from(home).join(".voicetonotes")
    }

    /// Get the models directory.
    pub fn models_dir() -> PathBuf {
        Self::data_dir().join("models")
    }

    /// Find an available port for the server.
    fn find_available_port() -> Result<u16, String> {
        // Bind to port 0 so the OS assigns a free port, then read it back.
        // Dropping the listener releases the port for llama-server to claim.
        let listener =
            TcpListener::bind("127.0.0.1:0").map_err(|e| format!("Cannot bind port: {e}"))?;
        let port = listener
            .local_addr()
            .map_err(|e| format!("Cannot get port: {e}"))?
            .port();
        Ok(port)
    }

    /// Start the llama-server with the given configuration.
    pub fn start(&self, config: &LlamaConfig) -> Result<LlamaStatus, String> {
        // If a server is already running, report it instead of spawning a second one.
        {
            let proc = self.process.lock().map_err(|e| e.to_string())?;
            if proc.is_some() {
                let port = *self.port.lock().map_err(|e| e.to_string())?;
                let model = self.model_path.lock().map_err(|e| e.to_string())?.clone();
                return Ok(LlamaStatus {
                    running: true,
                    port,
                    model,
                    url: format!("http://127.0.0.1:{port}"),
                });
            }
        }

        // Validate paths
        if !config.binary_path.exists() {
            return Err(format!(
                "llama-server binary not found at: {}",
                config.binary_path.display()
            ));
        }
        if !config.model_path.exists() {
            return Err(format!(
                "Model file not found at: {}",
                config.model_path.display()
            ));
        }

        // Determine port
        let port = if config.port == 0 {
            Self::find_available_port()?
        } else {
            config.port
        };

        // Build command
        let mut cmd = Command::new(&config.binary_path);
        cmd.arg("--model")
            .arg(&config.model_path)
            .arg("--port")
            .arg(port.to_string())
            .arg("--ctx-size")
            .arg(config.context_size.to_string())
            .arg("--threads")
            .arg(config.threads.to_string())
            .arg("--n-gpu-layers")
            .arg(config.n_gpu_layers.to_string())
            .stdout(Stdio::piped())
            .stderr(Stdio::piped());

        let child = cmd
            .spawn()
            .map_err(|e| format!("Failed to start llama-server: {e}"))?;

        // Store state
        let model_name = config
            .model_path
            .file_stem()
            .map(|s| s.to_string_lossy().to_string())
            .unwrap_or_default();

        {
            let mut proc = self.process.lock().map_err(|e| e.to_string())?;
            *proc = Some(child);
        }
        {
            let mut p = self.port.lock().map_err(|e| e.to_string())?;
            *p = port;
        }
        {
            let mut m = self.model_path.lock().map_err(|e| e.to_string())?;
            *m = model_name.clone();
        }

        // Block until the server is ready to accept requests.
        self.wait_for_ready(port)?;

        Ok(LlamaStatus {
            running: true,
            port,
            model: model_name,
            url: format!("http://127.0.0.1:{port}"),
        })
    }

    /// Wait for the llama-server to accept connections. A successful TCP
    /// connect is used as the readiness probe; the /health endpoint only
    /// answers once the listener is up.
    fn wait_for_ready(&self, port: u16) -> Result<(), String> {
        let start = Instant::now();
        let timeout = Duration::from_secs(60); // Models can take time to load

        loop {
            if start.elapsed() > timeout {
                // Kill the process since it didn't start in time
                self.stop().ok();
                return Err("llama-server did not start within 60 seconds".to_string());
            }

            // Check if process is still alive
            {
                let mut proc = self.process.lock().map_err(|e| e.to_string())?;
                if let Some(ref mut child) = *proc {
                    match child.try_wait() {
                        Ok(Some(status)) => {
                            *proc = None;
                            return Err(format!("llama-server exited with status: {status}"));
                        }
                        Ok(None) => {} // Still running
                        Err(e) => {
                            return Err(format!("Error checking process: {e}"));
                        }
                    }
                }
            }

            // Probe the port; retry every 500 ms until the connect succeeds.
            match std::net::TcpStream::connect_timeout(
                &format!("127.0.0.1:{port}").parse().unwrap(),
                Duration::from_millis(500),
            ) {
                Ok(_) => return Ok(()),
                Err(_) => {
                    std::thread::sleep(Duration::from_millis(500));
                }
            }
        }
    }

    /// Stop the llama-server process.
    pub fn stop(&self) -> Result<(), String> {
        let mut proc = self.process.lock().map_err(|e| e.to_string())?;
        if let Some(mut child) = proc.take() {
            let _ = child.kill();
            let _ = child.wait(); // Reap the child to avoid leaving a zombie
        }
        Ok(())
    }

    /// Get the current status.
    pub fn status(&self) -> LlamaStatus {
        let running = self
            .process
            .lock()
            .ok()
            .map_or(false, |p| p.is_some());
        let port = self.port.lock().ok().map_or(0, |p| *p);
        let model = self
            .model_path
            .lock()
            .ok()
            .map_or_else(String::new, |m| m.clone());

        LlamaStatus {
            running,
            port,
            model,
            url: if running {
                format!("http://127.0.0.1:{port}")
            } else {
                String::new()
            },
        }
    }

    /// List available GGUF model files in the models directory.
    pub fn list_models() -> Vec<ModelInfo> {
        let models_dir = Self::models_dir();
        if !models_dir.exists() {
            return vec![];
        }

        let mut models = vec![];
        if let Ok(entries) = std::fs::read_dir(&models_dir) {
            for entry in entries.flatten() {
                let path = entry.path();
                if path.extension().map_or(false, |ext| ext == "gguf") {
                    let name = path
                        .file_stem()
                        .map(|s| s.to_string_lossy().to_string())
                        .unwrap_or_default();
                    let size_bytes = std::fs::metadata(&path).map(|m| m.len()).unwrap_or(0);
                    models.push(ModelInfo {
                        name,
                        path: path.to_string_lossy().to_string(),
                        size_mb: (size_bytes as f64 / 1_048_576.0).round() as u64,
                    });
                }
            }
        }

        models.sort_by(|a, b| a.name.cmp(&b.name));
        models
    }
}

impl Drop for LlamaManager {
    fn drop(&mut self) {
        let _ = self.stop();
    }
}

/// Information about a GGUF model file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelInfo {
    pub name: String,
    pub path: String,
    pub size_mb: u64,
}