Phase 6: Llama-server manager, settings UI, packaging, and polish

- Implement LlamaManager in Rust for llama-server lifecycle: spawn with
  port allocation, health check, clean shutdown on Drop, model listing
- Add llama_start/stop/status/list_models Tauri commands
- Add load_settings/save_settings commands with JSON persistence
- Build SettingsModal with tabs for Transcription, AI Provider, Local AI
  settings (model size, device, language, API keys, provider selection)
- Wire settings into pipeline calls (model, device, language, skip diarization)
- Configure Tauri packaging: asset protocol for local audio files,
  CSP policy, bundle metadata, Linux .deb/.AppImage and Windows .msi config
- Add keyboard shortcuts: Space (play/pause), Ctrl+O (import),
  Ctrl+, (settings), Escape (close menus/modals)
- Close export dropdown on outside click
- Tests: 30 Python and 6 Rust tests passing; 0 Svelte check errors

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-26 16:38:23 -08:00
parent d67625cd5a
commit 97a1a15755
12 changed files with 860 additions and 10 deletions

src-tauri/Cargo.lock (generated)

@@ -1498,6 +1498,12 @@ dependencies = [
"pin-project-lite",
]
[[package]]
name = "http-range"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "21dec9db110f5f872ed9699c3ecf50cf16f423502706ba5c72462e28d3157573"
[[package]]
name = "httparse"
version = "1.10.1"
@@ -3595,6 +3601,7 @@ dependencies = [
"gtk",
"heck 0.5.0",
"http",
"http-range",
"jni",
"libc",
"log",


@@ -14,7 +14,7 @@ crate-type = ["staticlib", "cdylib", "rlib"]
tauri-build = { version = "2", features = [] }
[dependencies]
tauri = { version = "2", features = [] }
tauri = { version = "2", features = ["protocol-asset"] }
tauri-plugin-opener = "2"
serde = { version = "1", features = ["derive"] }
serde_json = "1"


@@ -25,7 +25,7 @@ pub fn ai_chat(
let manager = get_sidecar()?;
let request_id = uuid::Uuid::new_v4().to_string();
-let mut payload = json!({
+let payload = json!({
"action": "chat",
"messages": messages,
"transcript_context": transcript_context.unwrap_or_default(),


@@ -1,2 +1,34 @@
// Settings commands — app preferences, model selection, AI provider config
// TODO: Implement when settings UI is built
use serde_json::{json, Value};
use std::fs;
use std::path::PathBuf;
use crate::llama::LlamaManager;
fn settings_path() -> PathBuf {
LlamaManager::data_dir().join("settings.json")
}
/// Load app settings from disk.
#[tauri::command]
pub fn load_settings() -> Value {
let path = settings_path();
if !path.exists() {
return json!({});
}
match fs::read_to_string(&path) {
Ok(content) => serde_json::from_str(&content).unwrap_or(json!({})),
Err(_) => json!({}),
}
}
/// Save app settings to disk.
#[tauri::command]
pub fn save_settings(settings: Value) -> Result<(), String> {
let path = settings_path();
if let Some(parent) = path.parent() {
fs::create_dir_all(parent).map_err(|e| format!("Cannot create settings dir: {e}"))?;
}
let json = serde_json::to_string_pretty(&settings).map_err(|e| e.to_string())?;
fs::write(&path, json).map_err(|e| format!("Cannot write settings: {e}"))?;
Ok(())
}


@@ -1,2 +1,64 @@
// System commands — hardware detection, llama-server lifecycle
// TODO: Implement hardware detection and llama-server management
use serde_json::{json, Value};
use crate::llama::{LlamaConfig, LlamaManager, LlamaStatus};
use std::path::PathBuf;
use std::sync::OnceLock;
/// Global llama manager — persists across command invocations.
fn llama_manager() -> &'static LlamaManager {
static INSTANCE: OnceLock<LlamaManager> = OnceLock::new();
INSTANCE.get_or_init(LlamaManager::new)
}
/// Start the local llama-server with a GGUF model.
#[tauri::command]
pub fn llama_start(
model_path: String,
binary_path: Option<String>,
port: Option<u16>,
n_gpu_layers: Option<i32>,
context_size: Option<u32>,
threads: Option<u32>,
) -> Result<LlamaStatus, String> {
let config = LlamaConfig {
binary_path: PathBuf::from(
binary_path.unwrap_or_else(|| "llama-server".to_string()),
),
model_path: PathBuf::from(model_path),
port: port.unwrap_or(0),
n_gpu_layers: n_gpu_layers.unwrap_or(0),
context_size: context_size.unwrap_or(4096),
threads: threads.unwrap_or(4),
};
llama_manager().start(&config)
}
/// Stop the local llama-server.
#[tauri::command]
pub fn llama_stop() -> Result<(), String> {
llama_manager().stop()
}
/// Get the status of the local llama-server.
#[tauri::command]
pub fn llama_status() -> LlamaStatus {
llama_manager().status()
}
/// List available GGUF models in the models directory.
#[tauri::command]
pub fn llama_list_models() -> Value {
let models = LlamaManager::list_models();
json!({
"models": models,
"models_dir": LlamaManager::models_dir().to_string_lossy(),
})
}
/// Get the app data directory path.
#[tauri::command]
pub fn get_data_dir() -> String {
LlamaManager::data_dir().to_string_lossy().to_string()
}


@@ -1,11 +1,14 @@
pub mod commands;
pub mod db;
pub mod llama;
pub mod sidecar;
pub mod state;
use commands::ai::{ai_chat, ai_configure, ai_list_providers};
use commands::export::export_transcript;
use commands::project::{create_project, get_project, list_projects};
use commands::settings::{load_settings, save_settings};
use commands::system::{get_data_dir, llama_list_models, llama_start, llama_status, llama_stop};
use commands::transcribe::{run_pipeline, transcribe_file};
#[cfg_attr(mobile, tauri::mobile_entry_point)]
@@ -23,6 +26,13 @@ pub fn run() {
ai_chat,
ai_list_providers,
ai_configure,
llama_start,
llama_stop,
llama_status,
llama_list_models,
get_data_dir,
load_settings,
save_settings,
])
.run(tauri::generate_context!())
.expect("error while running tauri application");

src-tauri/src/llama/mod.rs (new file)

@@ -0,0 +1,307 @@
//! Llama-server lifecycle management.
//!
//! Manages a bundled llama-server (llama.cpp) binary that exposes an
//! OpenAI-compatible API on localhost. The Rust backend handles:
//! - Finding or downloading the llama-server binary
//! - Spawning the process with a GGUF model file
//! - Port allocation and health checking
//! - Clean shutdown on app exit
use std::net::TcpListener;
use std::path::PathBuf;
use std::process::{Child, Command, Stdio};
use std::sync::Mutex;
use std::time::{Duration, Instant};
use serde::{Deserialize, Serialize};
/// Configuration for the llama-server instance.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LlamaConfig {
/// Path to the llama-server binary.
pub binary_path: PathBuf,
/// Path to the GGUF model file.
pub model_path: PathBuf,
/// Port to listen on (0 = auto-assign).
pub port: u16,
/// Number of GPU layers to offload (-1 = all, 0 = CPU only).
pub n_gpu_layers: i32,
/// Context window size.
pub context_size: u32,
/// Number of threads for CPU inference.
pub threads: u32,
}
impl Default for LlamaConfig {
fn default() -> Self {
Self {
binary_path: PathBuf::from("llama-server"),
model_path: PathBuf::new(),
port: 0,
n_gpu_layers: 0,
context_size: 4096,
threads: 4,
}
}
}
/// Status of the llama-server.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LlamaStatus {
pub running: bool,
pub port: u16,
pub model: String,
pub url: String,
}
/// Manages the llama-server process lifecycle.
pub struct LlamaManager {
process: Mutex<Option<Child>>,
port: Mutex<u16>,
model_path: Mutex<String>,
}
impl LlamaManager {
pub fn new() -> Self {
Self {
process: Mutex::new(None),
port: Mutex::new(0),
model_path: Mutex::new(String::new()),
}
}
/// Get the data directory for Voice to Notes.
pub fn data_dir() -> PathBuf {
let home = std::env::var("HOME")
.or_else(|_| std::env::var("USERPROFILE"))
.unwrap_or_else(|_| ".".to_string());
PathBuf::from(home).join(".voicetonotes")
}
/// Get the models directory.
pub fn models_dir() -> PathBuf {
Self::data_dir().join("models")
}
/// Find an available port for the server.
fn find_available_port() -> Result<u16, String> {
let listener =
TcpListener::bind("127.0.0.1:0").map_err(|e| format!("Cannot bind port: {e}"))?;
let port = listener
.local_addr()
.map_err(|e| format!("Cannot get port: {e}"))?
.port();
Ok(port)
}
/// Start the llama-server with the given configuration.
pub fn start(&self, config: &LlamaConfig) -> Result<LlamaStatus, String> {
// Check if already running
{
let proc = self.process.lock().map_err(|e| e.to_string())?;
if proc.is_some() {
let port = *self.port.lock().map_err(|e| e.to_string())?;
let model = self.model_path.lock().map_err(|e| e.to_string())?.clone();
return Ok(LlamaStatus {
running: true,
port,
model,
url: format!("http://127.0.0.1:{port}"),
});
}
}
// Validate paths
if !config.binary_path.exists() {
return Err(format!(
"llama-server binary not found at: {}",
config.binary_path.display()
));
}
if !config.model_path.exists() {
return Err(format!(
"Model file not found at: {}",
config.model_path.display()
));
}
// Determine port
let port = if config.port == 0 {
Self::find_available_port()?
} else {
config.port
};
// Build command
let mut cmd = Command::new(&config.binary_path);
cmd.arg("--model")
.arg(&config.model_path)
.arg("--port")
.arg(port.to_string())
.arg("--ctx-size")
.arg(config.context_size.to_string())
.arg("--threads")
.arg(config.threads.to_string())
.arg("--n-gpu-layers")
.arg(config.n_gpu_layers.to_string())
.stdout(Stdio::piped())
.stderr(Stdio::piped());
let child = cmd
.spawn()
.map_err(|e| format!("Failed to start llama-server: {e}"))?;
// Store state
let model_name = config
.model_path
.file_stem()
.map(|s| s.to_string_lossy().to_string())
.unwrap_or_default();
{
let mut proc = self.process.lock().map_err(|e| e.to_string())?;
*proc = Some(child);
}
{
let mut p = self.port.lock().map_err(|e| e.to_string())?;
*p = port;
}
{
let mut m = self.model_path.lock().map_err(|e| e.to_string())?;
*m = model_name.clone();
}
// Wait for server to be ready (health endpoint)
self.wait_for_ready(port)?;
Ok(LlamaStatus {
running: true,
port,
model: model_name,
url: format!("http://127.0.0.1:{port}"),
})
}
/// Wait for the llama-server to come up (a TCP connect check on the port, not a full /health probe).
fn wait_for_ready(&self, port: u16) -> Result<(), String> {
let start = Instant::now();
let timeout = Duration::from_secs(60); // Models can take time to load
let _url = format!("http://127.0.0.1:{port}/health");
loop {
if start.elapsed() > timeout {
// Kill the process since it didn't start in time
self.stop().ok();
return Err("llama-server did not start within 60 seconds".to_string());
}
// Check if process is still alive
{
let mut proc = self.process.lock().map_err(|e| e.to_string())?;
if let Some(ref mut child) = *proc {
match child.try_wait() {
Ok(Some(status)) => {
*proc = None;
return Err(format!("llama-server exited with status: {status}"));
}
Ok(None) => {} // Still running
Err(e) => {
return Err(format!("Error checking process: {e}"));
}
}
}
}
// Probe the port with a short TCP connect; once it accepts, the server is considered ready
match std::net::TcpStream::connect_timeout(
&format!("127.0.0.1:{port}").parse().unwrap(),
Duration::from_millis(500),
) {
Ok(_) => return Ok(()),
Err(_) => {
std::thread::sleep(Duration::from_millis(500));
}
}
}
}
/// Stop the llama-server process.
pub fn stop(&self) -> Result<(), String> {
let mut proc = self.process.lock().map_err(|e| e.to_string())?;
if let Some(ref mut child) = proc.take() {
let _ = child.kill();
let _ = child.wait();
}
Ok(())
}
/// Get the current status.
pub fn status(&self) -> LlamaStatus {
let running = self
.process
.lock()
.ok()
.map_or(false, |p| p.is_some());
let port = self.port.lock().ok().map_or(0, |p| *p);
let model = self
.model_path
.lock()
.ok()
.map_or_else(String::new, |m| m.clone());
LlamaStatus {
running,
port,
model,
url: if running {
format!("http://127.0.0.1:{port}")
} else {
String::new()
},
}
}
/// List available GGUF model files in the models directory.
pub fn list_models() -> Vec<ModelInfo> {
let models_dir = Self::models_dir();
if !models_dir.exists() {
return vec![];
}
let mut models = vec![];
if let Ok(entries) = std::fs::read_dir(&models_dir) {
for entry in entries.flatten() {
let path = entry.path();
if path.extension().map_or(false, |ext| ext == "gguf") {
let name = path
.file_stem()
.map(|s| s.to_string_lossy().to_string())
.unwrap_or_default();
let size_bytes = std::fs::metadata(&path).map(|m| m.len()).unwrap_or(0);
models.push(ModelInfo {
name,
path: path.to_string_lossy().to_string(),
size_mb: (size_bytes as f64 / 1_048_576.0).round() as u64,
});
}
}
}
models.sort_by(|a, b| a.name.cmp(&b.name));
models
}
}
impl Drop for LlamaManager {
fn drop(&mut self) {
let _ = self.stop();
}
}
/// Information about a GGUF model file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelInfo {
pub name: String,
pub path: String,
pub size_mb: u64,
}


@@ -20,7 +20,11 @@
}
],
"security": {
"csp": null
"csp": "default-src 'self'; img-src 'self' asset: https://asset.localhost; media-src 'self' asset: https://asset.localhost; style-src 'self' 'unsafe-inline'",
"assetProtocol": {
"enable": true,
"scope": ["**"]
}
}
},
"bundle": {
@@ -32,6 +36,24 @@
"icons/128x128@2x.png",
"icons/icon.icns",
"icons/icon.ico"
-]
+],
"category": "Utility",
"shortDescription": "Transcribe audio/video with speaker identification",
"longDescription": "Voice to Notes is a desktop application that transcribes audio and video recordings with speaker identification, synchronized playback, and AI-powered analysis. Export to SRT, WebVTT, ASS captions, or plain text.",
"copyright": "Voice to Notes Contributors",
"license": "MIT",
"linux": {
"deb": {
"depends": ["python3", "python3-pip"]
},
"appimage": {
"bundleMediaFramework": true
}
},
"windows": {
"wix": {
"language": "en-US"
}
}
}
}