Add speech-to-text feature using Faster Whisper container
Some checks failed
Build App / compute-version (pull_request) Successful in 3s
Build App / build-macos (pull_request) Successful in 2m28s
Build STT Container / build-stt-container (pull_request) Successful in 3m18s
Build App / build-windows (pull_request) Successful in 4m40s
Build App / build-linux (pull_request) Failing after 1m46s
Build App / create-tag (pull_request) Has been skipped
Build App / sync-to-github (pull_request) Has been skipped

Adds a mic button to the terminal UI that captures speech, transcribes
it via a Faster Whisper sidecar container, and injects the text into
the terminal input. Includes settings panel for model selection
(tiny/small/medium), port config, and container lifecycle management.

- stt-container/: Dockerfile + FastAPI server for Whisper transcription
- Rust backend: STT container management, transcribe_audio IPC command
- Frontend: useSTT hook, SttButton, SttSettings, WAV encoder
- CI: Gitea Actions workflow for multi-arch STT image builds

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-12 20:02:39 -07:00
parent 8301fd3690
commit 532de77927
19 changed files with 1121 additions and 2 deletions

View File

@@ -2345,6 +2345,16 @@ version = "0.3.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"
[[package]]
name = "mime_guess"
version = "2.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f7c44f8e672c00fe5308fa235f821cb4198414e1c77935c1ab6948d3fd78550e"
dependencies = [
"mime",
"unicase",
]
[[package]]
name = "miniz_oxide"
version = "0.8.9"
@@ -3454,6 +3464,7 @@ dependencies = [
"base64 0.22.1",
"bytes",
"futures-core",
"futures-util",
"http",
"http-body",
"http-body-util",
@@ -3462,6 +3473,7 @@ dependencies = [
"hyper-util",
"js-sys",
"log",
"mime_guess",
"percent-encoding",
"pin-project-lite",
"quinn",
@@ -5053,6 +5065,12 @@ dependencies = [
"unic-common",
]
[[package]]
name = "unicase"
version = "2.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dbc4bc3a9f746d862c45cb89d705aa10f187bb96c76001afab07a0d35ce60142"
[[package]]
name = "unicode-ident"
version = "1.0.24"

View File

@@ -29,7 +29,7 @@ log = "0.4"
fern = { version = "0.7", features = ["date-based"] }
tar = "0.4"
include_dir = "0.7"
reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] }
reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls", "multipart"] }
iana-time-zone = "0.1"
sha2 = "0.10"
axum = { version = "0.8", features = ["ws"] }

View File

@@ -5,6 +5,7 @@ pub mod help_commands;
pub mod mcp_commands;
pub mod project_commands;
pub mod settings_commands;
pub mod stt_commands;
pub mod terminal_commands;
pub mod update_commands;
pub mod web_terminal_commands;

View File

@@ -0,0 +1,92 @@
use tauri::{AppHandle, Emitter, State};
use crate::docker::stt;
use crate::models::app_settings::SttStatus;
use crate::AppState;
/// Tauri command: reports the current state of the STT image and container.
///
/// Reads the stored application settings and hands their `stt` section to
/// `stt::get_stt_status`.
///
/// # Errors
/// Propagates the error string returned by `stt::get_stt_status`.
#[tauri::command]
pub async fn get_stt_status(state: State<'_, AppState>) -> Result<SttStatus, String> {
    let app_settings = state.settings_store.get();
    let stt_settings = &app_settings.stt;
    stt::get_stt_status(stt_settings).await
}
/// Tauri command: makes sure the STT container is running with the stored
/// settings and returns the resulting status.
///
/// # Errors
/// Propagates the error string returned by `stt::ensure_stt_running`.
#[tauri::command]
pub async fn start_stt(state: State<'_, AppState>) -> Result<SttStatus, String> {
    let app_settings = state.settings_store.get();
    let stt_settings = &app_settings.stt;
    stt::ensure_stt_running(stt_settings).await
}
/// Tauri command: stops the STT container by delegating to
/// `stt::stop_stt_container`.
///
/// # Errors
/// Propagates the error string returned by `stt::stop_stt_container`.
#[tauri::command]
pub async fn stop_stt() -> Result<(), String> {
stt::stop_stt_container().await
}
/// Tauri command: builds the STT image locally, forwarding every build
/// output line to the frontend as an `stt-build-progress` event.
///
/// # Errors
/// Propagates the error string returned by `stt::build_stt_image`.
#[tauri::command]
pub async fn build_stt_image(app_handle: AppHandle) -> Result<(), String> {
    // Progress events are best-effort: a failed emit must not abort the build.
    let forward_progress = move |line: String| {
        let _ = app_handle.emit("stt-build-progress", &line);
    };
    stt::build_stt_image(forward_progress).await
}
/// Tauri command: pulls the STT image from the registry, forwarding every
/// progress message to the frontend as an `stt-pull-progress` event.
///
/// # Errors
/// Propagates the error string returned by `stt::pull_stt_image`.
#[tauri::command]
pub async fn pull_stt_image(app_handle: AppHandle) -> Result<(), String> {
    // Progress events are best-effort: a failed emit must not abort the pull.
    let forward_progress = move |line: String| {
        let _ = app_handle.emit("stt-pull-progress", &line);
    };
    stt::pull_stt_image(forward_progress).await
}
#[tauri::command]
pub async fn transcribe_audio(
audio_data: Vec<u8>,
state: State<'_, AppState>,
) -> Result<String, String> {
let settings = state.settings_store.get();
if !settings.stt.enabled {
return Err("STT is not enabled".to_string());
}
let url = format!("http://127.0.0.1:{}/transcribe", settings.stt.port);
let file_part = reqwest::multipart::Part::bytes(audio_data)
.file_name("recording.wav")
.mime_str("audio/wav")
.map_err(|e| format!("Failed to create multipart: {}", e))?;
let mut form = reqwest::multipart::Form::new().part("file", file_part);
if let Some(ref lang) = settings.stt.language {
form = form.text("language", lang.clone());
}
let client = reqwest::Client::new();
let response = client
.post(&url)
.multipart(form)
.send()
.await
.map_err(|e| {
if e.is_connect() {
"STT container is not running. Start it from Settings.".to_string()
} else {
format!("Transcription request failed: {}", e)
}
})?;
if !response.status().is_success() {
let status = response.status();
let body = response.text().await.unwrap_or_default();
return Err(format!("Transcription failed ({}): {}", status, body));
}
let result: serde_json::Value = response
.json()
.await
.map_err(|e| format!("Failed to parse transcription response: {}", e))?;
result["text"]
.as_str()
.map(|s| s.to_string())
.ok_or_else(|| "No text in transcription response".to_string())
}

View File

@@ -3,7 +3,10 @@ pub mod container;
pub mod image;
pub mod exec;
pub mod network;
pub mod stt;
#[allow(unused_imports)]
pub use stt::*;
#[allow(unused_imports)]
pub use client::*;
#[allow(unused_imports)]

View File

@@ -0,0 +1,266 @@
use bollard::container::{
Config, CreateContainerOptions, ListContainersOptions, RemoveContainerOptions,
StartContainerOptions, StopContainerOptions,
};
use bollard::image::BuildImageOptions;
use bollard::models::{HostConfig, Mount, MountTypeEnum, PortBinding};
use futures_util::StreamExt;
use std::collections::HashMap;
use std::io::Write;
use super::client::get_docker;
use crate::models::app_settings::{SttSettings, SttStatus};
const STT_CONTAINER_NAME: &str = "triple-c-stt";
const STT_MODEL_VOLUME: &str = "triple-c-stt-model-cache";
const STT_REGISTRY_IMAGE: &str = "ghcr.io/shadowdao/triple-c-stt:latest";
const STT_LOCAL_IMAGE: &str = "triple-c-stt:latest";
const STT_DOCKERFILE: &str = include_str!("../../../../stt-container/Dockerfile");
const STT_SERVER: &str = include_str!("../../../../stt-container/server.py");
/// Collects a status snapshot for the STT feature.
///
/// `image_exists` is true when either the registry image or the locally built
/// image is present (lookup failures count as "not present"). The container
/// fields come from `find_stt_container`; when no container exists the model
/// falls back to the one configured in `settings`. The reported port is always
/// `settings.port`.
///
/// # Errors
/// Propagates the error string from `find_stt_container`.
pub async fn get_stt_status(settings: &SttSettings) -> Result<SttStatus, String> {
    // Registry image is probed first; the local image is only probed when the
    // registry one is missing.
    let mut image_exists = false;
    for candidate in [STT_REGISTRY_IMAGE, STT_LOCAL_IMAGE] {
        if super::image::image_exists(candidate).await.unwrap_or(false) {
            image_exists = true;
            break;
        }
    }

    let found = find_stt_container().await?;
    let container_exists = found.is_some();
    let (running, model) = match found {
        Some((_id, state, label_model)) => (state == "running", label_model),
        None => (false, settings.model.clone()),
    };

    Ok(SttStatus {
        container_exists,
        running,
        port: settings.port,
        model,
        image_exists,
    })
}
/// Looks up the STT container by name, including stopped containers.
///
/// Returns `Ok(Some((id, state, model)))` where `state` is the Docker state
/// string (e.g. `"running"`) and `model` is the value of the
/// `triple-c.stt.model` label (falling back to `"tiny"` when the label is
/// absent). Returns `Ok(None)` when no container with exactly that name exists.
///
/// # Errors
/// Returns an error string when the Docker client is unavailable or the
/// container listing fails.
async fn find_stt_container() -> Result<Option<(String, String, String)>, String> {
    let docker = get_docker()?;

    let filters: HashMap<String, Vec<String>> = HashMap::from([(
        "name".to_string(),
        vec![format!("/{}", STT_CONTAINER_NAME)],
    )]);

    let containers = docker
        .list_containers(Some(ListContainersOptions {
            all: true,
            filters,
            ..Default::default()
        }))
        .await
        .map_err(|e| format!("Failed to list containers: {}", e))?;

    // The daemon's name filter is a pattern match, so the result may include
    // containers whose names merely contain ours (e.g. `triple-c-stt-old`).
    // Only accept an exact name match instead of blindly taking the first hit.
    let exact_match = containers.iter().find(|container| {
        container.names.as_ref().is_some_and(|names| {
            names
                .iter()
                .any(|name| name.trim_start_matches('/') == STT_CONTAINER_NAME)
        })
    });

    let Some(container) = exact_match else {
        return Ok(None);
    };

    let id = container.id.clone().unwrap_or_default();
    let state = container.state.clone().unwrap_or_default();
    // The model the container was created with is recorded as a label by
    // `create_stt_container`.
    let model = container
        .labels
        .as_ref()
        .and_then(|labels| labels.get("triple-c.stt.model"))
        .cloned()
        .unwrap_or_else(|| "tiny".to_string());

    Ok(Some((id, state, model)))
}
/// Creates (but does not start) the STT container and returns its id.
///
/// The locally built image is preferred over the registry image. The
/// container's port `9876/tcp` is published on `127.0.0.1:<settings.port>`,
/// the model cache volume is mounted at `/root/.cache/huggingface`, the model
/// is passed through the `WHISPER_MODEL` environment variable, and the model
/// and port are recorded as `triple-c.stt.*` labels.
///
/// # Errors
/// Returns an error string when the Docker client is unavailable, when
/// neither image is present, or when container creation fails.
async fn create_stt_container(settings: &SttSettings) -> Result<String, String> {
    let docker = get_docker()?;

    // Local image wins; the registry image is only probed when it is missing.
    let mut chosen_image = None;
    for candidate in [STT_LOCAL_IMAGE, STT_REGISTRY_IMAGE] {
        if super::image::image_exists(candidate).await.unwrap_or(false) {
            chosen_image = Some(candidate.to_string());
            break;
        }
    }
    let Some(image) = chosen_image else {
        return Err("STT image not found. Please build or pull the image first.".to_string());
    };

    let host_port = settings.port.to_string();

    let port_bindings = HashMap::from([(
        "9876/tcp".to_string(),
        Some(vec![PortBinding {
            host_ip: Some("127.0.0.1".to_string()),
            host_port: Some(host_port.clone()),
        }]),
    )]);

    let model_cache = Mount {
        target: Some("/root/.cache/huggingface".to_string()),
        source: Some(STT_MODEL_VOLUME.to_string()),
        typ: Some(MountTypeEnum::VOLUME),
        ..Default::default()
    };

    let host_config = HostConfig {
        port_bindings: Some(port_bindings),
        mounts: Some(vec![model_cache]),
        ..Default::default()
    };

    let labels = HashMap::from([
        ("triple-c.stt.model".to_string(), settings.model.clone()),
        ("triple-c.stt.port".to_string(), host_port),
    ]);

    let config = Config {
        image: Some(image),
        env: Some(vec![format!("WHISPER_MODEL={}", settings.model)]),
        host_config: Some(host_config),
        labels: Some(labels),
        ..Default::default()
    };

    let options = CreateContainerOptions {
        name: STT_CONTAINER_NAME,
        ..Default::default()
    };

    docker
        .create_container(Some(options), config)
        .await
        .map(|created| created.id)
        .map_err(|e| format!("Failed to create STT container: {}", e))
}
pub async fn ensure_stt_running(settings: &SttSettings) -> Result<SttStatus, String> {
let docker = get_docker()?;
// Check if container exists and if settings match
if let Some((id, state, model)) = find_stt_container().await? {
let needs_recreate = model != settings.model;
if needs_recreate {
// Settings changed, recreate
if state == "running" {
docker
.stop_container(&id, None::<StopContainerOptions>)
.await
.map_err(|e| format!("Failed to stop STT container: {}", e))?;
}
docker
.remove_container(
&id,
Some(RemoveContainerOptions {
force: true,
..Default::default()
}),
)
.await
.map_err(|e| format!("Failed to remove STT container: {}", e))?;
} else if state == "running" {
return get_stt_status(settings).await;
} else {
// Container exists but stopped, start it
docker
.start_container(&id, None::<StartContainerOptions<String>>)
.await
.map_err(|e| format!("Failed to start STT container: {}", e))?;
return get_stt_status(settings).await;
}
}
// Create and start new container
let id = create_stt_container(settings).await?;
docker
.start_container(&id, None::<StartContainerOptions<String>>)
.await
.map_err(|e| format!("Failed to start STT container: {}", e))?;
get_stt_status(settings).await
}
/// Stops the STT container if it exists and is currently running.
///
/// A missing or already-stopped container is not an error.
///
/// # Errors
/// Returns an error string when the Docker client is unavailable, the
/// container lookup fails, or the stop request fails.
pub async fn stop_stt_container() -> Result<(), String> {
    let docker = get_docker()?;

    match find_stt_container().await? {
        Some((container_id, state, _model)) if state == "running" => docker
            .stop_container(&container_id, None::<StopContainerOptions>)
            .await
            .map_err(|e| format!("Failed to stop STT container: {}", e)),
        // Nothing to do: no container, or it is not running.
        _ => Ok(()),
    }
}
/// Pulls the registry STT image (`STT_REGISTRY_IMAGE`) by delegating to
/// `super::image::pull_image`, passing `on_progress` through unchanged.
///
/// # Errors
/// Propagates the error string returned by `super::image::pull_image`.
pub async fn pull_stt_image<F>(on_progress: F) -> Result<(), String>
where
F: Fn(String) + Send + 'static,
{
super::image::pull_image(STT_REGISTRY_IMAGE, on_progress).await
}
/// Builds the STT image locally and tags it as `STT_LOCAL_IMAGE`.
///
/// The build context is the in-memory tar archive produced by
/// `create_stt_build_context`. Every `stream` line reported by the daemon is
/// passed to `on_progress`.
///
/// # Errors
/// Returns an error string when the Docker client is unavailable, the build
/// context cannot be created, the build stream yields a transport error, or
/// the daemon reports a build error.
pub async fn build_stt_image<F>(on_progress: F) -> Result<(), String>
where
    F: Fn(String) + Send + 'static,
{
    let docker = get_docker()?;

    let context = create_stt_build_context()
        .map_err(|e| format!("Failed to create STT build context: {}", e))?;

    let mut events = docker.build_image(
        BuildImageOptions {
            t: STT_LOCAL_IMAGE,
            rm: true,
            forcerm: true,
            ..Default::default()
        },
        None,
        Some(context.into()),
    );

    while let Some(event) = events.next().await {
        let info = event.map_err(|e| format!("Build stream error: {}", e))?;

        // Report output first so the last lines before a failure are not lost.
        if let Some(line) = info.stream {
            on_progress(line);
        }
        if let Some(message) = info.error {
            return Err(format!("Build error: {}", message));
        }
    }

    Ok(())
}
/// Assembles the Docker build context for the STT image as an in-memory tar
/// archive containing `Dockerfile` and `server.py` (both embedded at compile
/// time), each with mode `0o644`.
///
/// # Errors
/// Returns the I/O error from appending an entry or finishing the archive.
fn create_stt_build_context() -> Result<Vec<u8>, std::io::Error> {
    let entries: [(&str, &str); 2] = [("Dockerfile", STT_DOCKERFILE), ("server.py", STT_SERVER)];

    let mut buf = Vec::new();
    {
        // Scoped so the builder's mutable borrow of `buf` ends before we
        // return it.
        let mut archive = tar::Builder::new(&mut buf);
        for &(path, contents) in &entries {
            let mut header = tar::Header::new_gnu();
            header.set_size(contents.len() as u64);
            header.set_mode(0o644);
            header.set_cksum();
            archive.append_data(&mut header, path, contents.as_bytes())?;
        }
        archive.finish()?;
    }

    // Flushing a `Vec<u8>` cannot fail; the result is intentionally ignored.
    let _ = buf.flush();
    Ok(buf)
}

View File

@@ -122,6 +122,8 @@ pub fn run() {
if let Some(server) = server_guard.take() {
server.stop();
}
// Stop STT container
let _ = docker::stt::stop_stt_container().await;
// Close all exec sessions
state.exec_manager.close_all_sessions().await;
});
@@ -181,6 +183,13 @@ pub fn run() {
commands::web_terminal_commands::stop_web_terminal,
commands::web_terminal_commands::get_web_terminal_status,
commands::web_terminal_commands::regenerate_web_terminal_token,
// STT
commands::stt_commands::get_stt_status,
commands::stt_commands::start_stt,
commands::stt_commands::stop_stt,
commands::stt_commands::build_stt_image,
commands::stt_commands::pull_stt_image,
commands::stt_commands::transcribe_audio,
])
.run(tauri::generate_context!())
.expect("error while running tauri application");

View File

@@ -76,6 +76,48 @@ pub struct AppSettings {
pub dismissed_image_digest: Option<String>,
#[serde(default)]
pub web_terminal: WebTerminalSettings,
#[serde(default)]
pub stt: SttSettings,
}
/// Serde default for `SttSettings::model`: the `"tiny"` model.
fn default_stt_model() -> String {
    String::from("tiny")
}
/// Serde default for `SttSettings::port`.
fn default_stt_port() -> u16 {
    const DEFAULT_STT_PORT: u16 = 9876;
    DEFAULT_STT_PORT
}
/// User-configurable settings for the speech-to-text (STT) feature.
///
/// Every field carries a serde default, so settings files that omit some or
/// all of these fields still deserialize.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SttSettings {
/// Whether STT is turned on; `false` when absent.
#[serde(default)]
pub enabled: bool,
/// Whisper model name; defaults to the value of `default_stt_model`.
#[serde(default = "default_stt_model")]
pub model: String,
/// Port used to reach the STT service; defaults to the value of
/// `default_stt_port`.
#[serde(default = "default_stt_port")]
pub port: u16,
/// Optional language hint; `None` when absent.
#[serde(default)]
pub language: Option<String>,
}
impl Default for SttSettings {
fn default() -> Self {
Self {
enabled: false,
model: default_stt_model(),
port: 9876,
language: None,
}
}
}
/// Snapshot of the STT image/container state returned by the STT commands.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SttStatus {
/// Whether an STT container exists (running or not).
pub container_exists: bool,
/// Whether the STT container is currently running.
pub running: bool,
/// Port associated with the STT service.
pub port: u16,
/// Whisper model name associated with the STT service.
pub model: String,
/// Whether an STT image is available.
pub image_exists: bool,
}
fn default_web_terminal_port() -> u16 {
@@ -120,6 +162,7 @@ impl Default for AppSettings {
default_microphone: None,
dismissed_image_digest: None,
web_terminal: WebTerminalSettings::default(),
stt: SttSettings::default(),
}
}
}