Add speech-to-text feature using Faster Whisper container
Some checks failed
Build App / compute-version (pull_request) Successful in 3s
Build App / build-macos (pull_request) Successful in 2m28s
Build STT Container / build-stt-container (pull_request) Successful in 3m18s
Build App / build-windows (pull_request) Successful in 4m40s
Build App / build-linux (pull_request) Failing after 1m46s
Build App / create-tag (pull_request) Has been skipped
Build App / sync-to-github (pull_request) Has been skipped
Some checks failed
Build App / compute-version (pull_request) Successful in 3s
Build App / build-macos (pull_request) Successful in 2m28s
Build STT Container / build-stt-container (pull_request) Successful in 3m18s
Build App / build-windows (pull_request) Successful in 4m40s
Build App / build-linux (pull_request) Failing after 1m46s
Build App / create-tag (pull_request) Has been skipped
Build App / sync-to-github (pull_request) Has been skipped
Adds a mic button to the terminal UI that captures speech, transcribes it via a Faster Whisper sidecar container, and injects the text into the terminal input. Includes a settings panel for model selection (tiny/small/medium), port config, and container lifecycle management.

- stt-container/: Dockerfile + FastAPI server for Whisper transcription
- Rust backend: STT container management, transcribe_audio IPC command
- Frontend: useSTT hook, SttButton, SttSettings, WAV encoder
- CI: Gitea Actions workflow for multi-arch STT image builds

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
18
app/src-tauri/Cargo.lock
generated
18
app/src-tauri/Cargo.lock
generated
@@ -2345,6 +2345,16 @@ version = "0.3.17"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"
|
||||
|
||||
[[package]]
|
||||
name = "mime_guess"
|
||||
version = "2.0.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f7c44f8e672c00fe5308fa235f821cb4198414e1c77935c1ab6948d3fd78550e"
|
||||
dependencies = [
|
||||
"mime",
|
||||
"unicase",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "miniz_oxide"
|
||||
version = "0.8.9"
|
||||
@@ -3454,6 +3464,7 @@ dependencies = [
|
||||
"base64 0.22.1",
|
||||
"bytes",
|
||||
"futures-core",
|
||||
"futures-util",
|
||||
"http",
|
||||
"http-body",
|
||||
"http-body-util",
|
||||
@@ -3462,6 +3473,7 @@ dependencies = [
|
||||
"hyper-util",
|
||||
"js-sys",
|
||||
"log",
|
||||
"mime_guess",
|
||||
"percent-encoding",
|
||||
"pin-project-lite",
|
||||
"quinn",
|
||||
@@ -5053,6 +5065,12 @@ dependencies = [
|
||||
"unic-common",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicase"
|
||||
version = "2.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dbc4bc3a9f746d862c45cb89d705aa10f187bb96c76001afab07a0d35ce60142"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.24"
|
||||
|
||||
@@ -29,7 +29,7 @@ log = "0.4"
|
||||
fern = { version = "0.7", features = ["date-based"] }
|
||||
tar = "0.4"
|
||||
include_dir = "0.7"
|
||||
reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] }
|
||||
reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls", "multipart"] }
|
||||
iana-time-zone = "0.1"
|
||||
sha2 = "0.10"
|
||||
axum = { version = "0.8", features = ["ws"] }
|
||||
|
||||
@@ -5,6 +5,7 @@ pub mod help_commands;
|
||||
pub mod mcp_commands;
|
||||
pub mod project_commands;
|
||||
pub mod settings_commands;
|
||||
pub mod stt_commands;
|
||||
pub mod terminal_commands;
|
||||
pub mod update_commands;
|
||||
pub mod web_terminal_commands;
|
||||
|
||||
92
app/src-tauri/src/commands/stt_commands.rs
Normal file
92
app/src-tauri/src/commands/stt_commands.rs
Normal file
@@ -0,0 +1,92 @@
|
||||
use tauri::{AppHandle, Emitter, State};
|
||||
|
||||
use crate::docker::stt;
|
||||
use crate::models::app_settings::SttStatus;
|
||||
use crate::AppState;
|
||||
|
||||
#[tauri::command]
|
||||
pub async fn get_stt_status(state: State<'_, AppState>) -> Result<SttStatus, String> {
|
||||
let settings = state.settings_store.get();
|
||||
stt::get_stt_status(&settings.stt).await
|
||||
}
|
||||
|
||||
#[tauri::command]
|
||||
pub async fn start_stt(state: State<'_, AppState>) -> Result<SttStatus, String> {
|
||||
let settings = state.settings_store.get();
|
||||
stt::ensure_stt_running(&settings.stt).await
|
||||
}
|
||||
|
||||
#[tauri::command]
|
||||
pub async fn stop_stt() -> Result<(), String> {
|
||||
stt::stop_stt_container().await
|
||||
}
|
||||
|
||||
#[tauri::command]
|
||||
pub async fn build_stt_image(app_handle: AppHandle) -> Result<(), String> {
|
||||
stt::build_stt_image(move |msg| {
|
||||
let _ = app_handle.emit("stt-build-progress", &msg);
|
||||
})
|
||||
.await
|
||||
}
|
||||
|
||||
#[tauri::command]
|
||||
pub async fn pull_stt_image(app_handle: AppHandle) -> Result<(), String> {
|
||||
stt::pull_stt_image(move |msg| {
|
||||
let _ = app_handle.emit("stt-pull-progress", &msg);
|
||||
})
|
||||
.await
|
||||
}
|
||||
|
||||
#[tauri::command]
|
||||
pub async fn transcribe_audio(
|
||||
audio_data: Vec<u8>,
|
||||
state: State<'_, AppState>,
|
||||
) -> Result<String, String> {
|
||||
let settings = state.settings_store.get();
|
||||
if !settings.stt.enabled {
|
||||
return Err("STT is not enabled".to_string());
|
||||
}
|
||||
|
||||
let url = format!("http://127.0.0.1:{}/transcribe", settings.stt.port);
|
||||
|
||||
let file_part = reqwest::multipart::Part::bytes(audio_data)
|
||||
.file_name("recording.wav")
|
||||
.mime_str("audio/wav")
|
||||
.map_err(|e| format!("Failed to create multipart: {}", e))?;
|
||||
|
||||
let mut form = reqwest::multipart::Form::new().part("file", file_part);
|
||||
|
||||
if let Some(ref lang) = settings.stt.language {
|
||||
form = form.text("language", lang.clone());
|
||||
}
|
||||
|
||||
let client = reqwest::Client::new();
|
||||
let response = client
|
||||
.post(&url)
|
||||
.multipart(form)
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| {
|
||||
if e.is_connect() {
|
||||
"STT container is not running. Start it from Settings.".to_string()
|
||||
} else {
|
||||
format!("Transcription request failed: {}", e)
|
||||
}
|
||||
})?;
|
||||
|
||||
if !response.status().is_success() {
|
||||
let status = response.status();
|
||||
let body = response.text().await.unwrap_or_default();
|
||||
return Err(format!("Transcription failed ({}): {}", status, body));
|
||||
}
|
||||
|
||||
let result: serde_json::Value = response
|
||||
.json()
|
||||
.await
|
||||
.map_err(|e| format!("Failed to parse transcription response: {}", e))?;
|
||||
|
||||
result["text"]
|
||||
.as_str()
|
||||
.map(|s| s.to_string())
|
||||
.ok_or_else(|| "No text in transcription response".to_string())
|
||||
}
|
||||
@@ -3,7 +3,10 @@ pub mod container;
|
||||
pub mod image;
|
||||
pub mod exec;
|
||||
pub mod network;
|
||||
pub mod stt;
|
||||
|
||||
#[allow(unused_imports)]
|
||||
pub use stt::*;
|
||||
#[allow(unused_imports)]
|
||||
pub use client::*;
|
||||
#[allow(unused_imports)]
|
||||
|
||||
266
app/src-tauri/src/docker/stt.rs
Normal file
266
app/src-tauri/src/docker/stt.rs
Normal file
@@ -0,0 +1,266 @@
|
||||
use bollard::container::{
|
||||
Config, CreateContainerOptions, ListContainersOptions, RemoveContainerOptions,
|
||||
StartContainerOptions, StopContainerOptions,
|
||||
};
|
||||
use bollard::image::BuildImageOptions;
|
||||
use bollard::models::{HostConfig, Mount, MountTypeEnum, PortBinding};
|
||||
use futures_util::StreamExt;
|
||||
use std::collections::HashMap;
|
||||
use std::io::Write;
|
||||
|
||||
use super::client::get_docker;
|
||||
use crate::models::app_settings::{SttSettings, SttStatus};
|
||||
|
||||
const STT_CONTAINER_NAME: &str = "triple-c-stt";
|
||||
const STT_MODEL_VOLUME: &str = "triple-c-stt-model-cache";
|
||||
const STT_REGISTRY_IMAGE: &str = "ghcr.io/shadowdao/triple-c-stt:latest";
|
||||
const STT_LOCAL_IMAGE: &str = "triple-c-stt:latest";
|
||||
const STT_DOCKERFILE: &str = include_str!("../../../../stt-container/Dockerfile");
|
||||
const STT_SERVER: &str = include_str!("../../../../stt-container/server.py");
|
||||
|
||||
pub async fn get_stt_status(settings: &SttSettings) -> Result<SttStatus, String> {
|
||||
let image_exists = super::image::image_exists(STT_REGISTRY_IMAGE).await.unwrap_or(false)
|
||||
|| super::image::image_exists(STT_LOCAL_IMAGE).await.unwrap_or(false);
|
||||
|
||||
let (container_exists, running, model) = match find_stt_container().await? {
|
||||
Some((_, state, env_model)) => (true, state == "running", env_model),
|
||||
None => (false, false, settings.model.clone()),
|
||||
};
|
||||
|
||||
Ok(SttStatus {
|
||||
container_exists,
|
||||
running,
|
||||
port: settings.port,
|
||||
model,
|
||||
image_exists,
|
||||
})
|
||||
}
|
||||
|
||||
async fn find_stt_container() -> Result<Option<(String, String, String)>, String> {
|
||||
let docker = get_docker()?;
|
||||
|
||||
let filters: HashMap<String, Vec<String>> = HashMap::from([(
|
||||
"name".to_string(),
|
||||
vec![format!("/{}", STT_CONTAINER_NAME)],
|
||||
)]);
|
||||
|
||||
let containers = docker
|
||||
.list_containers(Some(ListContainersOptions {
|
||||
all: true,
|
||||
filters,
|
||||
..Default::default()
|
||||
}))
|
||||
.await
|
||||
.map_err(|e| format!("Failed to list containers: {}", e))?;
|
||||
|
||||
if let Some(container) = containers.first() {
|
||||
let id = container.id.clone().unwrap_or_default();
|
||||
let state = container.state.clone().unwrap_or_default();
|
||||
|
||||
// Extract WHISPER_MODEL from container env
|
||||
let model = container
|
||||
.labels
|
||||
.as_ref()
|
||||
.and_then(|l| l.get("triple-c.stt.model"))
|
||||
.cloned()
|
||||
.unwrap_or_else(|| "tiny".to_string());
|
||||
|
||||
return Ok(Some((id, state, model)));
|
||||
}
|
||||
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
async fn create_stt_container(settings: &SttSettings) -> Result<String, String> {
|
||||
let docker = get_docker()?;
|
||||
|
||||
// Try local image first, fall back to registry
|
||||
let image = if super::image::image_exists(STT_LOCAL_IMAGE).await.unwrap_or(false) {
|
||||
STT_LOCAL_IMAGE.to_string()
|
||||
} else if super::image::image_exists(STT_REGISTRY_IMAGE).await.unwrap_or(false) {
|
||||
STT_REGISTRY_IMAGE.to_string()
|
||||
} else {
|
||||
return Err("STT image not found. Please build or pull the image first.".to_string());
|
||||
};
|
||||
|
||||
let port_binding = PortBinding {
|
||||
host_ip: Some("127.0.0.1".to_string()),
|
||||
host_port: Some(settings.port.to_string()),
|
||||
};
|
||||
|
||||
let mut port_bindings = HashMap::new();
|
||||
port_bindings.insert(
|
||||
"9876/tcp".to_string(),
|
||||
Some(vec![port_binding]),
|
||||
);
|
||||
|
||||
let host_config = HostConfig {
|
||||
port_bindings: Some(port_bindings),
|
||||
mounts: Some(vec![Mount {
|
||||
target: Some("/root/.cache/huggingface".to_string()),
|
||||
source: Some(STT_MODEL_VOLUME.to_string()),
|
||||
typ: Some(MountTypeEnum::VOLUME),
|
||||
..Default::default()
|
||||
}]),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let mut labels = HashMap::new();
|
||||
labels.insert(
|
||||
"triple-c.stt.model".to_string(),
|
||||
settings.model.clone(),
|
||||
);
|
||||
labels.insert(
|
||||
"triple-c.stt.port".to_string(),
|
||||
settings.port.to_string(),
|
||||
);
|
||||
|
||||
let config = Config {
|
||||
image: Some(image),
|
||||
env: Some(vec![format!("WHISPER_MODEL={}", settings.model)]),
|
||||
host_config: Some(host_config),
|
||||
labels: Some(labels),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let options = CreateContainerOptions {
|
||||
name: STT_CONTAINER_NAME,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let response = docker
|
||||
.create_container(Some(options), config)
|
||||
.await
|
||||
.map_err(|e| format!("Failed to create STT container: {}", e))?;
|
||||
|
||||
Ok(response.id)
|
||||
}
|
||||
|
||||
pub async fn ensure_stt_running(settings: &SttSettings) -> Result<SttStatus, String> {
|
||||
let docker = get_docker()?;
|
||||
|
||||
// Check if container exists and if settings match
|
||||
if let Some((id, state, model)) = find_stt_container().await? {
|
||||
let needs_recreate = model != settings.model;
|
||||
|
||||
if needs_recreate {
|
||||
// Settings changed, recreate
|
||||
if state == "running" {
|
||||
docker
|
||||
.stop_container(&id, None::<StopContainerOptions>)
|
||||
.await
|
||||
.map_err(|e| format!("Failed to stop STT container: {}", e))?;
|
||||
}
|
||||
docker
|
||||
.remove_container(
|
||||
&id,
|
||||
Some(RemoveContainerOptions {
|
||||
force: true,
|
||||
..Default::default()
|
||||
}),
|
||||
)
|
||||
.await
|
||||
.map_err(|e| format!("Failed to remove STT container: {}", e))?;
|
||||
} else if state == "running" {
|
||||
return get_stt_status(settings).await;
|
||||
} else {
|
||||
// Container exists but stopped, start it
|
||||
docker
|
||||
.start_container(&id, None::<StartContainerOptions<String>>)
|
||||
.await
|
||||
.map_err(|e| format!("Failed to start STT container: {}", e))?;
|
||||
return get_stt_status(settings).await;
|
||||
}
|
||||
}
|
||||
|
||||
// Create and start new container
|
||||
let id = create_stt_container(settings).await?;
|
||||
docker
|
||||
.start_container(&id, None::<StartContainerOptions<String>>)
|
||||
.await
|
||||
.map_err(|e| format!("Failed to start STT container: {}", e))?;
|
||||
|
||||
get_stt_status(settings).await
|
||||
}
|
||||
|
||||
pub async fn stop_stt_container() -> Result<(), String> {
|
||||
let docker = get_docker()?;
|
||||
|
||||
if let Some((id, state, _)) = find_stt_container().await? {
|
||||
if state == "running" {
|
||||
docker
|
||||
.stop_container(&id, None::<StopContainerOptions>)
|
||||
.await
|
||||
.map_err(|e| format!("Failed to stop STT container: {}", e))?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn pull_stt_image<F>(on_progress: F) -> Result<(), String>
|
||||
where
|
||||
F: Fn(String) + Send + 'static,
|
||||
{
|
||||
super::image::pull_image(STT_REGISTRY_IMAGE, on_progress).await
|
||||
}
|
||||
|
||||
pub async fn build_stt_image<F>(on_progress: F) -> Result<(), String>
|
||||
where
|
||||
F: Fn(String) + Send + 'static,
|
||||
{
|
||||
let docker = get_docker()?;
|
||||
|
||||
let tar_bytes = create_stt_build_context()
|
||||
.map_err(|e| format!("Failed to create STT build context: {}", e))?;
|
||||
|
||||
let options = BuildImageOptions {
|
||||
t: STT_LOCAL_IMAGE,
|
||||
rm: true,
|
||||
forcerm: true,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let mut stream = docker.build_image(options, None, Some(tar_bytes.into()));
|
||||
|
||||
while let Some(result) = stream.next().await {
|
||||
match result {
|
||||
Ok(output) => {
|
||||
if let Some(stream) = output.stream {
|
||||
on_progress(stream);
|
||||
}
|
||||
if let Some(error) = output.error {
|
||||
return Err(format!("Build error: {}", error));
|
||||
}
|
||||
}
|
||||
Err(e) => return Err(format!("Build stream error: {}", e)),
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn create_stt_build_context() -> Result<Vec<u8>, std::io::Error> {
|
||||
let mut buf = Vec::new();
|
||||
{
|
||||
let mut archive = tar::Builder::new(&mut buf);
|
||||
|
||||
let mut dockerfile_header = tar::Header::new_gnu();
|
||||
dockerfile_header.set_size(STT_DOCKERFILE.len() as u64);
|
||||
dockerfile_header.set_mode(0o644);
|
||||
dockerfile_header.set_cksum();
|
||||
archive.append_data(&mut dockerfile_header, "Dockerfile", STT_DOCKERFILE.as_bytes())?;
|
||||
|
||||
let mut server_header = tar::Header::new_gnu();
|
||||
server_header.set_size(STT_SERVER.len() as u64);
|
||||
server_header.set_mode(0o644);
|
||||
server_header.set_cksum();
|
||||
archive.append_data(&mut server_header, "server.py", STT_SERVER.as_bytes())?;
|
||||
|
||||
archive.finish()?;
|
||||
}
|
||||
|
||||
let _ = buf.flush();
|
||||
Ok(buf)
|
||||
}
|
||||
|
||||
@@ -122,6 +122,8 @@ pub fn run() {
|
||||
if let Some(server) = server_guard.take() {
|
||||
server.stop();
|
||||
}
|
||||
// Stop STT container
|
||||
let _ = docker::stt::stop_stt_container().await;
|
||||
// Close all exec sessions
|
||||
state.exec_manager.close_all_sessions().await;
|
||||
});
|
||||
@@ -181,6 +183,13 @@ pub fn run() {
|
||||
commands::web_terminal_commands::stop_web_terminal,
|
||||
commands::web_terminal_commands::get_web_terminal_status,
|
||||
commands::web_terminal_commands::regenerate_web_terminal_token,
|
||||
// STT
|
||||
commands::stt_commands::get_stt_status,
|
||||
commands::stt_commands::start_stt,
|
||||
commands::stt_commands::stop_stt,
|
||||
commands::stt_commands::build_stt_image,
|
||||
commands::stt_commands::pull_stt_image,
|
||||
commands::stt_commands::transcribe_audio,
|
||||
])
|
||||
.run(tauri::generate_context!())
|
||||
.expect("error while running tauri application");
|
||||
|
||||
@@ -76,6 +76,48 @@ pub struct AppSettings {
|
||||
pub dismissed_image_digest: Option<String>,
|
||||
#[serde(default)]
|
||||
pub web_terminal: WebTerminalSettings,
|
||||
#[serde(default)]
|
||||
pub stt: SttSettings,
|
||||
}
|
||||
|
||||
/// Serde default for `SttSettings::model`: the `"tiny"` model.
fn default_stt_model() -> String {
    String::from("tiny")
}
|
||||
|
||||
/// Serde default for `SttSettings::port`.
fn default_stt_port() -> u16 {
    const DEFAULT_PORT: u16 = 9876;
    DEFAULT_PORT
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct SttSettings {
|
||||
#[serde(default)]
|
||||
pub enabled: bool,
|
||||
#[serde(default = "default_stt_model")]
|
||||
pub model: String,
|
||||
#[serde(default = "default_stt_port")]
|
||||
pub port: u16,
|
||||
#[serde(default)]
|
||||
pub language: Option<String>,
|
||||
}
|
||||
|
||||
impl Default for SttSettings {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
enabled: false,
|
||||
model: default_stt_model(),
|
||||
port: 9876,
|
||||
language: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct SttStatus {
|
||||
pub container_exists: bool,
|
||||
pub running: bool,
|
||||
pub port: u16,
|
||||
pub model: String,
|
||||
pub image_exists: bool,
|
||||
}
|
||||
|
||||
fn default_web_terminal_port() -> u16 {
|
||||
@@ -120,6 +162,7 @@ impl Default for AppSettings {
|
||||
default_microphone: None,
|
||||
dismissed_image_digest: None,
|
||||
web_terminal: WebTerminalSettings::default(),
|
||||
stt: SttSettings::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user