Add speech-to-text feature using Faster Whisper container
Some checks failed
Build App / compute-version (pull_request) Successful in 3s
Build App / build-macos (pull_request) Successful in 2m28s
Build STT Container / build-stt-container (pull_request) Successful in 3m18s
Build App / build-windows (pull_request) Successful in 4m40s
Build App / build-linux (pull_request) Failing after 1m46s
Build App / create-tag (pull_request) Has been skipped
Build App / sync-to-github (pull_request) Has been skipped
Some checks failed
Build App / compute-version (pull_request) Successful in 3s
Build App / build-macos (pull_request) Successful in 2m28s
Build STT Container / build-stt-container (pull_request) Successful in 3m18s
Build App / build-windows (pull_request) Successful in 4m40s
Build App / build-linux (pull_request) Failing after 1m46s
Build App / create-tag (pull_request) Has been skipped
Build App / sync-to-github (pull_request) Has been skipped
Adds a mic button to the terminal UI that captures speech, transcribes it via a Faster Whisper sidecar container, and injects the text into the terminal input. Includes a settings panel for model selection (tiny/small/medium), port config, and container lifecycle management.

- stt-container/: Dockerfile + FastAPI server for Whisper transcription
- Rust backend: STT container management, transcribe_audio IPC command
- Frontend: useSTT hook, SttButton, SttSettings, WAV encoder
- CI: Gitea Actions workflow for multi-arch STT image builds

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
59
.gitea/workflows/build-stt.yml
Normal file
59
.gitea/workflows/build-stt.yml
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
# Builds the speech-to-text sidecar image from ./stt-container for
# linux/amd64 and linux/arm64.
#
# Pull requests only verify that the image builds. Pushes to main also publish
# the image to the Gitea registry and to GHCR.
name: Build STT Container

on:
  push:
    branches: [main]
    paths:
      - "stt-container/**"
      - ".gitea/workflows/build-stt.yml"
  pull_request:
    branches: [main]
    paths:
      - "stt-container/**"
      - ".gitea/workflows/build-stt.yml"

env:
  REGISTRY: repo.anhonesthost.net
  IMAGE_NAME: cybercovellc/triple-c/triple-c-stt

jobs:
  build-stt-container:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      # QEMU provides emulation for the non-native target platform.
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      # Registry credentials are only needed when the image is pushed, which
      # happens on push events only. Skipping the logins elsewhere keeps
      # pull-request builds from depending on secrets they never use.
      - name: Login to Gitea Container Registry
        if: ${{ gitea.event_name == 'push' }}
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ gitea.actor }}
          password: ${{ secrets.REGISTRY_TOKEN }}

      - name: Login to GitHub Container Registry
        if: ${{ gitea.event_name == 'push' }}
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: shadowdao
          password: ${{ secrets.GH_PAT }}

      - name: Build and push STT container image
        uses: docker/build-push-action@v5
        with:
          context: ./stt-container
          file: ./stt-container/Dockerfile
          platforms: linux/amd64,linux/arm64
          # Build-only on pull requests; publish on pushes to main.
          push: ${{ gitea.event_name == 'push' }}
          tags: |
            ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest
            ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ gitea.sha }}
            ghcr.io/shadowdao/triple-c-stt:latest
            ghcr.io/shadowdao/triple-c-stt:${{ gitea.sha }}
          cache-from: type=gha
          cache-to: type=gha,mode=max
|
||||||
18
app/src-tauri/Cargo.lock
generated
18
app/src-tauri/Cargo.lock
generated
@@ -2345,6 +2345,16 @@ version = "0.3.17"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"
|
checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "mime_guess"
|
||||||
|
version = "2.0.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f7c44f8e672c00fe5308fa235f821cb4198414e1c77935c1ab6948d3fd78550e"
|
||||||
|
dependencies = [
|
||||||
|
"mime",
|
||||||
|
"unicase",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "miniz_oxide"
|
name = "miniz_oxide"
|
||||||
version = "0.8.9"
|
version = "0.8.9"
|
||||||
@@ -3454,6 +3464,7 @@ dependencies = [
|
|||||||
"base64 0.22.1",
|
"base64 0.22.1",
|
||||||
"bytes",
|
"bytes",
|
||||||
"futures-core",
|
"futures-core",
|
||||||
|
"futures-util",
|
||||||
"http",
|
"http",
|
||||||
"http-body",
|
"http-body",
|
||||||
"http-body-util",
|
"http-body-util",
|
||||||
@@ -3462,6 +3473,7 @@ dependencies = [
|
|||||||
"hyper-util",
|
"hyper-util",
|
||||||
"js-sys",
|
"js-sys",
|
||||||
"log",
|
"log",
|
||||||
|
"mime_guess",
|
||||||
"percent-encoding",
|
"percent-encoding",
|
||||||
"pin-project-lite",
|
"pin-project-lite",
|
||||||
"quinn",
|
"quinn",
|
||||||
@@ -5053,6 +5065,12 @@ dependencies = [
|
|||||||
"unic-common",
|
"unic-common",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "unicase"
|
||||||
|
version = "2.9.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "dbc4bc3a9f746d862c45cb89d705aa10f187bb96c76001afab07a0d35ce60142"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "unicode-ident"
|
name = "unicode-ident"
|
||||||
version = "1.0.24"
|
version = "1.0.24"
|
||||||
|
|||||||
@@ -29,7 +29,7 @@ log = "0.4"
|
|||||||
fern = { version = "0.7", features = ["date-based"] }
|
fern = { version = "0.7", features = ["date-based"] }
|
||||||
tar = "0.4"
|
tar = "0.4"
|
||||||
include_dir = "0.7"
|
include_dir = "0.7"
|
||||||
reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] }
|
reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls", "multipart"] }
|
||||||
iana-time-zone = "0.1"
|
iana-time-zone = "0.1"
|
||||||
sha2 = "0.10"
|
sha2 = "0.10"
|
||||||
axum = { version = "0.8", features = ["ws"] }
|
axum = { version = "0.8", features = ["ws"] }
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ pub mod help_commands;
|
|||||||
pub mod mcp_commands;
|
pub mod mcp_commands;
|
||||||
pub mod project_commands;
|
pub mod project_commands;
|
||||||
pub mod settings_commands;
|
pub mod settings_commands;
|
||||||
|
pub mod stt_commands;
|
||||||
pub mod terminal_commands;
|
pub mod terminal_commands;
|
||||||
pub mod update_commands;
|
pub mod update_commands;
|
||||||
pub mod web_terminal_commands;
|
pub mod web_terminal_commands;
|
||||||
|
|||||||
92
app/src-tauri/src/commands/stt_commands.rs
Normal file
92
app/src-tauri/src/commands/stt_commands.rs
Normal file
@@ -0,0 +1,92 @@
|
|||||||
|
use tauri::{AppHandle, Emitter, State};
|
||||||
|
|
||||||
|
use crate::docker::stt;
|
||||||
|
use crate::models::app_settings::SttStatus;
|
||||||
|
use crate::AppState;
|
||||||
|
|
||||||
|
#[tauri::command]
|
||||||
|
pub async fn get_stt_status(state: State<'_, AppState>) -> Result<SttStatus, String> {
|
||||||
|
let settings = state.settings_store.get();
|
||||||
|
stt::get_stt_status(&settings.stt).await
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tauri::command]
|
||||||
|
pub async fn start_stt(state: State<'_, AppState>) -> Result<SttStatus, String> {
|
||||||
|
let settings = state.settings_store.get();
|
||||||
|
stt::ensure_stt_running(&settings.stt).await
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tauri::command]
|
||||||
|
pub async fn stop_stt() -> Result<(), String> {
|
||||||
|
stt::stop_stt_container().await
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tauri::command]
|
||||||
|
pub async fn build_stt_image(app_handle: AppHandle) -> Result<(), String> {
|
||||||
|
stt::build_stt_image(move |msg| {
|
||||||
|
let _ = app_handle.emit("stt-build-progress", &msg);
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tauri::command]
|
||||||
|
pub async fn pull_stt_image(app_handle: AppHandle) -> Result<(), String> {
|
||||||
|
stt::pull_stt_image(move |msg| {
|
||||||
|
let _ = app_handle.emit("stt-pull-progress", &msg);
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tauri::command]
|
||||||
|
pub async fn transcribe_audio(
|
||||||
|
audio_data: Vec<u8>,
|
||||||
|
state: State<'_, AppState>,
|
||||||
|
) -> Result<String, String> {
|
||||||
|
let settings = state.settings_store.get();
|
||||||
|
if !settings.stt.enabled {
|
||||||
|
return Err("STT is not enabled".to_string());
|
||||||
|
}
|
||||||
|
|
||||||
|
let url = format!("http://127.0.0.1:{}/transcribe", settings.stt.port);
|
||||||
|
|
||||||
|
let file_part = reqwest::multipart::Part::bytes(audio_data)
|
||||||
|
.file_name("recording.wav")
|
||||||
|
.mime_str("audio/wav")
|
||||||
|
.map_err(|e| format!("Failed to create multipart: {}", e))?;
|
||||||
|
|
||||||
|
let mut form = reqwest::multipart::Form::new().part("file", file_part);
|
||||||
|
|
||||||
|
if let Some(ref lang) = settings.stt.language {
|
||||||
|
form = form.text("language", lang.clone());
|
||||||
|
}
|
||||||
|
|
||||||
|
let client = reqwest::Client::new();
|
||||||
|
let response = client
|
||||||
|
.post(&url)
|
||||||
|
.multipart(form)
|
||||||
|
.send()
|
||||||
|
.await
|
||||||
|
.map_err(|e| {
|
||||||
|
if e.is_connect() {
|
||||||
|
"STT container is not running. Start it from Settings.".to_string()
|
||||||
|
} else {
|
||||||
|
format!("Transcription request failed: {}", e)
|
||||||
|
}
|
||||||
|
})?;
|
||||||
|
|
||||||
|
if !response.status().is_success() {
|
||||||
|
let status = response.status();
|
||||||
|
let body = response.text().await.unwrap_or_default();
|
||||||
|
return Err(format!("Transcription failed ({}): {}", status, body));
|
||||||
|
}
|
||||||
|
|
||||||
|
let result: serde_json::Value = response
|
||||||
|
.json()
|
||||||
|
.await
|
||||||
|
.map_err(|e| format!("Failed to parse transcription response: {}", e))?;
|
||||||
|
|
||||||
|
result["text"]
|
||||||
|
.as_str()
|
||||||
|
.map(|s| s.to_string())
|
||||||
|
.ok_or_else(|| "No text in transcription response".to_string())
|
||||||
|
}
|
||||||
@@ -3,7 +3,10 @@ pub mod container;
|
|||||||
pub mod image;
|
pub mod image;
|
||||||
pub mod exec;
|
pub mod exec;
|
||||||
pub mod network;
|
pub mod network;
|
||||||
|
pub mod stt;
|
||||||
|
|
||||||
|
#[allow(unused_imports)]
|
||||||
|
pub use stt::*;
|
||||||
#[allow(unused_imports)]
|
#[allow(unused_imports)]
|
||||||
pub use client::*;
|
pub use client::*;
|
||||||
#[allow(unused_imports)]
|
#[allow(unused_imports)]
|
||||||
|
|||||||
266
app/src-tauri/src/docker/stt.rs
Normal file
266
app/src-tauri/src/docker/stt.rs
Normal file
@@ -0,0 +1,266 @@
|
|||||||
|
use bollard::container::{
|
||||||
|
Config, CreateContainerOptions, ListContainersOptions, RemoveContainerOptions,
|
||||||
|
StartContainerOptions, StopContainerOptions,
|
||||||
|
};
|
||||||
|
use bollard::image::BuildImageOptions;
|
||||||
|
use bollard::models::{HostConfig, Mount, MountTypeEnum, PortBinding};
|
||||||
|
use futures_util::StreamExt;
|
||||||
|
use std::collections::HashMap;
|
||||||
|
use std::io::Write;
|
||||||
|
|
||||||
|
use super::client::get_docker;
|
||||||
|
use crate::models::app_settings::{SttSettings, SttStatus};
|
||||||
|
|
||||||
|
const STT_CONTAINER_NAME: &str = "triple-c-stt";
|
||||||
|
const STT_MODEL_VOLUME: &str = "triple-c-stt-model-cache";
|
||||||
|
const STT_REGISTRY_IMAGE: &str = "ghcr.io/shadowdao/triple-c-stt:latest";
|
||||||
|
const STT_LOCAL_IMAGE: &str = "triple-c-stt:latest";
|
||||||
|
const STT_DOCKERFILE: &str = include_str!("../../../../stt-container/Dockerfile");
|
||||||
|
const STT_SERVER: &str = include_str!("../../../../stt-container/server.py");
|
||||||
|
|
||||||
|
pub async fn get_stt_status(settings: &SttSettings) -> Result<SttStatus, String> {
|
||||||
|
let image_exists = super::image::image_exists(STT_REGISTRY_IMAGE).await.unwrap_or(false)
|
||||||
|
|| super::image::image_exists(STT_LOCAL_IMAGE).await.unwrap_or(false);
|
||||||
|
|
||||||
|
let (container_exists, running, model) = match find_stt_container().await? {
|
||||||
|
Some((_, state, env_model)) => (true, state == "running", env_model),
|
||||||
|
None => (false, false, settings.model.clone()),
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(SttStatus {
|
||||||
|
container_exists,
|
||||||
|
running,
|
||||||
|
port: settings.port,
|
||||||
|
model,
|
||||||
|
image_exists,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn find_stt_container() -> Result<Option<(String, String, String)>, String> {
|
||||||
|
let docker = get_docker()?;
|
||||||
|
|
||||||
|
let filters: HashMap<String, Vec<String>> = HashMap::from([(
|
||||||
|
"name".to_string(),
|
||||||
|
vec![format!("/{}", STT_CONTAINER_NAME)],
|
||||||
|
)]);
|
||||||
|
|
||||||
|
let containers = docker
|
||||||
|
.list_containers(Some(ListContainersOptions {
|
||||||
|
all: true,
|
||||||
|
filters,
|
||||||
|
..Default::default()
|
||||||
|
}))
|
||||||
|
.await
|
||||||
|
.map_err(|e| format!("Failed to list containers: {}", e))?;
|
||||||
|
|
||||||
|
if let Some(container) = containers.first() {
|
||||||
|
let id = container.id.clone().unwrap_or_default();
|
||||||
|
let state = container.state.clone().unwrap_or_default();
|
||||||
|
|
||||||
|
// Extract WHISPER_MODEL from container env
|
||||||
|
let model = container
|
||||||
|
.labels
|
||||||
|
.as_ref()
|
||||||
|
.and_then(|l| l.get("triple-c.stt.model"))
|
||||||
|
.cloned()
|
||||||
|
.unwrap_or_else(|| "tiny".to_string());
|
||||||
|
|
||||||
|
return Ok(Some((id, state, model)));
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(None)
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn create_stt_container(settings: &SttSettings) -> Result<String, String> {
|
||||||
|
let docker = get_docker()?;
|
||||||
|
|
||||||
|
// Try local image first, fall back to registry
|
||||||
|
let image = if super::image::image_exists(STT_LOCAL_IMAGE).await.unwrap_or(false) {
|
||||||
|
STT_LOCAL_IMAGE.to_string()
|
||||||
|
} else if super::image::image_exists(STT_REGISTRY_IMAGE).await.unwrap_or(false) {
|
||||||
|
STT_REGISTRY_IMAGE.to_string()
|
||||||
|
} else {
|
||||||
|
return Err("STT image not found. Please build or pull the image first.".to_string());
|
||||||
|
};
|
||||||
|
|
||||||
|
let port_binding = PortBinding {
|
||||||
|
host_ip: Some("127.0.0.1".to_string()),
|
||||||
|
host_port: Some(settings.port.to_string()),
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut port_bindings = HashMap::new();
|
||||||
|
port_bindings.insert(
|
||||||
|
"9876/tcp".to_string(),
|
||||||
|
Some(vec![port_binding]),
|
||||||
|
);
|
||||||
|
|
||||||
|
let host_config = HostConfig {
|
||||||
|
port_bindings: Some(port_bindings),
|
||||||
|
mounts: Some(vec![Mount {
|
||||||
|
target: Some("/root/.cache/huggingface".to_string()),
|
||||||
|
source: Some(STT_MODEL_VOLUME.to_string()),
|
||||||
|
typ: Some(MountTypeEnum::VOLUME),
|
||||||
|
..Default::default()
|
||||||
|
}]),
|
||||||
|
..Default::default()
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut labels = HashMap::new();
|
||||||
|
labels.insert(
|
||||||
|
"triple-c.stt.model".to_string(),
|
||||||
|
settings.model.clone(),
|
||||||
|
);
|
||||||
|
labels.insert(
|
||||||
|
"triple-c.stt.port".to_string(),
|
||||||
|
settings.port.to_string(),
|
||||||
|
);
|
||||||
|
|
||||||
|
let config = Config {
|
||||||
|
image: Some(image),
|
||||||
|
env: Some(vec![format!("WHISPER_MODEL={}", settings.model)]),
|
||||||
|
host_config: Some(host_config),
|
||||||
|
labels: Some(labels),
|
||||||
|
..Default::default()
|
||||||
|
};
|
||||||
|
|
||||||
|
let options = CreateContainerOptions {
|
||||||
|
name: STT_CONTAINER_NAME,
|
||||||
|
..Default::default()
|
||||||
|
};
|
||||||
|
|
||||||
|
let response = docker
|
||||||
|
.create_container(Some(options), config)
|
||||||
|
.await
|
||||||
|
.map_err(|e| format!("Failed to create STT container: {}", e))?;
|
||||||
|
|
||||||
|
Ok(response.id)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn ensure_stt_running(settings: &SttSettings) -> Result<SttStatus, String> {
|
||||||
|
let docker = get_docker()?;
|
||||||
|
|
||||||
|
// Check if container exists and if settings match
|
||||||
|
if let Some((id, state, model)) = find_stt_container().await? {
|
||||||
|
let needs_recreate = model != settings.model;
|
||||||
|
|
||||||
|
if needs_recreate {
|
||||||
|
// Settings changed, recreate
|
||||||
|
if state == "running" {
|
||||||
|
docker
|
||||||
|
.stop_container(&id, None::<StopContainerOptions>)
|
||||||
|
.await
|
||||||
|
.map_err(|e| format!("Failed to stop STT container: {}", e))?;
|
||||||
|
}
|
||||||
|
docker
|
||||||
|
.remove_container(
|
||||||
|
&id,
|
||||||
|
Some(RemoveContainerOptions {
|
||||||
|
force: true,
|
||||||
|
..Default::default()
|
||||||
|
}),
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.map_err(|e| format!("Failed to remove STT container: {}", e))?;
|
||||||
|
} else if state == "running" {
|
||||||
|
return get_stt_status(settings).await;
|
||||||
|
} else {
|
||||||
|
// Container exists but stopped, start it
|
||||||
|
docker
|
||||||
|
.start_container(&id, None::<StartContainerOptions<String>>)
|
||||||
|
.await
|
||||||
|
.map_err(|e| format!("Failed to start STT container: {}", e))?;
|
||||||
|
return get_stt_status(settings).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create and start new container
|
||||||
|
let id = create_stt_container(settings).await?;
|
||||||
|
docker
|
||||||
|
.start_container(&id, None::<StartContainerOptions<String>>)
|
||||||
|
.await
|
||||||
|
.map_err(|e| format!("Failed to start STT container: {}", e))?;
|
||||||
|
|
||||||
|
get_stt_status(settings).await
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn stop_stt_container() -> Result<(), String> {
|
||||||
|
let docker = get_docker()?;
|
||||||
|
|
||||||
|
if let Some((id, state, _)) = find_stt_container().await? {
|
||||||
|
if state == "running" {
|
||||||
|
docker
|
||||||
|
.stop_container(&id, None::<StopContainerOptions>)
|
||||||
|
.await
|
||||||
|
.map_err(|e| format!("Failed to stop STT container: {}", e))?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn pull_stt_image<F>(on_progress: F) -> Result<(), String>
|
||||||
|
where
|
||||||
|
F: Fn(String) + Send + 'static,
|
||||||
|
{
|
||||||
|
super::image::pull_image(STT_REGISTRY_IMAGE, on_progress).await
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn build_stt_image<F>(on_progress: F) -> Result<(), String>
|
||||||
|
where
|
||||||
|
F: Fn(String) + Send + 'static,
|
||||||
|
{
|
||||||
|
let docker = get_docker()?;
|
||||||
|
|
||||||
|
let tar_bytes = create_stt_build_context()
|
||||||
|
.map_err(|e| format!("Failed to create STT build context: {}", e))?;
|
||||||
|
|
||||||
|
let options = BuildImageOptions {
|
||||||
|
t: STT_LOCAL_IMAGE,
|
||||||
|
rm: true,
|
||||||
|
forcerm: true,
|
||||||
|
..Default::default()
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut stream = docker.build_image(options, None, Some(tar_bytes.into()));
|
||||||
|
|
||||||
|
while let Some(result) = stream.next().await {
|
||||||
|
match result {
|
||||||
|
Ok(output) => {
|
||||||
|
if let Some(stream) = output.stream {
|
||||||
|
on_progress(stream);
|
||||||
|
}
|
||||||
|
if let Some(error) = output.error {
|
||||||
|
return Err(format!("Build error: {}", error));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(e) => return Err(format!("Build stream error: {}", e)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn create_stt_build_context() -> Result<Vec<u8>, std::io::Error> {
|
||||||
|
let mut buf = Vec::new();
|
||||||
|
{
|
||||||
|
let mut archive = tar::Builder::new(&mut buf);
|
||||||
|
|
||||||
|
let mut dockerfile_header = tar::Header::new_gnu();
|
||||||
|
dockerfile_header.set_size(STT_DOCKERFILE.len() as u64);
|
||||||
|
dockerfile_header.set_mode(0o644);
|
||||||
|
dockerfile_header.set_cksum();
|
||||||
|
archive.append_data(&mut dockerfile_header, "Dockerfile", STT_DOCKERFILE.as_bytes())?;
|
||||||
|
|
||||||
|
let mut server_header = tar::Header::new_gnu();
|
||||||
|
server_header.set_size(STT_SERVER.len() as u64);
|
||||||
|
server_header.set_mode(0o644);
|
||||||
|
server_header.set_cksum();
|
||||||
|
archive.append_data(&mut server_header, "server.py", STT_SERVER.as_bytes())?;
|
||||||
|
|
||||||
|
archive.finish()?;
|
||||||
|
}
|
||||||
|
|
||||||
|
let _ = buf.flush();
|
||||||
|
Ok(buf)
|
||||||
|
}
|
||||||
|
|
||||||
@@ -122,6 +122,8 @@ pub fn run() {
|
|||||||
if let Some(server) = server_guard.take() {
|
if let Some(server) = server_guard.take() {
|
||||||
server.stop();
|
server.stop();
|
||||||
}
|
}
|
||||||
|
// Stop STT container
|
||||||
|
let _ = docker::stt::stop_stt_container().await;
|
||||||
// Close all exec sessions
|
// Close all exec sessions
|
||||||
state.exec_manager.close_all_sessions().await;
|
state.exec_manager.close_all_sessions().await;
|
||||||
});
|
});
|
||||||
@@ -181,6 +183,13 @@ pub fn run() {
|
|||||||
commands::web_terminal_commands::stop_web_terminal,
|
commands::web_terminal_commands::stop_web_terminal,
|
||||||
commands::web_terminal_commands::get_web_terminal_status,
|
commands::web_terminal_commands::get_web_terminal_status,
|
||||||
commands::web_terminal_commands::regenerate_web_terminal_token,
|
commands::web_terminal_commands::regenerate_web_terminal_token,
|
||||||
|
// STT
|
||||||
|
commands::stt_commands::get_stt_status,
|
||||||
|
commands::stt_commands::start_stt,
|
||||||
|
commands::stt_commands::stop_stt,
|
||||||
|
commands::stt_commands::build_stt_image,
|
||||||
|
commands::stt_commands::pull_stt_image,
|
||||||
|
commands::stt_commands::transcribe_audio,
|
||||||
])
|
])
|
||||||
.run(tauri::generate_context!())
|
.run(tauri::generate_context!())
|
||||||
.expect("error while running tauri application");
|
.expect("error while running tauri application");
|
||||||
|
|||||||
@@ -76,6 +76,48 @@ pub struct AppSettings {
|
|||||||
pub dismissed_image_digest: Option<String>,
|
pub dismissed_image_digest: Option<String>,
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub web_terminal: WebTerminalSettings,
|
pub web_terminal: WebTerminalSettings,
|
||||||
|
#[serde(default)]
|
||||||
|
pub stt: SttSettings,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn default_stt_model() -> String {
|
||||||
|
"tiny".to_string()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn default_stt_port() -> u16 {
|
||||||
|
9876
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
pub struct SttSettings {
|
||||||
|
#[serde(default)]
|
||||||
|
pub enabled: bool,
|
||||||
|
#[serde(default = "default_stt_model")]
|
||||||
|
pub model: String,
|
||||||
|
#[serde(default = "default_stt_port")]
|
||||||
|
pub port: u16,
|
||||||
|
#[serde(default)]
|
||||||
|
pub language: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for SttSettings {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self {
|
||||||
|
enabled: false,
|
||||||
|
model: default_stt_model(),
|
||||||
|
port: 9876,
|
||||||
|
language: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
pub struct SttStatus {
|
||||||
|
pub container_exists: bool,
|
||||||
|
pub running: bool,
|
||||||
|
pub port: u16,
|
||||||
|
pub model: String,
|
||||||
|
pub image_exists: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
fn default_web_terminal_port() -> u16 {
|
fn default_web_terminal_port() -> u16 {
|
||||||
@@ -120,6 +162,7 @@ impl Default for AppSettings {
|
|||||||
default_microphone: None,
|
default_microphone: None,
|
||||||
dismissed_image_digest: None,
|
dismissed_image_digest: None,
|
||||||
web_terminal: WebTerminalSettings::default(),
|
web_terminal: WebTerminalSettings::default(),
|
||||||
|
stt: SttSettings::default(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ import { detectHostTimezone } from "../../lib/tauri-commands";
|
|||||||
import type { EnvVar } from "../../lib/types";
|
import type { EnvVar } from "../../lib/types";
|
||||||
import Tooltip from "../ui/Tooltip";
|
import Tooltip from "../ui/Tooltip";
|
||||||
import WebTerminalSettings from "./WebTerminalSettings";
|
import WebTerminalSettings from "./WebTerminalSettings";
|
||||||
|
import SttSettings from "./SttSettings";
|
||||||
|
|
||||||
export default function SettingsPanel() {
|
export default function SettingsPanel() {
|
||||||
const { appSettings, saveSettings } = useSettings();
|
const { appSettings, saveSettings } = useSettings();
|
||||||
@@ -120,6 +121,9 @@ export default function SettingsPanel() {
|
|||||||
{/* Web Terminal */}
|
{/* Web Terminal */}
|
||||||
<WebTerminalSettings />
|
<WebTerminalSettings />
|
||||||
|
|
||||||
|
{/* Speech to Text */}
|
||||||
|
<SttSettings />
|
||||||
|
|
||||||
{/* Updates section */}
|
{/* Updates section */}
|
||||||
<div>
|
<div>
|
||||||
<label className="block text-sm font-medium mb-2">Updates<Tooltip text="Check for new versions of the Triple-C app and container image." /></label>
|
<label className="block text-sm font-medium mb-2">Updates<Tooltip text="Check for new versions of the Triple-C app and container image." /></label>
|
||||||
|
|||||||
249
app/src/components/settings/SttSettings.tsx
Normal file
249
app/src/components/settings/SttSettings.tsx
Normal file
@@ -0,0 +1,249 @@
|
|||||||
|
import { useState, useEffect } from "react";
|
||||||
|
import { useSettings } from "../../hooks/useSettings";
|
||||||
|
import { getSttStatus, startStt, stopStt, pullSttImage, buildSttImage } from "../../lib/tauri-commands";
|
||||||
|
import { listen } from "@tauri-apps/api/event";
|
||||||
|
import type { SttStatus } from "../../lib/types";
|
||||||
|
import Tooltip from "../ui/Tooltip";
|
||||||
|
|
||||||
|
export default function SttSettings() {
|
||||||
|
const { appSettings, saveSettings } = useSettings();
|
||||||
|
const [status, setStatus] = useState<SttStatus | null>(null);
|
||||||
|
const [loading, setLoading] = useState(false);
|
||||||
|
const [pulling, setPulling] = useState(false);
|
||||||
|
const [building, setBuilding] = useState(false);
|
||||||
|
const [buildLog, setBuildLog] = useState<string | null>(null);
|
||||||
|
const [model, setModel] = useState(appSettings?.stt?.model ?? "tiny");
|
||||||
|
const [port, setPort] = useState(String(appSettings?.stt?.port ?? 9876));
|
||||||
|
const [language, setLanguage] = useState(appSettings?.stt?.language ?? "");
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
setModel(appSettings?.stt?.model ?? "tiny");
|
||||||
|
setPort(String(appSettings?.stt?.port ?? 9876));
|
||||||
|
setLanguage(appSettings?.stt?.language ?? "");
|
||||||
|
}, [appSettings?.stt?.model, appSettings?.stt?.port, appSettings?.stt?.language]);
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
refreshStatus();
|
||||||
|
}, []);
|
||||||
|
|
||||||
|
const refreshStatus = () => {
|
||||||
|
getSttStatus().then(setStatus).catch(console.error);
|
||||||
|
};
|
||||||
|
|
||||||
|
const handleToggleEnabled = async () => {
|
||||||
|
if (!appSettings) return;
|
||||||
|
const newEnabled = !appSettings.stt.enabled;
|
||||||
|
await saveSettings({
|
||||||
|
...appSettings,
|
||||||
|
stt: { ...appSettings.stt, enabled: newEnabled },
|
||||||
|
});
|
||||||
|
};
|
||||||
|
|
||||||
|
const handleSaveModel = async () => {
|
||||||
|
if (!appSettings) return;
|
||||||
|
await saveSettings({
|
||||||
|
...appSettings,
|
||||||
|
stt: { ...appSettings.stt, model },
|
||||||
|
});
|
||||||
|
};
|
||||||
|
|
||||||
|
const handleSavePort = async () => {
|
||||||
|
if (!appSettings) return;
|
||||||
|
const portNum = parseInt(port, 10);
|
||||||
|
if (isNaN(portNum) || portNum < 1 || portNum > 65535) return;
|
||||||
|
await saveSettings({
|
||||||
|
...appSettings,
|
||||||
|
stt: { ...appSettings.stt, port: portNum },
|
||||||
|
});
|
||||||
|
};
|
||||||
|
|
||||||
|
const handleSaveLanguage = async () => {
|
||||||
|
if (!appSettings) return;
|
||||||
|
await saveSettings({
|
||||||
|
...appSettings,
|
||||||
|
stt: { ...appSettings.stt, language: language || null },
|
||||||
|
});
|
||||||
|
};
|
||||||
|
|
||||||
|
const handleStartStop = async () => {
|
||||||
|
setLoading(true);
|
||||||
|
try {
|
||||||
|
if (status?.running) {
|
||||||
|
await stopStt();
|
||||||
|
} else {
|
||||||
|
await startStt();
|
||||||
|
}
|
||||||
|
refreshStatus();
|
||||||
|
} catch (e) {
|
||||||
|
console.error("STT toggle failed:", e);
|
||||||
|
} finally {
|
||||||
|
setLoading(false);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
const handlePull = async () => {
|
||||||
|
setPulling(true);
|
||||||
|
setBuildLog(null);
|
||||||
|
const unlisten = await listen<string>("stt-pull-progress", (event) => {
|
||||||
|
setBuildLog(event.payload);
|
||||||
|
});
|
||||||
|
try {
|
||||||
|
await pullSttImage();
|
||||||
|
refreshStatus();
|
||||||
|
} catch (e) {
|
||||||
|
console.error("STT image pull failed:", e);
|
||||||
|
setBuildLog(`Error: ${e}`);
|
||||||
|
} finally {
|
||||||
|
setPulling(false);
|
||||||
|
unlisten();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
 * Build the STT image locally, mirroring "stt-build-progress" events into the
 * build log.
 *
 * Subscribing to the event happens inside the `try` block: if `listen()`
 * itself fails, the error is shown in the log and the `finally` block still
 * clears the `building` flag, so the image buttons cannot get stuck disabled.
 */
const handleBuild = async () => {
  setBuilding(true);
  setBuildLog(null);

  let unlisten: (() => void) | undefined;
  try {
    unlisten = await listen<string>("stt-build-progress", (event) => {
      setBuildLog(event.payload);
    });
    await buildSttImage();
    // Awaited so a failing refresh is reported below rather than left as an
    // unhandled rejection.
    await refreshStatus();
  } catch (e) {
    console.error("STT image build failed:", e);
    setBuildLog(`Error: ${e}`);
  } finally {
    setBuilding(false);
    unlisten?.();
  }
};
|
||||||
|
|
||||||
|
return (
|
||||||
|
<div>
|
||||||
|
<label className="block text-sm font-medium mb-1">
|
||||||
|
Speech to Text
|
||||||
|
<Tooltip text="Transcribe speech to text using Faster Whisper in a Docker container. Adds a mic button to the terminal." />
|
||||||
|
</label>
|
||||||
|
<p className="text-xs text-[var(--text-secondary)] mb-2">
|
||||||
|
Click the mic button in the terminal to dictate text via speech recognition.
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<div className="space-y-2">
|
||||||
|
{/* Enable toggle */}
|
||||||
|
<div className="flex items-center gap-2">
|
||||||
|
<button
|
||||||
|
onClick={handleToggleEnabled}
|
||||||
|
className={`px-2 py-0.5 text-xs rounded transition-colors ${
|
||||||
|
appSettings?.stt?.enabled
|
||||||
|
? "bg-[var(--success)] text-white"
|
||||||
|
: "bg-[var(--bg-primary)] border border-[var(--border-color)] text-[var(--text-secondary)]"
|
||||||
|
}`}
|
||||||
|
>
|
||||||
|
{appSettings?.stt?.enabled ? "ON" : "OFF"}
|
||||||
|
</button>
|
||||||
|
<span className="text-xs text-[var(--text-secondary)]">
|
||||||
|
{appSettings?.stt?.enabled ? "Enabled" : "Disabled"}
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{appSettings?.stt?.enabled && (
|
||||||
|
<>
|
||||||
|
{/* Model selector */}
|
||||||
|
<div>
|
||||||
|
<label className="block text-xs text-[var(--text-secondary)] mb-1">Model</label>
|
||||||
|
<select
|
||||||
|
value={model}
|
||||||
|
onChange={(e) => setModel(e.target.value)}
|
||||||
|
onBlur={handleSaveModel}
|
||||||
|
className="w-full px-2 py-1 text-sm bg-[var(--bg-primary)] border border-[var(--border-color)] rounded focus:outline-none focus:border-[var(--accent)]"
|
||||||
|
>
|
||||||
|
<option value="tiny">Tiny (fastest, ~75MB)</option>
|
||||||
|
<option value="small">Small (balanced, ~500MB)</option>
|
||||||
|
<option value="medium">Medium (most accurate, ~1.5GB)</option>
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Port */}
|
||||||
|
<div>
|
||||||
|
<label className="block text-xs text-[var(--text-secondary)] mb-1">Port</label>
|
||||||
|
<input
|
||||||
|
type="number"
|
||||||
|
value={port}
|
||||||
|
onChange={(e) => setPort(e.target.value)}
|
||||||
|
onBlur={handleSavePort}
|
||||||
|
min={1}
|
||||||
|
max={65535}
|
||||||
|
className="w-full px-2 py-1 text-sm bg-[var(--bg-primary)] border border-[var(--border-color)] rounded focus:outline-none focus:border-[var(--accent)]"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Language */}
|
||||||
|
<div>
|
||||||
|
<label className="block text-xs text-[var(--text-secondary)] mb-1">Language (optional)</label>
|
||||||
|
<input
|
||||||
|
type="text"
|
||||||
|
value={language}
|
||||||
|
onChange={(e) => setLanguage(e.target.value)}
|
||||||
|
onBlur={handleSaveLanguage}
|
||||||
|
placeholder="Auto-detect"
|
||||||
|
className="w-full px-2 py-1 text-sm bg-[var(--bg-primary)] border border-[var(--border-color)] rounded focus:outline-none focus:border-[var(--accent)]"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Container status + controls */}
|
||||||
|
<div className="pt-1">
|
||||||
|
<label className="block text-xs text-[var(--text-secondary)] mb-1">STT Container</label>
|
||||||
|
<div className="flex items-center gap-2 flex-wrap">
|
||||||
|
<span className="text-xs text-[var(--text-secondary)]">
|
||||||
|
{status?.image_exists
|
||||||
|
? status.running
|
||||||
|
? `Running (port ${status.port}, model: ${status.model})`
|
||||||
|
: status.container_exists
|
||||||
|
? "Stopped"
|
||||||
|
: "Image ready"
|
||||||
|
: "No image"}
|
||||||
|
</span>
|
||||||
|
{status?.image_exists && (
|
||||||
|
<button
|
||||||
|
onClick={handleStartStop}
|
||||||
|
disabled={loading}
|
||||||
|
className={`px-2 py-0.5 text-xs rounded transition-colors ${
|
||||||
|
status?.running
|
||||||
|
? "text-[var(--error)] hover:bg-[var(--bg-primary)]"
|
||||||
|
: "text-[var(--success)] hover:bg-[var(--bg-primary)]"
|
||||||
|
}`}
|
||||||
|
>
|
||||||
|
{loading ? "..." : status?.running ? "Stop" : "Start"}
|
||||||
|
</button>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Image actions */}
|
||||||
|
<div className="flex items-center gap-2 mt-2">
|
||||||
|
<button
|
||||||
|
onClick={handlePull}
|
||||||
|
disabled={pulling || building}
|
||||||
|
className="px-3 py-1 text-xs bg-[var(--bg-primary)] border border-[var(--border-color)] rounded hover:bg-[var(--border-color)] disabled:opacity-50 transition-colors"
|
||||||
|
>
|
||||||
|
{pulling ? "Pulling..." : "Pull Image"}
|
||||||
|
</button>
|
||||||
|
<button
|
||||||
|
onClick={handleBuild}
|
||||||
|
disabled={pulling || building}
|
||||||
|
className="px-3 py-1 text-xs bg-[var(--bg-primary)] border border-[var(--border-color)] rounded hover:bg-[var(--border-color)] disabled:opacity-50 transition-colors"
|
||||||
|
>
|
||||||
|
{building ? "Building..." : "Build Locally"}
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{buildLog && (
|
||||||
|
<pre className="mt-2 text-[10px] text-[var(--text-secondary)] bg-[var(--bg-primary)] border border-[var(--border-color)] rounded px-2 py-1 max-h-20 overflow-y-auto whitespace-pre-wrap">
|
||||||
|
{buildLog}
|
||||||
|
</pre>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
</>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
}
|
||||||
107
app/src/components/terminal/SttButton.tsx
Normal file
107
app/src/components/terminal/SttButton.tsx
Normal file
@@ -0,0 +1,107 @@
|
|||||||
|
import { useCallback, useEffect, useRef, useState } from "react";
|
||||||
|
import { useSTT } from "../../hooks/useSTT";
|
||||||
|
import * as commands from "../../lib/tauri-commands";
|
||||||
|
|
||||||
|
/** Props accepted by the STT mic button. */
interface Props {
  /** Terminal session the transcribed text is sent to. */
  sessionId: string;
  /** Writes `data` to the given terminal session. */
  sendInput: (sessionId: string, data: string) => Promise<void>;
}

/** Format a duration in seconds as `m:ss`. */
function formatTime(seconds: number) {
  const m = Math.floor(seconds / 60);
  const s = seconds % 60;
  return `${m}:${s.toString().padStart(2, "0")}`;
}

/**
 * Floating mic button for the terminal.
 *
 * Left click toggles recording (starting the STT container first when it is
 * not running); right click while recording discards the take. Shows the
 * elapsed recording time and the last error, if any.
 */
export default function SttButton({ sessionId, sendInput }: Props) {
  const { state, error, toggle, cancelRecording } = useSTT(sessionId, sendInput);
  const [elapsed, setElapsed] = useState(0);
  const timerRef = useRef<ReturnType<typeof setInterval> | null>(null);
  // True while a click is still being processed. The container auto-start can
  // take a while, and a second click in that window would otherwise start a
  // second recording from the same (stale) "idle" state.
  const busyRef = useRef(false);

  // Track recording duration: one tick per second while recording.
  useEffect(() => {
    if (state === "recording") {
      setElapsed(0);
      timerRef.current = setInterval(() => setElapsed((e) => e + 1), 1000);
    }
    return () => {
      if (timerRef.current !== null) {
        clearInterval(timerRef.current);
        timerRef.current = null;
      }
    };
  }, [state]);

  const handleClick = useCallback(async () => {
    if (busyRef.current) return;
    busyRef.current = true;
    try {
      // Auto-start the STT container before a new recording. `toggle` starts
      // recording from both "idle" and "error", so both are covered here.
      if (state === "idle" || state === "error") {
        try {
          const status = await commands.getSttStatus();
          if (!status.running) {
            await commands.startStt();
          }
        } catch (e) {
          // Best effort: recording still proceeds and the transcription call
          // reports the real failure, but leave a trace for debugging.
          console.warn("STT container auto-start failed:", e);
        }
      }
      await toggle();
    } finally {
      busyRef.current = false;
    }
  }, [state, toggle]);

  const handleContextMenu = useCallback(
    (e: React.MouseEvent) => {
      e.preventDefault();
      if (state === "recording") {
        cancelRecording();
      }
    },
    [state, cancelRecording],
  );

  const stateClasses =
    state === "recording"
      ? "bg-[#f85149] text-white shadow-lg animate-pulse"
      : state === "transcribing"
        ? "bg-[#1f2937] text-[#58a6ff] border border-[#30363d] opacity-80"
        : "bg-[#1f2937]/80 text-[#8b949e] border border-[#30363d] hover:text-[#e6edf3] hover:bg-[#2d3748]";

  const label =
    state === "recording"
      ? "Click to stop and transcribe (right-click to cancel)"
      : state === "transcribing"
        ? "Transcribing..."
        : "Speech to text";

  return (
    <div className="absolute bottom-4 left-4 z-50 flex items-center gap-2">
      <button
        type="button"
        onClick={handleClick}
        onContextMenu={handleContextMenu}
        disabled={state === "transcribing"}
        className={`w-8 h-8 rounded-full flex items-center justify-center transition-all cursor-pointer ${stateClasses}`}
        title={label}
        aria-label={label}
      >
        {state === "transcribing" ? (
          <svg className="w-4 h-4 animate-spin" viewBox="0 0 24 24" fill="none">
            <circle cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="2" opacity="0.25" />
            <path d="M12 2a10 10 0 0 1 10 10" stroke="currentColor" strokeWidth="2" strokeLinecap="round" />
          </svg>
        ) : (
          <svg className="w-4 h-4" viewBox="0 0 24 24" fill="currentColor">
            <path d="M12 14c1.66 0 3-1.34 3-3V5c0-1.66-1.34-3-3-3S9 3.34 9 5v6c0 1.66 1.34 3 3 3z" />
            <path d="M17 11c0 2.76-2.24 5-5 5s-5-2.24-5-5H5c0 3.53 2.61 6.43 6 6.92V21h2v-3.08c3.39-.49 6-3.39 6-6.92h-2z" />
          </svg>
        )}
      </button>
      {state === "recording" && (
        <span className="text-xs text-[#f85149] font-mono bg-[#1f2937] px-2 py-0.5 rounded border border-[#30363d]">
          {formatTime(elapsed)}
        </span>
      )}
      {state === "error" && error && (
        <span
          className="text-xs text-[#f85149] bg-[#1f2937] px-2 py-0.5 rounded border border-[#30363d] max-w-[200px] truncate"
          title={error}
        >
          {error}
        </span>
      )}
    </div>
  );
}
|
||||||
@@ -7,6 +7,7 @@ import { openUrl } from "@tauri-apps/plugin-opener";
|
|||||||
import "@xterm/xterm/css/xterm.css";
|
import "@xterm/xterm/css/xterm.css";
|
||||||
import { useTerminal } from "../../hooks/useTerminal";
|
import { useTerminal } from "../../hooks/useTerminal";
|
||||||
import { useAppState } from "../../store/appState";
|
import { useAppState } from "../../store/appState";
|
||||||
|
import SttButton from "./SttButton";
|
||||||
import { awsSsoRefresh } from "../../lib/tauri-commands";
|
import { awsSsoRefresh } from "../../lib/tauri-commands";
|
||||||
import { UrlDetector } from "../../lib/urlDetector";
|
import { UrlDetector } from "../../lib/urlDetector";
|
||||||
import UrlToast from "./UrlToast";
|
import UrlToast from "./UrlToast";
|
||||||
@@ -25,6 +26,7 @@ export default function TerminalView({ sessionId, active }: Props) {
|
|||||||
const detectorRef = useRef<UrlDetector | null>(null);
|
const detectorRef = useRef<UrlDetector | null>(null);
|
||||||
const { sendInput, pasteImage, resize, onOutput, onExit } = useTerminal();
|
const { sendInput, pasteImage, resize, onOutput, onExit } = useTerminal();
|
||||||
const setTerminalHasSelection = useAppState(s => s.setTerminalHasSelection);
|
const setTerminalHasSelection = useAppState(s => s.setTerminalHasSelection);
|
||||||
|
const sttEnabled = useAppState(s => s.appSettings?.stt?.enabled);
|
||||||
|
|
||||||
const ssoBufferRef = useRef("");
|
const ssoBufferRef = useRef("");
|
||||||
const ssoTriggeredRef = useRef(false);
|
const ssoTriggeredRef = useRef(false);
|
||||||
@@ -424,6 +426,8 @@ export default function TerminalView({ sessionId, active }: Props) {
|
|||||||
>
|
>
|
||||||
{isAutoFollow ? "▼ Following" : "▽ Paused"}
|
{isAutoFollow ? "▼ Following" : "▽ Paused"}
|
||||||
</button>
|
</button>
|
||||||
|
{/* STT mic button - bottom left */}
|
||||||
|
{sttEnabled && <SttButton sessionId={sessionId} sendInput={sendInput} />}
|
||||||
{/* Jump to Current - bottom right, when scrolled up */}
|
{/* Jump to Current - bottom right, when scrolled up */}
|
||||||
{!isAtBottom && (
|
{!isAtBottom && (
|
||||||
<button
|
<button
|
||||||
|
|||||||
145
app/src/hooks/useSTT.ts
Normal file
145
app/src/hooks/useSTT.ts
Normal file
@@ -0,0 +1,145 @@
|
|||||||
|
import { useCallback, useRef, useState } from "react";
|
||||||
|
import * as commands from "../lib/tauri-commands";
|
||||||
|
import { encodeWav } from "../lib/wav";
|
||||||
|
import { useAppState } from "../store/appState";
|
||||||
|
|
||||||
|
export type SttState = "idle" | "recording" | "transcribing" | "error";

/** Sample rate (Hz) requested from the AudioContext and written to the WAV header. */
const SAMPLE_RATE_HZ = 16000;

/** How long a transcription error stays visible before the hook returns to idle. */
const ERROR_DISPLAY_MS = 3000;

/**
 * Record microphone audio, transcribe it through the backend, and send the
 * resulting text to a terminal session.
 *
 * @param sessionId Terminal session that receives the transcribed text.
 * @param sendInput Writes text to the given terminal session.
 * @returns The current state and error message plus `startRecording`,
 *   `stopRecording` (stop and transcribe), `cancelRecording` (stop and
 *   discard) and `toggle` (start from idle/error, stop while recording).
 */
export function useSTT(sessionId: string, sendInput: (sessionId: string, data: string) => Promise<void>) {
  const [state, setState] = useState<SttState>("idle");
  const [error, setError] = useState<string | null>(null);

  const audioContextRef = useRef<AudioContext | null>(null);
  const streamRef = useRef<MediaStream | null>(null);
  const workletRef = useRef<AudioWorkletNode | null>(null);
  const chunksRef = useRef<Int16Array[]>([]);

  const appSettings = useAppState((s) => s.appSettings);
  const deviceId = appSettings?.default_microphone;

  // Tear down whatever part of the capture pipeline currently exists:
  // worklet node, audio context, then the microphone tracks. Safe to call
  // when nothing (or only part of the pipeline) was set up.
  const releaseAudio = useCallback(async () => {
    workletRef.current?.disconnect();
    workletRef.current = null;

    if (audioContextRef.current) {
      await audioContextRef.current.close().catch(() => {});
      audioContextRef.current = null;
    }

    if (streamRef.current) {
      streamRef.current.getTracks().forEach((t) => t.stop());
      streamRef.current = null;
    }
  }, []);

  const startRecording = useCallback(async () => {
    if (state === "recording" || state === "transcribing") return;
    setState("recording");
    setError(null);
    chunksRef.current = [];

    try {
      const audioConstraints: MediaTrackConstraints = {
        channelCount: 1,
        echoCancellation: true,
        noiseSuppression: true,
        autoGainControl: true,
      };
      if (deviceId) {
        audioConstraints.deviceId = { exact: deviceId };
      }

      const stream = await navigator.mediaDevices.getUserMedia({ audio: audioConstraints });
      streamRef.current = stream;

      const audioContext = new AudioContext({ sampleRate: SAMPLE_RATE_HZ });
      audioContextRef.current = audioContext;

      await audioContext.audioWorklet.addModule("/audio-capture-processor.js");

      const source = audioContext.createMediaStreamSource(stream);
      const processor = new AudioWorkletNode(audioContext, "audio-capture-processor");
      workletRef.current = processor;

      // Each message from the worklet is stored as one Int16 PCM chunk.
      processor.port.onmessage = (event: MessageEvent<ArrayBuffer>) => {
        chunksRef.current.push(new Int16Array(event.data));
      };

      source.connect(processor);
      processor.connect(audioContext.destination);
    } catch (e) {
      // Setup can fail after the microphone was already opened (for example
      // when the worklet module fails to load). Release everything acquired
      // so far so the microphone does not stay active.
      await releaseAudio();
      chunksRef.current = [];
      const msg = e instanceof Error ? e.message : String(e);
      setError(msg);
      setState("error");
    }
  }, [state, deviceId, releaseAudio]);

  const stopRecording = useCallback(async () => {
    if (state !== "recording") return;

    // Stop audio capture
    await releaseAudio();

    // Take ownership of the recorded chunks
    const chunks = chunksRef.current;
    chunksRef.current = [];

    if (chunks.length === 0) {
      setState("idle");
      return;
    }

    // Concatenate PCM chunks
    const totalLength = chunks.reduce((sum, c) => sum + c.length, 0);
    const pcm = new Int16Array(totalLength);
    let offset = 0;
    for (const chunk of chunks) {
      pcm.set(chunk, offset);
      offset += chunk.length;
    }

    // Encode to WAV and transcribe
    setState("transcribing");
    try {
      const wavBlob = encodeWav(pcm, SAMPLE_RATE_HZ);
      const wavBuffer = await wavBlob.arrayBuffer();
      const audioData = Array.from(new Uint8Array(wavBuffer));

      const text = await commands.transcribeAudio(audioData);
      if (text) {
        await sendInput(sessionId, text);
      }
      setState("idle");
    } catch (e) {
      const msg = e instanceof Error ? e.message : String(e);
      setError(msg);
      setState("error");
      // Return to idle after a brief delay so the UI shows the error. Only
      // reset if the hook is still in the error state: the user may already
      // have started a new recording, which must not be flipped to "idle".
      setTimeout(() => {
        setState((current) => (current === "error" ? "idle" : current));
      }, ERROR_DISPLAY_MS);
    }
  }, [state, sessionId, sendInput, releaseAudio]);

  const cancelRecording = useCallback(async () => {
    await releaseAudio();
    chunksRef.current = [];
    setState("idle");
    setError(null);
  }, [releaseAudio]);

  const toggle = useCallback(async () => {
    if (state === "recording") {
      await stopRecording();
    } else if (state === "idle" || state === "error") {
      await startRecording();
    }
  }, [state, startRecording, stopRecording]);

  return { state, error, startRecording, stopRecording, cancelRecording, toggle };
}
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
import { invoke } from "@tauri-apps/api/core";
|
import { invoke } from "@tauri-apps/api/core";
|
||||||
import type { Project, ProjectPath, ContainerInfo, SiblingContainer, AppSettings, UpdateInfo, ImageUpdateInfo, McpServer, FileEntry, WebTerminalInfo } from "./types";
|
import type { Project, ProjectPath, ContainerInfo, SiblingContainer, AppSettings, UpdateInfo, ImageUpdateInfo, McpServer, FileEntry, WebTerminalInfo, SttStatus } from "./types";
|
||||||
|
|
||||||
// Docker
|
// Docker
|
||||||
export const checkDocker = () => invoke<boolean>("check_docker");
|
export const checkDocker = () => invoke<boolean>("check_docker");
|
||||||
@@ -98,3 +98,12 @@ export const getWebTerminalStatus = () =>
|
|||||||
invoke<WebTerminalInfo>("get_web_terminal_status");
|
invoke<WebTerminalInfo>("get_web_terminal_status");
|
||||||
export const regenerateWebTerminalToken = () =>
|
export const regenerateWebTerminalToken = () =>
|
||||||
invoke<WebTerminalInfo>("regenerate_web_terminal_token");
|
invoke<WebTerminalInfo>("regenerate_web_terminal_token");
|
||||||
|
|
||||||
|
// STT

/** Invoke the `get_stt_status` backend command. */
export const getSttStatus = (): Promise<SttStatus> =>
  invoke<SttStatus>("get_stt_status");

/** Invoke the `start_stt` backend command; resolves with the resulting status. */
export const startStt = (): Promise<SttStatus> =>
  invoke<SttStatus>("start_stt");

/** Invoke the `stop_stt` backend command. */
export const stopStt = (): Promise<void> =>
  invoke<void>("stop_stt");

/** Invoke the `build_stt_image` backend command. */
export const buildSttImage = (): Promise<void> =>
  invoke<void>("build_stt_image");

/** Invoke the `pull_stt_image` backend command. */
export const pullSttImage = (): Promise<void> =>
  invoke<void>("pull_stt_image");

/**
 * Invoke the `transcribe_audio` backend command.
 *
 * @param audioData Bytes of the audio file, one array element per byte.
 * @returns The transcribed text.
 */
export const transcribeAudio = (audioData: number[]): Promise<string> =>
  invoke<string>("transcribe_audio", { audioData });
|
||||||
|
|||||||
@@ -119,6 +119,22 @@ export interface AppSettings {
|
|||||||
default_microphone: string | null;
|
default_microphone: string | null;
|
||||||
dismissed_image_digest: string | null;
|
dismissed_image_digest: string | null;
|
||||||
web_terminal: WebTerminalSettings;
|
web_terminal: WebTerminalSettings;
|
||||||
|
stt: SttSettings;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Persisted speech-to-text configuration (the `stt` entry of the app settings). */
export interface SttSettings {
  /** Whether speech-to-text is switched on; the terminal shows the mic button only when true. */
  enabled: boolean;
  /** Whisper model name; the settings UI offers "tiny", "small" and "medium". */
  model: string;
  /** Port used for the STT container; the settings UI accepts 1-65535. */
  port: number;
  /** Transcription language, or null to let the model auto-detect it. */
  language: string | null;
}
|
||||||
|
|
||||||
|
/** Status of the STT container and image as reported by the backend. */
export interface SttStatus {
  /** True when a container exists, whether or not it is running. */
  container_exists: boolean;
  /** True when the container is currently running. */
  running: boolean;
  /** Port reported for the container. */
  port: number;
  /** Model reported for the container. */
  model: string;
  /** True when the STT image is available. */
  image_exists: boolean;
}
|
||||||
|
|
||||||
export interface WebTerminalSettings {
|
export interface WebTerminalSettings {
|
||||||
|
|||||||
40
app/src/lib/wav.ts
Normal file
40
app/src/lib/wav.ts
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
/**
 * Wrap mono 16-bit PCM samples in a WAV container.
 *
 * The result is a 44-byte RIFF/WAVE header (PCM format, one channel, 16 bits
 * per sample) followed by the samples.
 *
 * @param samples Signed 16-bit PCM samples.
 * @param sampleRate Sample rate in Hz written to the header.
 * @returns A Blob of type "audio/wav".
 */
export function encodeWav(samples: Int16Array, sampleRate: number): Blob {
  const headerBytes = 44;
  const bytesPerSample = 2;
  const dataBytes = samples.length * bytesPerSample;

  const wav = new ArrayBuffer(headerBytes + dataBytes);
  const header = new DataView(wav);

  // RIFF header: the size field counts everything after itself.
  writeString(header, 0, "RIFF");
  header.setUint32(4, 36 + dataBytes, true);
  writeString(header, 8, "WAVE");

  // fmt chunk
  writeString(header, 12, "fmt ");
  header.setUint32(16, 16, true); // chunk size
  header.setUint16(20, 1, true); // PCM format
  header.setUint16(22, 1, true); // mono
  header.setUint32(24, sampleRate, true);
  header.setUint32(28, sampleRate * bytesPerSample, true); // byte rate
  header.setUint16(32, bytesPerSample, true); // block align
  header.setUint16(34, 16, true); // bits per sample

  // data chunk
  writeString(header, 36, "data");
  header.setUint32(40, dataBytes, true);

  // PCM samples go directly behind the header.
  new Int16Array(wav, headerBytes).set(samples);

  return new Blob([wav], { type: "audio/wav" });
}
|
||||||
|
|
||||||
|
/** Write each UTF-16 code unit of `str` as one byte into `view`, starting at `offset`. */
function writeString(view: DataView, offset: number, str: string) {
  str.split("").forEach((ch, i) => {
    view.setUint8(offset + i, ch.charCodeAt(0));
  });
}
|
||||||
13
stt-container/Dockerfile
Normal file
13
stt-container/Dockerfile
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
# Sidecar image that serves Faster Whisper transcription over HTTP.
FROM python:3.11-slim

# --no-cache-dir keeps pip's download cache out of the image layer.
# The extras spec is quoted because [standard] is a glob pattern to the shell.
RUN pip install --no-cache-dir \
    faster-whisper \
    fastapi \
    "uvicorn[standard]" \
    python-multipart

COPY server.py /app/server.py

EXPOSE 9876

# The module path app.server resolves to /app/server.py; this presumably
# relies on the container's working directory being / (no WORKDIR is set).
CMD ["uvicorn", "app.server:app", "--host", "0.0.0.0", "--port", "9876"]
|
||||||
41
stt-container/server.py
Normal file
41
stt-container/server.py
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
import os
|
||||||
|
import tempfile
|
||||||
|
|
||||||
|
from faster_whisper import WhisperModel
|
||||||
|
from fastapi import FastAPI, File, Form, UploadFile
|
||||||
|
from fastapi.responses import JSONResponse
|
||||||
|
|
||||||
|
# ASGI application; the Dockerfile starts it with uvicorn as "app.server:app".
app = FastAPI()
# Set once by the startup hook; stays None until the model has been loaded.
model: WhisperModel | None = None
|
||||||
|
|
||||||
|
|
||||||
|
@app.on_event("startup")
def load_model():
    """Load the Whisper model into the module-level ``model`` at startup.

    The model name comes from the ``WHISPER_MODEL`` environment variable and
    defaults to ``"tiny"``. The model is created for CPU with the ``int8``
    compute type.
    """
    global model
    size = os.getenv("WHISPER_MODEL", "tiny")
    model = WhisperModel(size, device="cpu", compute_type="int8")
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/transcribe")
def transcribe(
    file: UploadFile = File(...),
    language: str | None = Form(None),
):
    """Transcribe an uploaded audio file.

    Declared as a plain ``def`` on purpose: Whisper inference is blocking,
    CPU-bound work, and FastAPI runs synchronous endpoints in its thread pool.
    As an ``async def`` the inference ran on the event loop and stalled every
    other request (including ``/health``) for the duration of a transcription.

    Args:
        file: The audio upload (multipart field ``file``).
        language: Optional language code; when empty or missing the model
            detects the language itself.

    Returns:
        ``{"text": ..., "language": ...}`` on success, or a 503 JSON response
        with an ``error`` message while the model is not loaded yet.
    """
    if model is None:
        return JSONResponse(status_code=503, content={"error": "Model not loaded"})

    options = {"language": language} if language else {}

    with tempfile.NamedTemporaryFile(suffix=".wav", delete=True) as tmp:
        # Synchronous read of the underlying upload file; no event loop here.
        tmp.write(file.file.read())
        tmp.flush()
        segments, info = model.transcribe(tmp.name, **options)
        # Consume the segments while the temporary file still exists.
        # NOTE(review): faster-whisper documents `segments` as a lazy
        # generator, so the work happens during this join.
        text = " ".join(segment.text for segment in segments).strip()

    return {"text": text, "language": info.language}
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/health")
def health():
    """Return a fixed ``{"status": "ok"}`` payload."""
    payload = {"status": "ok"}
    return payload
|
||||||
Reference in New Issue
Block a user