From 532de77927f61005199f1ce26492968a53ffac58 Mon Sep 17 00:00:00 2001 From: Josh Knapp Date: Sun, 12 Apr 2026 20:02:39 -0700 Subject: [PATCH] Add speech-to-text feature using Faster Whisper container Adds a mic button to the terminal UI that captures speech, transcribes it via a Faster Whisper sidecar container, and injects the text into the terminal input. Includes settings panel for model selection (tiny/small/medium), port config, and container lifecycle management. - stt-container/: Dockerfile + FastAPI server for Whisper transcription - Rust backend: STT container management, transcribe_audio IPC command - Frontend: useSTT hook, SttButton, SttSettings, WAV encoder - CI: Gitea Actions workflow for multi-arch STT image builds Co-Authored-By: Claude Opus 4.6 (1M context) --- .gitea/workflows/build-stt.yml | 59 ++++ app/src-tauri/Cargo.lock | 18 ++ app/src-tauri/Cargo.toml | 2 +- app/src-tauri/src/commands/mod.rs | 1 + app/src-tauri/src/commands/stt_commands.rs | 92 ++++++ app/src-tauri/src/docker/mod.rs | 3 + app/src-tauri/src/docker/stt.rs | 266 ++++++++++++++++++ app/src-tauri/src/lib.rs | 9 + app/src-tauri/src/models/app_settings.rs | 43 +++ app/src/components/settings/SettingsPanel.tsx | 4 + app/src/components/settings/SttSettings.tsx | 249 ++++++++++++++++ app/src/components/terminal/SttButton.tsx | 107 +++++++ app/src/components/terminal/TerminalView.tsx | 4 + app/src/hooks/useSTT.ts | 145 ++++++++++ app/src/lib/tauri-commands.ts | 11 +- app/src/lib/types.ts | 16 ++ app/src/lib/wav.ts | 40 +++ stt-container/Dockerfile | 13 + stt-container/server.py | 41 +++ 19 files changed, 1121 insertions(+), 2 deletions(-) create mode 100644 .gitea/workflows/build-stt.yml create mode 100644 app/src-tauri/src/commands/stt_commands.rs create mode 100644 app/src-tauri/src/docker/stt.rs create mode 100644 app/src/components/settings/SttSettings.tsx create mode 100644 app/src/components/terminal/SttButton.tsx create mode 100644 app/src/hooks/useSTT.ts create mode 100644 
app/src/lib/wav.ts create mode 100644 stt-container/Dockerfile create mode 100644 stt-container/server.py diff --git a/.gitea/workflows/build-stt.yml b/.gitea/workflows/build-stt.yml new file mode 100644 index 0000000..8341957 --- /dev/null +++ b/.gitea/workflows/build-stt.yml @@ -0,0 +1,59 @@ +name: Build STT Container + +on: + push: + branches: [main] + paths: + - "stt-container/**" + - ".gitea/workflows/build-stt.yml" + pull_request: + branches: [main] + paths: + - "stt-container/**" + - ".gitea/workflows/build-stt.yml" + +env: + REGISTRY: repo.anhonesthost.net + IMAGE_NAME: cybercovellc/triple-c/triple-c-stt + +jobs: + build-stt-container: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Login to Gitea Container Registry + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ gitea.actor }} + password: ${{ secrets.REGISTRY_TOKEN }} + + - name: Login to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: shadowdao + password: ${{ secrets.GH_PAT }} + + - name: Build and push STT container image + uses: docker/build-push-action@v5 + with: + context: ./stt-container + file: ./stt-container/Dockerfile + platforms: linux/amd64,linux/arm64 + push: ${{ gitea.event_name == 'push' }} + tags: | + ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest + ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ gitea.sha }} + ghcr.io/shadowdao/triple-c-stt:latest + ghcr.io/shadowdao/triple-c-stt:${{ gitea.sha }} + cache-from: type=gha + cache-to: type=gha,mode=max diff --git a/app/src-tauri/Cargo.lock b/app/src-tauri/Cargo.lock index 9248a37..1a63d5e 100644 --- a/app/src-tauri/Cargo.lock +++ b/app/src-tauri/Cargo.lock @@ -2345,6 +2345,16 @@ version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" +[[package]] +name = "mime_guess" +version = "2.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7c44f8e672c00fe5308fa235f821cb4198414e1c77935c1ab6948d3fd78550e" +dependencies = [ + "mime", + "unicase", +] + [[package]] name = "miniz_oxide" version = "0.8.9" @@ -3454,6 +3464,7 @@ dependencies = [ "base64 0.22.1", "bytes", "futures-core", + "futures-util", "http", "http-body", "http-body-util", @@ -3462,6 +3473,7 @@ dependencies = [ "hyper-util", "js-sys", "log", + "mime_guess", "percent-encoding", "pin-project-lite", "quinn", @@ -5053,6 +5065,12 @@ dependencies = [ "unic-common", ] +[[package]] +name = "unicase" +version = "2.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbc4bc3a9f746d862c45cb89d705aa10f187bb96c76001afab07a0d35ce60142" + [[package]] name = "unicode-ident" version = "1.0.24" diff --git a/app/src-tauri/Cargo.toml b/app/src-tauri/Cargo.toml index 58409bc..89ea02e 100644 --- a/app/src-tauri/Cargo.toml +++ b/app/src-tauri/Cargo.toml @@ -29,7 +29,7 @@ log = "0.4" fern = { version = "0.7", features = ["date-based"] } tar = "0.4" include_dir = "0.7" -reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] } +reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls", "multipart"] } iana-time-zone = "0.1" sha2 = "0.10" axum = { version = "0.8", features = ["ws"] } diff --git a/app/src-tauri/src/commands/mod.rs b/app/src-tauri/src/commands/mod.rs index 8503563..6af0fe7 100644 --- a/app/src-tauri/src/commands/mod.rs +++ b/app/src-tauri/src/commands/mod.rs @@ -5,6 +5,7 @@ pub mod help_commands; pub mod mcp_commands; pub mod project_commands; pub mod settings_commands; +pub mod stt_commands; pub mod terminal_commands; pub mod update_commands; pub mod web_terminal_commands; diff --git a/app/src-tauri/src/commands/stt_commands.rs 
b/app/src-tauri/src/commands/stt_commands.rs new file mode 100644 index 0000000..87f015b --- /dev/null +++ b/app/src-tauri/src/commands/stt_commands.rs @@ -0,0 +1,92 @@ +use tauri::{AppHandle, Emitter, State}; + +use crate::docker::stt; +use crate::models::app_settings::SttStatus; +use crate::AppState; + +#[tauri::command] +pub async fn get_stt_status(state: State<'_, AppState>) -> Result { + let settings = state.settings_store.get(); + stt::get_stt_status(&settings.stt).await +} + +#[tauri::command] +pub async fn start_stt(state: State<'_, AppState>) -> Result { + let settings = state.settings_store.get(); + stt::ensure_stt_running(&settings.stt).await +} + +#[tauri::command] +pub async fn stop_stt() -> Result<(), String> { + stt::stop_stt_container().await +} + +#[tauri::command] +pub async fn build_stt_image(app_handle: AppHandle) -> Result<(), String> { + stt::build_stt_image(move |msg| { + let _ = app_handle.emit("stt-build-progress", &msg); + }) + .await +} + +#[tauri::command] +pub async fn pull_stt_image(app_handle: AppHandle) -> Result<(), String> { + stt::pull_stt_image(move |msg| { + let _ = app_handle.emit("stt-pull-progress", &msg); + }) + .await +} + +#[tauri::command] +pub async fn transcribe_audio( + audio_data: Vec, + state: State<'_, AppState>, +) -> Result { + let settings = state.settings_store.get(); + if !settings.stt.enabled { + return Err("STT is not enabled".to_string()); + } + + let url = format!("http://127.0.0.1:{}/transcribe", settings.stt.port); + + let file_part = reqwest::multipart::Part::bytes(audio_data) + .file_name("recording.wav") + .mime_str("audio/wav") + .map_err(|e| format!("Failed to create multipart: {}", e))?; + + let mut form = reqwest::multipart::Form::new().part("file", file_part); + + if let Some(ref lang) = settings.stt.language { + form = form.text("language", lang.clone()); + } + + let client = reqwest::Client::new(); + let response = client + .post(&url) + .multipart(form) + .send() + .await + .map_err(|e| { + if 
e.is_connect() { + "STT container is not running. Start it from Settings.".to_string() + } else { + format!("Transcription request failed: {}", e) + } + })?; + + if !response.status().is_success() { + let status = response.status(); + let body = response.text().await.unwrap_or_default(); + return Err(format!("Transcription failed ({}): {}", status, body)); + } + + let result: serde_json::Value = response + .json() + .await + .map_err(|e| format!("Failed to parse transcription response: {}", e))?; + + result["text"] + .as_str() + .map(|s| s.to_string()) + .ok_or_else(|| "No text in transcription response".to_string()) +} diff --git a/app/src-tauri/src/docker/mod.rs b/app/src-tauri/src/docker/mod.rs index 701e69b..bf3e610 100644 --- a/app/src-tauri/src/docker/mod.rs +++ b/app/src-tauri/src/docker/mod.rs @@ -3,7 +3,10 @@ pub mod container; pub mod image; pub mod exec; pub mod network; +pub mod stt; +#[allow(unused_imports)] +pub use stt::*; #[allow(unused_imports)] pub use client::*; #[allow(unused_imports)] diff --git a/app/src-tauri/src/docker/stt.rs b/app/src-tauri/src/docker/stt.rs new file mode 100644 index 0000000..4de5971 --- /dev/null +++ b/app/src-tauri/src/docker/stt.rs @@ -0,0 +1,266 @@ +use bollard::container::{ + Config, CreateContainerOptions, ListContainersOptions, RemoveContainerOptions, + StartContainerOptions, StopContainerOptions, +}; +use bollard::image::BuildImageOptions; +use bollard::models::{HostConfig, Mount, MountTypeEnum, PortBinding}; +use futures_util::StreamExt; +use std::collections::HashMap; +use std::io::Write; + +use super::client::get_docker; +use crate::models::app_settings::{SttSettings, SttStatus}; + +const STT_CONTAINER_NAME: &str = "triple-c-stt"; +const STT_MODEL_VOLUME: &str = "triple-c-stt-model-cache"; +const STT_REGISTRY_IMAGE: &str = "ghcr.io/shadowdao/triple-c-stt:latest"; +const STT_LOCAL_IMAGE: &str = "triple-c-stt:latest"; +const STT_DOCKERFILE: &str = include_str!("../../../../stt-container/Dockerfile"); +const 
STT_SERVER: &str = include_str!("../../../../stt-container/server.py"); + +pub async fn get_stt_status(settings: &SttSettings) -> Result { + let image_exists = super::image::image_exists(STT_REGISTRY_IMAGE).await.unwrap_or(false) + || super::image::image_exists(STT_LOCAL_IMAGE).await.unwrap_or(false); + + let (container_exists, running, model) = match find_stt_container().await? { + Some((_, state, env_model)) => (true, state == "running", env_model), + None => (false, false, settings.model.clone()), + }; + + Ok(SttStatus { + container_exists, + running, + port: settings.port, + model, + image_exists, + }) +} + +async fn find_stt_container() -> Result, String> { + let docker = get_docker()?; + + let filters: HashMap> = HashMap::from([( + "name".to_string(), + vec![format!("^/{}$", STT_CONTAINER_NAME)], // anchored: the name filter is a pattern match, so an unanchored value also matches longer names + )]); + + let containers = docker + .list_containers(Some(ListContainersOptions { + all: true, + filters, + ..Default::default() + })) + .await + .map_err(|e| format!("Failed to list containers: {}", e))?; + + if let Some(container) = containers.first() { + let id = container.id.clone().unwrap_or_default(); + let state = container.state.clone().unwrap_or_default(); + + // Read the model from the "triple-c.stt.model" container label; fall back to "tiny" when the label is absent + let model = container + .labels + .as_ref() + .and_then(|l| l.get("triple-c.stt.model")) + .cloned() + .unwrap_or_else(|| "tiny".to_string()); + + return Ok(Some((id, state, model))); + } + + Ok(None) +} + +async fn create_stt_container(settings: &SttSettings) -> Result { + let docker = get_docker()?; + + // Try local image first, fall back to registry + let image = if super::image::image_exists(STT_LOCAL_IMAGE).await.unwrap_or(false) { + STT_LOCAL_IMAGE.to_string() + } else if super::image::image_exists(STT_REGISTRY_IMAGE).await.unwrap_or(false) { + STT_REGISTRY_IMAGE.to_string() + } else { + return Err("STT image not found. 
Please build or pull the image first.".to_string()); + }; + + let port_binding = PortBinding { + host_ip: Some("127.0.0.1".to_string()), + host_port: Some(settings.port.to_string()), + }; + + let mut port_bindings = HashMap::new(); + port_bindings.insert( + "9876/tcp".to_string(), + Some(vec![port_binding]), + ); + + let host_config = HostConfig { + port_bindings: Some(port_bindings), + mounts: Some(vec![Mount { + target: Some("/root/.cache/huggingface".to_string()), + source: Some(STT_MODEL_VOLUME.to_string()), + typ: Some(MountTypeEnum::VOLUME), + ..Default::default() + }]), + ..Default::default() + }; + + let mut labels = HashMap::new(); + labels.insert( + "triple-c.stt.model".to_string(), + settings.model.clone(), + ); + labels.insert( + "triple-c.stt.port".to_string(), + settings.port.to_string(), + ); + + let config = Config { + image: Some(image), + env: Some(vec![format!("WHISPER_MODEL={}", settings.model)]), + host_config: Some(host_config), + labels: Some(labels), + ..Default::default() + }; + + let options = CreateContainerOptions { + name: STT_CONTAINER_NAME, + ..Default::default() + }; + + let response = docker + .create_container(Some(options), config) + .await + .map_err(|e| format!("Failed to create STT container: {}", e))?; + + Ok(response.id) +} + +pub async fn ensure_stt_running(settings: &SttSettings) -> Result { + let docker = get_docker()?; + + // Check if container exists and if settings match + if let Some((id, state, model)) = find_stt_container().await? 
{ + let needs_recreate = model != settings.model; + + if needs_recreate { + // Model differs from settings: stop (if running) and remove so a fresh container is created below. NOTE(review): only the model is compared, so a changed port alone does not trigger a recreate + if state == "running" { + docker + .stop_container(&id, None::) + .await + .map_err(|e| format!("Failed to stop STT container: {}", e))?; + } + docker + .remove_container( + &id, + Some(RemoveContainerOptions { + force: true, + ..Default::default() + }), + ) + .await + .map_err(|e| format!("Failed to remove STT container: {}", e))?; + } else if state == "running" { + return get_stt_status(settings).await; + } else { + // Container exists but stopped, start it + docker + .start_container(&id, None::>) + .await + .map_err(|e| format!("Failed to start STT container: {}", e))?; + return get_stt_status(settings).await; + } + } + + // Create and start new container + let id = create_stt_container(settings).await?; + docker + .start_container(&id, None::>) + .await + .map_err(|e| format!("Failed to start STT container: {}", e))?; + + get_stt_status(settings).await +} + +pub async fn stop_stt_container() -> Result<(), String> { + let docker = get_docker()?; + + if let Some((id, state, _)) = find_stt_container().await? 
{ + if state == "running" { + docker + .stop_container(&id, None::) + .await + .map_err(|e| format!("Failed to stop STT container: {}", e))?; + } + } + + Ok(()) +} + +pub async fn pull_stt_image(on_progress: F) -> Result<(), String> +where + F: Fn(String) + Send + 'static, +{ + super::image::pull_image(STT_REGISTRY_IMAGE, on_progress).await +} + +pub async fn build_stt_image(on_progress: F) -> Result<(), String> +where + F: Fn(String) + Send + 'static, +{ + let docker = get_docker()?; + + let tar_bytes = create_stt_build_context() + .map_err(|e| format!("Failed to create STT build context: {}", e))?; + + let options = BuildImageOptions { + t: STT_LOCAL_IMAGE, + rm: true, + forcerm: true, + ..Default::default() + }; + + let mut stream = docker.build_image(options, None, Some(tar_bytes.into())); + + while let Some(result) = stream.next().await { + match result { + Ok(output) => { + if let Some(stream) = output.stream { + on_progress(stream); + } + if let Some(error) = output.error { + return Err(format!("Build error: {}", error)); + } + } + Err(e) => return Err(format!("Build stream error: {}", e)), + } + } + + Ok(()) +} + +fn create_stt_build_context() -> Result, std::io::Error> { + let mut buf = Vec::new(); + { + let mut archive = tar::Builder::new(&mut buf); + + let mut dockerfile_header = tar::Header::new_gnu(); + dockerfile_header.set_size(STT_DOCKERFILE.len() as u64); + dockerfile_header.set_mode(0o644); + dockerfile_header.set_cksum(); + archive.append_data(&mut dockerfile_header, "Dockerfile", STT_DOCKERFILE.as_bytes())?; + + let mut server_header = tar::Header::new_gnu(); + server_header.set_size(STT_SERVER.len() as u64); + server_header.set_mode(0o644); + server_header.set_cksum(); + archive.append_data(&mut server_header, "server.py", STT_SERVER.as_bytes())?; + + archive.finish()?; + } + + let _ = buf.flush(); + Ok(buf) +} + diff --git a/app/src-tauri/src/lib.rs b/app/src-tauri/src/lib.rs index 7bd8953..7f934fb 100644 --- a/app/src-tauri/src/lib.rs +++ 
b/app/src-tauri/src/lib.rs @@ -122,6 +122,8 @@ pub fn run() { if let Some(server) = server_guard.take() { server.stop(); } + // Stop STT container + let _ = docker::stt::stop_stt_container().await; // Close all exec sessions state.exec_manager.close_all_sessions().await; }); @@ -181,6 +183,13 @@ pub fn run() { commands::web_terminal_commands::stop_web_terminal, commands::web_terminal_commands::get_web_terminal_status, commands::web_terminal_commands::regenerate_web_terminal_token, + // STT + commands::stt_commands::get_stt_status, + commands::stt_commands::start_stt, + commands::stt_commands::stop_stt, + commands::stt_commands::build_stt_image, + commands::stt_commands::pull_stt_image, + commands::stt_commands::transcribe_audio, ]) .run(tauri::generate_context!()) .expect("error while running tauri application"); diff --git a/app/src-tauri/src/models/app_settings.rs b/app/src-tauri/src/models/app_settings.rs index bb0ff18..870d702 100644 --- a/app/src-tauri/src/models/app_settings.rs +++ b/app/src-tauri/src/models/app_settings.rs @@ -76,6 +76,48 @@ pub struct AppSettings { pub dismissed_image_digest: Option, #[serde(default)] pub web_terminal: WebTerminalSettings, + #[serde(default)] + pub stt: SttSettings, +} + +fn default_stt_model() -> String { + "tiny".to_string() +} + +fn default_stt_port() -> u16 { + 9876 +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SttSettings { + #[serde(default)] + pub enabled: bool, + #[serde(default = "default_stt_model")] + pub model: String, + #[serde(default = "default_stt_port")] + pub port: u16, + #[serde(default)] + pub language: Option, +} + +impl Default for SttSettings { + fn default() -> Self { + Self { + enabled: false, + model: default_stt_model(), + port: 9876, + language: None, + } + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SttStatus { + pub container_exists: bool, + pub running: bool, + pub port: u16, + pub model: String, + pub image_exists: bool, } fn 
default_web_terminal_port() -> u16 { @@ -120,6 +162,7 @@ impl Default for AppSettings { default_microphone: None, dismissed_image_digest: None, web_terminal: WebTerminalSettings::default(), + stt: SttSettings::default(), } } } diff --git a/app/src/components/settings/SettingsPanel.tsx b/app/src/components/settings/SettingsPanel.tsx index 8b0ab7d..1c4b423 100644 --- a/app/src/components/settings/SettingsPanel.tsx +++ b/app/src/components/settings/SettingsPanel.tsx @@ -9,6 +9,7 @@ import { detectHostTimezone } from "../../lib/tauri-commands"; import type { EnvVar } from "../../lib/types"; import Tooltip from "../ui/Tooltip"; import WebTerminalSettings from "./WebTerminalSettings"; +import SttSettings from "./SttSettings"; export default function SettingsPanel() { const { appSettings, saveSettings } = useSettings(); @@ -120,6 +121,9 @@ export default function SettingsPanel() { {/* Web Terminal */} + {/* Speech to Text */} + + {/* Updates section */}
diff --git a/app/src/components/settings/SttSettings.tsx b/app/src/components/settings/SttSettings.tsx new file mode 100644 index 0000000..b8d8bdd --- /dev/null +++ b/app/src/components/settings/SttSettings.tsx @@ -0,0 +1,249 @@ +import { useState, useEffect } from "react"; +import { useSettings } from "../../hooks/useSettings"; +import { getSttStatus, startStt, stopStt, pullSttImage, buildSttImage } from "../../lib/tauri-commands"; +import { listen } from "@tauri-apps/api/event"; +import type { SttStatus } from "../../lib/types"; +import Tooltip from "../ui/Tooltip"; + +export default function SttSettings() { + const { appSettings, saveSettings } = useSettings(); + const [status, setStatus] = useState(null); + const [loading, setLoading] = useState(false); + const [pulling, setPulling] = useState(false); + const [building, setBuilding] = useState(false); + const [buildLog, setBuildLog] = useState(null); + const [model, setModel] = useState(appSettings?.stt?.model ?? "tiny"); + const [port, setPort] = useState(String(appSettings?.stt?.port ?? 9876)); + const [language, setLanguage] = useState(appSettings?.stt?.language ?? ""); + + useEffect(() => { + setModel(appSettings?.stt?.model ?? "tiny"); + setPort(String(appSettings?.stt?.port ?? 9876)); + setLanguage(appSettings?.stt?.language ?? 
""); + }, [appSettings?.stt?.model, appSettings?.stt?.port, appSettings?.stt?.language]); + + useEffect(() => { + refreshStatus(); + }, []); + + const refreshStatus = () => { + getSttStatus().then(setStatus).catch(console.error); + }; + + const handleToggleEnabled = async () => { + if (!appSettings) return; + const newEnabled = !appSettings.stt.enabled; + await saveSettings({ + ...appSettings, + stt: { ...appSettings.stt, enabled: newEnabled }, + }); + }; + + const handleSaveModel = async () => { + if (!appSettings) return; + await saveSettings({ + ...appSettings, + stt: { ...appSettings.stt, model }, + }); + }; + + const handleSavePort = async () => { + if (!appSettings) return; + const portNum = parseInt(port, 10); + if (isNaN(portNum) || portNum < 1 || portNum > 65535) return; + await saveSettings({ + ...appSettings, + stt: { ...appSettings.stt, port: portNum }, + }); + }; + + const handleSaveLanguage = async () => { + if (!appSettings) return; + await saveSettings({ + ...appSettings, + stt: { ...appSettings.stt, language: language || null }, + }); + }; + + const handleStartStop = async () => { + setLoading(true); + try { + if (status?.running) { + await stopStt(); + } else { + await startStt(); + } + refreshStatus(); + } catch (e) { + console.error("STT toggle failed:", e); + } finally { + setLoading(false); + } + }; + + const handlePull = async () => { + setPulling(true); + setBuildLog(null); + const unlisten = await listen("stt-pull-progress", (event) => { + setBuildLog(event.payload); + }); + try { + await pullSttImage(); + refreshStatus(); + } catch (e) { + console.error("STT image pull failed:", e); + setBuildLog(`Error: ${e}`); + } finally { + setPulling(false); + unlisten(); + } + }; + + const handleBuild = async () => { + setBuilding(true); + setBuildLog(null); + const unlisten = await listen("stt-build-progress", (event) => { + setBuildLog(event.payload); + }); + try { + await buildSttImage(); + refreshStatus(); + } catch (e) { + console.error("STT 
image build failed:", e); + setBuildLog(`Error: ${e}`); + } finally { + setBuilding(false); + unlisten(); + } + }; + + return ( +
+ +

+ Click the mic button in the terminal to dictate text via speech recognition. +

+ +
+ {/* Enable toggle */} +
+ + + {appSettings?.stt?.enabled ? "Enabled" : "Disabled"} + +
+ + {appSettings?.stt?.enabled && ( + <> + {/* Model selector */} +
+ + +
+ + {/* Port */} +
+ + setPort(e.target.value)} + onBlur={handleSavePort} + min={1} + max={65535} + className="w-full px-2 py-1 text-sm bg-[var(--bg-primary)] border border-[var(--border-color)] rounded focus:outline-none focus:border-[var(--accent)]" + /> +
+ + {/* Language */} +
+ + setLanguage(e.target.value)} + onBlur={handleSaveLanguage} + placeholder="Auto-detect" + className="w-full px-2 py-1 text-sm bg-[var(--bg-primary)] border border-[var(--border-color)] rounded focus:outline-none focus:border-[var(--accent)]" + /> +
+ + {/* Container status + controls */} +
+ +
+ + {status?.image_exists + ? status.running + ? `Running (port ${status.port}, model: ${status.model})` + : status.container_exists + ? "Stopped" + : "Image ready" + : "No image"} + + {status?.image_exists && ( + + )} +
+ + {/* Image actions */} +
+ + +
+ + {buildLog && ( +
+                  {buildLog}
+                
+ )} +
+ + )} +
+
+ ); +} diff --git a/app/src/components/terminal/SttButton.tsx b/app/src/components/terminal/SttButton.tsx new file mode 100644 index 0000000..4e83e02 --- /dev/null +++ b/app/src/components/terminal/SttButton.tsx @@ -0,0 +1,107 @@ +import { useCallback, useEffect, useRef, useState } from "react"; +import { useSTT } from "../../hooks/useSTT"; +import * as commands from "../../lib/tauri-commands"; + +interface Props { + sessionId: string; + sendInput: (sessionId: string, data: string) => Promise; +} + +export default function SttButton({ sessionId, sendInput }: Props) { + const { state, error, toggle, cancelRecording } = useSTT(sessionId, sendInput); + const [elapsed, setElapsed] = useState(0); + const timerRef = useRef | null>(null); + + // Track recording duration + useEffect(() => { + if (state === "recording") { + setElapsed(0); + timerRef.current = setInterval(() => setElapsed((e) => e + 1), 1000); + } else { + if (timerRef.current) { + clearInterval(timerRef.current); + timerRef.current = null; + } + } + return () => { + if (timerRef.current) clearInterval(timerRef.current); + }; + }, [state]); + + const handleClick = useCallback(async () => { + // Auto-start STT container if not running + if (state === "idle") { + try { + const status = await commands.getSttStatus(); + if (!status.running) { + await commands.startStt(); + } + } catch { + // Container start failed, toggle will still attempt transcription + } + } + await toggle(); + }, [state, toggle]); + + const handleContextMenu = useCallback( + (e: React.MouseEvent) => { + e.preventDefault(); + if (state === "recording") { + cancelRecording(); + } + }, + [state, cancelRecording], + ); + + const formatTime = (seconds: number) => { + const m = Math.floor(seconds / 60); + const s = seconds % 60; + return `${m}:${s.toString().padStart(2, "0")}`; + }; + + return ( +
+ + {state === "recording" && ( + + {formatTime(elapsed)} + + )} + {state === "error" && error && ( + + {error} + + )} +
+ ); +} diff --git a/app/src/components/terminal/TerminalView.tsx b/app/src/components/terminal/TerminalView.tsx index 253d269..7230561 100644 --- a/app/src/components/terminal/TerminalView.tsx +++ b/app/src/components/terminal/TerminalView.tsx @@ -7,6 +7,7 @@ import { openUrl } from "@tauri-apps/plugin-opener"; import "@xterm/xterm/css/xterm.css"; import { useTerminal } from "../../hooks/useTerminal"; import { useAppState } from "../../store/appState"; +import SttButton from "./SttButton"; import { awsSsoRefresh } from "../../lib/tauri-commands"; import { UrlDetector } from "../../lib/urlDetector"; import UrlToast from "./UrlToast"; @@ -25,6 +26,7 @@ export default function TerminalView({ sessionId, active }: Props) { const detectorRef = useRef(null); const { sendInput, pasteImage, resize, onOutput, onExit } = useTerminal(); const setTerminalHasSelection = useAppState(s => s.setTerminalHasSelection); + const sttEnabled = useAppState(s => s.appSettings?.stt?.enabled); const ssoBufferRef = useRef(""); const ssoTriggeredRef = useRef(false); @@ -424,6 +426,8 @@ export default function TerminalView({ sessionId, active }: Props) { > {isAutoFollow ? "▼ Following" : "▽ Paused"} + {/* STT mic button - bottom left */} + {sttEnabled && } {/* Jump to Current - bottom right, when scrolled up */} {!isAtBottom && (