From 86176d8830b5e3041a845c0c9521d3b3c29e2fb5 Mon Sep 17 00:00:00 2001 From: Josh Knapp Date: Thu, 5 Mar 2026 06:11:33 -0800 Subject: [PATCH] feat: add voice mode support via mic passthrough to container Enables Claude Code's /voice command inside Docker containers by capturing microphone audio in the Tauri webview and streaming it into the container via a FIFO pipe. Container: fake rec/arecord shims read PCM from a FIFO instead of a real mic. Audio bridge exec writes PCM from Tauri into the FIFO. Frontend: getUserMedia() + AudioWorklet captures 16kHz mono PCM and streams it to the container via invoke("send_audio_data"). UI: "Mic Off/On" toggle button in the terminal view. Co-Authored-By: Claude Opus 4.6 --- app/public/audio-capture-processor.js | 17 ++++ .../src/commands/terminal_commands.rs | 54 ++++++++++ app/src-tauri/src/docker/exec.rs | 18 +++- app/src-tauri/src/lib.rs | 3 + app/src/components/terminal/TerminalView.tsx | 30 ++++++ app/src/hooks/useVoice.ts | 98 +++++++++++++++++++ app/src/lib/tauri-commands.ts | 6 ++ container/Dockerfile | 8 ++ container/audio-shim | 16 +++ 9 files changed, 249 insertions(+), 1 deletion(-) create mode 100644 app/public/audio-capture-processor.js create mode 100644 app/src/hooks/useVoice.ts create mode 100644 container/audio-shim diff --git a/app/public/audio-capture-processor.js b/app/public/audio-capture-processor.js new file mode 100644 index 0000000..49aec66 --- /dev/null +++ b/app/public/audio-capture-processor.js @@ -0,0 +1,17 @@ +class AudioCaptureProcessor extends AudioWorkletProcessor { + process(inputs, outputs, parameters) { + const input = inputs[0]; + if (input && input.length > 0 && input[0].length > 0) { + const samples = input[0]; // Float32Array, mono channel + const int16 = new Int16Array(samples.length); + for (let i = 0; i < samples.length; i++) { + const s = Math.max(-1, Math.min(1, samples[i])); + int16[i] = s < 0 ? s * 0x8000 : s * 0x7FFF; + } + this.port.postMessage(int16.buffer, [int16.buffer]); + } + return true; + } +} + +registerProcessor('audio-capture-processor', AudioCaptureProcessor); diff --git a/app/src-tauri/src/commands/terminal_commands.rs b/app/src-tauri/src/commands/terminal_commands.rs index 13889ef..a23604d 100644 --- a/app/src-tauri/src/commands/terminal_commands.rs +++ b/app/src-tauri/src/commands/terminal_commands.rs @@ -133,6 +133,10 @@ pub async fn close_terminal_session( session_id: String, state: State<'_, AppState>, ) -> Result<(), String> { + // Close audio bridge if it exists + let audio_session_id = format!("audio-{}", session_id); + state.exec_manager.close_session(&audio_session_id).await; + // Close terminal session state.exec_manager.close_session(&session_id).await; Ok(()) } @@ -156,3 +160,53 @@ pub async fn paste_image_to_terminal( .write_file_to_container(&container_id, &file_name, &image_data) .await } + +#[tauri::command] +pub async fn start_audio_bridge( + session_id: String, + state: State<'_, AppState>, +) -> Result<(), String> { + // Get container_id from the terminal session + let container_id = state.exec_manager.get_container_id(&session_id).await?; + + // Create audio bridge exec session with ID "audio-{session_id}" + // The loop handles reconnection when the FIFO reader (fake rec) is killed and restarted + let audio_session_id = format!("audio-{}", session_id); + let cmd = vec![ + "bash".to_string(), + "-c".to_string(), + "FIFO=/tmp/triple-c-audio-input; [ -p \"$FIFO\" ] || mkfifo \"$FIFO\"; trap '' PIPE; while true; do cat > \"$FIFO\" 2>/dev/null; sleep 0.1; done".to_string(), + ]; + + state + .exec_manager + .create_session_with_tty( + &container_id, + &audio_session_id, + cmd, + false, + |_data| { /* ignore output from the audio bridge */ }, + Box::new(|| { /* no exit handler needed */ }), + ) + .await +} + +#[tauri::command] +pub async fn send_audio_data( + session_id: String, + data: Vec, + state: State<'_, AppState>, +) -> Result<(), String> { + let audio_session_id = format!("audio-{}", session_id); + state.exec_manager.send_input(&audio_session_id, data).await +} + +#[tauri::command] +pub async fn stop_audio_bridge( + session_id: String, + state: State<'_, AppState>, +) -> Result<(), String> { + let audio_session_id = format!("audio-{}", session_id); + state.exec_manager.close_session(&audio_session_id).await; + Ok(()) +} diff --git a/app/src-tauri/src/docker/exec.rs b/app/src-tauri/src/docker/exec.rs index cdb9ac8..21f6b39 100644 --- a/app/src-tauri/src/docker/exec.rs +++ b/app/src-tauri/src/docker/exec.rs @@ -60,6 +60,22 @@ impl ExecSessionManager { on_output: F, on_exit: Box, ) -> Result<(), String> + where + F: Fn(Vec) + Send + 'static, + { + self.create_session_with_tty(container_id, session_id, cmd, true, on_output, on_exit) + .await + } + + pub async fn create_session_with_tty( + &self, + container_id: &str, + session_id: &str, + cmd: Vec, + tty: bool, + on_output: F, + on_exit: Box, + ) -> Result<(), String> where F: Fn(Vec) + Send + 'static, { @@ -72,7 +88,7 @@ impl ExecSessionManager { attach_stdin: Some(true), attach_stdout: Some(true), attach_stderr: Some(true), - tty: Some(true), + tty: Some(tty), cmd: Some(cmd), user: Some("claude".to_string()), working_dir: Some("/workspace".to_string()), diff --git a/app/src-tauri/src/lib.rs b/app/src-tauri/src/lib.rs index a508792..46c5948 100644 --- a/app/src-tauri/src/lib.rs +++ b/app/src-tauri/src/lib.rs @@ -101,6 +101,9 @@ pub fn run() { commands::terminal_commands::terminal_resize, commands::terminal_commands::close_terminal_session, commands::terminal_commands::paste_image_to_terminal, + commands::terminal_commands::start_audio_bridge, + commands::terminal_commands::send_audio_data, + commands::terminal_commands::stop_audio_bridge, // MCP commands::mcp_commands::list_mcp_servers, commands::mcp_commands::add_mcp_server, diff --git a/app/src/components/terminal/TerminalView.tsx b/app/src/components/terminal/TerminalView.tsx index 2c1e29d..fa14cb8 100644 --- a/app/src/components/terminal/TerminalView.tsx +++ b/app/src/components/terminal/TerminalView.tsx @@ -6,6 +6,7 @@ import { WebLinksAddon } from "@xterm/addon-web-links"; import { openUrl } from "@tauri-apps/plugin-opener"; import "@xterm/xterm/css/xterm.css"; import { useTerminal } from "../../hooks/useTerminal"; +import { useVoice } from "../../hooks/useVoice"; import { UrlDetector } from "../../lib/urlDetector"; import UrlToast from "./UrlToast"; @@ -23,6 +24,8 @@ export default function TerminalView({ sessionId, active }: Props) { const detectorRef = useRef(null); const { sendInput, pasteImage, resize, onOutput, onExit } = useTerminal(); + const voice = useVoice(sessionId); + const [detectedUrl, setDetectedUrl] = useState(null); const [imagePasteMsg, setImagePasteMsg] = useState(null); const [isAtBottom, setIsAtBottom] = useState(true); @@ -200,6 +203,7 @@ export default function TerminalView({ sessionId, active }: Props) { try { webglRef.current?.dispose(); } catch { /* may already be disposed */ } webglRef.current = null; term.dispose(); + voice.stop(); }; }, [sessionId]); // eslint-disable-line react-hooks/exhaustive-deps @@ -284,6 +288,32 @@ export default function TerminalView({ sessionId, active }: Props) { {imagePasteMsg} )} + {!isAtBottom && (