feat: add voice mode support via mic passthrough to container
Some checks failed
Build App / build-macos (push) Successful in 2m21s
Build App / build-windows (push) Successful in 3m24s
Build App / sync-to-github (push) Has been cancelled
Build App / build-linux (push) Has been cancelled
Build Container / build-container (push) Successful in 54s

Enables Claude Code's /voice command inside Docker containers by
capturing microphone audio in the Tauri webview and streaming it
into the container via a FIFO pipe.

Container: fake rec/arecord shims read PCM from a FIFO instead of
a real mic. Audio bridge exec writes PCM from Tauri into the FIFO.
Frontend: getUserMedia() + AudioWorklet captures 16kHz mono PCM
and streams it to the container via invoke("send_audio_data").
UI: "Mic Off/On" toggle button in the terminal view.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-05 06:11:33 -08:00
parent 58a10c65e9
commit 86176d8830
9 changed files with 249 additions and 1 deletions

98
app/src/hooks/useVoice.ts Normal file
View File

@@ -0,0 +1,98 @@
import { useCallback, useRef, useState } from "react";
import * as commands from "../lib/tauri-commands";
type VoiceState = "inactive" | "starting" | "active" | "error";
export function useVoice(sessionId: string) {
const [state, setState] = useState<VoiceState>("inactive");
const [error, setError] = useState<string | null>(null);
const audioContextRef = useRef<AudioContext | null>(null);
const streamRef = useRef<MediaStream | null>(null);
const workletRef = useRef<AudioWorkletNode | null>(null);
const start = useCallback(async () => {
if (state === "active" || state === "starting") return;
setState("starting");
setError(null);
try {
// 1. Start the audio bridge in the container (creates FIFO writer)
await commands.startAudioBridge(sessionId);
// 2. Get microphone access
const stream = await navigator.mediaDevices.getUserMedia({
audio: {
channelCount: 1,
echoCancellation: true,
noiseSuppression: true,
autoGainControl: true,
},
});
streamRef.current = stream;
// 3. Create AudioContext at 16kHz (browser handles resampling)
const audioContext = new AudioContext({ sampleRate: 16000 });
audioContextRef.current = audioContext;
// 4. Load AudioWorklet processor
await audioContext.audioWorklet.addModule("/audio-capture-processor.js");
// 5. Connect: mic → worklet → (silent) destination
const source = audioContext.createMediaStreamSource(stream);
const processor = new AudioWorkletNode(audioContext, "audio-capture-processor");
workletRef.current = processor;
// 6. Handle PCM chunks from the worklet
processor.port.onmessage = (event: MessageEvent<ArrayBuffer>) => {
const bytes = Array.from(new Uint8Array(event.data));
commands.sendAudioData(sessionId, bytes).catch(() => {
// Audio bridge may have been closed — ignore send errors
});
};
source.connect(processor);
processor.connect(audioContext.destination);
setState("active");
} catch (e) {
const msg = e instanceof Error ? e.message : String(e);
setError(msg);
setState("error");
// Clean up on failure
await commands.stopAudioBridge(sessionId).catch(() => {});
}
}, [sessionId, state]);
const stop = useCallback(async () => {
// Tear down audio pipeline
workletRef.current?.disconnect();
workletRef.current = null;
if (audioContextRef.current) {
await audioContextRef.current.close().catch(() => {});
audioContextRef.current = null;
}
if (streamRef.current) {
streamRef.current.getTracks().forEach((t) => t.stop());
streamRef.current = null;
}
// Stop the container-side audio bridge
await commands.stopAudioBridge(sessionId).catch(() => {});
setState("inactive");
setError(null);
}, [sessionId]);
const toggle = useCallback(async () => {
if (state === "active") {
await stop();
} else {
await start();
}
}, [state, start, stop]);
return { state, error, start, stop, toggle };
}