feat: add voice mode support via mic passthrough to container
Some checks failed
Build App / build-macos (push) Successful in 2m21s
Build App / build-windows (push) Successful in 3m24s
Build App / sync-to-github (push) Has been cancelled
Build App / build-linux (push) Has been cancelled
Build Container / build-container (push) Successful in 54s
Enables Claude Code's /voice command inside Docker containers by
capturing microphone audio in the Tauri webview and streaming it
into the container via a FIFO pipe.
Container: fake rec/arecord shims read PCM from a FIFO instead of
a real mic. Audio bridge exec writes PCM from Tauri into the FIFO.
Frontend: getUserMedia() + AudioWorklet captures 16kHz mono PCM
and streams it to the container via invoke("send_audio_data").
UI: "Mic Off/On" toggle button in the terminal view.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -6,6 +6,7 @@ import { WebLinksAddon } from "@xterm/addon-web-links";
|
||||
import { openUrl } from "@tauri-apps/plugin-opener";
|
||||
import "@xterm/xterm/css/xterm.css";
|
||||
import { useTerminal } from "../../hooks/useTerminal";
|
||||
import { useVoice } from "../../hooks/useVoice";
|
||||
import { UrlDetector } from "../../lib/urlDetector";
|
||||
import UrlToast from "./UrlToast";
|
||||
|
||||
@@ -23,6 +24,8 @@ export default function TerminalView({ sessionId, active }: Props) {
|
||||
const detectorRef = useRef<UrlDetector | null>(null);
|
||||
const { sendInput, pasteImage, resize, onOutput, onExit } = useTerminal();
|
||||
|
||||
const voice = useVoice(sessionId);
|
||||
|
||||
const [detectedUrl, setDetectedUrl] = useState<string | null>(null);
|
||||
const [imagePasteMsg, setImagePasteMsg] = useState<string | null>(null);
|
||||
const [isAtBottom, setIsAtBottom] = useState(true);
|
||||
@@ -200,6 +203,7 @@ export default function TerminalView({ sessionId, active }: Props) {
|
||||
try { webglRef.current?.dispose(); } catch { /* may already be disposed */ }
|
||||
webglRef.current = null;
|
||||
term.dispose();
|
||||
voice.stop();
|
||||
};
|
||||
}, [sessionId]); // eslint-disable-line react-hooks/exhaustive-deps
|
||||
|
||||
@@ -284,6 +288,32 @@ export default function TerminalView({ sessionId, active }: Props) {
|
||||
{imagePasteMsg}
|
||||
</div>
|
||||
)}
|
||||
<button
|
||||
onClick={voice.toggle}
|
||||
title={
|
||||
voice.state === "active"
|
||||
? "Voice active — click to stop"
|
||||
: voice.error
|
||||
? `Voice error: ${voice.error}`
|
||||
: "Enable voice input for /voice mode"
|
||||
}
|
||||
className={`absolute bottom-4 left-4 z-50 px-3 py-1.5 rounded-md text-xs font-medium border shadow-lg transition-colors cursor-pointer ${
|
||||
voice.state === "active"
|
||||
? "bg-[#1a3a2a] text-[#3fb950] border-[#238636] hover:bg-[#243b2a]"
|
||||
: voice.state === "starting"
|
||||
? "bg-[#1f2937] text-[#d29922] border-[#30363d] opacity-75"
|
||||
: voice.state === "error"
|
||||
? "bg-[#3a1a1a] text-[#ff7b72] border-[#da3633] hover:bg-[#4a2020]"
|
||||
: "bg-[#1f2937] text-[#b1bac4] border-[#30363d] hover:bg-[#2d3748] hover:text-[#e6edf3]"
|
||||
}`}
|
||||
disabled={voice.state === "starting"}
|
||||
>
|
||||
{voice.state === "active"
|
||||
? "Mic On"
|
||||
: voice.state === "starting"
|
||||
? "Mic..."
|
||||
: "Mic Off"}
|
||||
</button>
|
||||
{!isAtBottom && (
|
||||
<button
|
||||
onClick={handleScrollToBottom}
|
||||
|
||||
98
app/src/hooks/useVoice.ts
Normal file
@@ -0,0 +1,98 @@
|
||||
import { useCallback, useRef, useState } from "react";
|
||||
import * as commands from "../lib/tauri-commands";
|
||||
|
||||
type VoiceState = "inactive" | "starting" | "active" | "error";
|
||||
|
||||
export function useVoice(sessionId: string) {
|
||||
const [state, setState] = useState<VoiceState>("inactive");
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
|
||||
const audioContextRef = useRef<AudioContext | null>(null);
|
||||
const streamRef = useRef<MediaStream | null>(null);
|
||||
const workletRef = useRef<AudioWorkletNode | null>(null);
|
||||
|
||||
const start = useCallback(async () => {
|
||||
if (state === "active" || state === "starting") return;
|
||||
setState("starting");
|
||||
setError(null);
|
||||
|
||||
try {
|
||||
// 1. Start the audio bridge in the container (creates FIFO writer)
|
||||
await commands.startAudioBridge(sessionId);
|
||||
|
||||
// 2. Get microphone access
|
||||
const stream = await navigator.mediaDevices.getUserMedia({
|
||||
audio: {
|
||||
channelCount: 1,
|
||||
echoCancellation: true,
|
||||
noiseSuppression: true,
|
||||
autoGainControl: true,
|
||||
},
|
||||
});
|
||||
streamRef.current = stream;
|
||||
|
||||
// 3. Create AudioContext at 16kHz (browser handles resampling)
|
||||
const audioContext = new AudioContext({ sampleRate: 16000 });
|
||||
audioContextRef.current = audioContext;
|
||||
|
||||
// 4. Load AudioWorklet processor
|
||||
await audioContext.audioWorklet.addModule("/audio-capture-processor.js");
|
||||
|
||||
// 5. Connect: mic → worklet → (silent) destination
|
||||
const source = audioContext.createMediaStreamSource(stream);
|
||||
const processor = new AudioWorkletNode(audioContext, "audio-capture-processor");
|
||||
workletRef.current = processor;
|
||||
|
||||
// 6. Handle PCM chunks from the worklet
|
||||
processor.port.onmessage = (event: MessageEvent<ArrayBuffer>) => {
|
||||
const bytes = Array.from(new Uint8Array(event.data));
|
||||
commands.sendAudioData(sessionId, bytes).catch(() => {
|
||||
// Audio bridge may have been closed — ignore send errors
|
||||
});
|
||||
};
|
||||
|
||||
source.connect(processor);
|
||||
processor.connect(audioContext.destination);
|
||||
|
||||
setState("active");
|
||||
} catch (e) {
|
||||
const msg = e instanceof Error ? e.message : String(e);
|
||||
setError(msg);
|
||||
setState("error");
|
||||
// Clean up on failure
|
||||
await commands.stopAudioBridge(sessionId).catch(() => {});
|
||||
}
|
||||
}, [sessionId, state]);
|
||||
|
||||
const stop = useCallback(async () => {
|
||||
// Tear down audio pipeline
|
||||
workletRef.current?.disconnect();
|
||||
workletRef.current = null;
|
||||
|
||||
if (audioContextRef.current) {
|
||||
await audioContextRef.current.close().catch(() => {});
|
||||
audioContextRef.current = null;
|
||||
}
|
||||
|
||||
if (streamRef.current) {
|
||||
streamRef.current.getTracks().forEach((t) => t.stop());
|
||||
streamRef.current = null;
|
||||
}
|
||||
|
||||
// Stop the container-side audio bridge
|
||||
await commands.stopAudioBridge(sessionId).catch(() => {});
|
||||
|
||||
setState("inactive");
|
||||
setError(null);
|
||||
}, [sessionId]);
|
||||
|
||||
const toggle = useCallback(async () => {
|
||||
if (state === "active") {
|
||||
await stop();
|
||||
} else {
|
||||
await start();
|
||||
}
|
||||
}, [state, start, stop]);
|
||||
|
||||
return { state, error, start, stop, toggle };
|
||||
}
|
||||
@@ -49,6 +49,12 @@ export const closeTerminalSession = (sessionId: string) =>
|
||||
invoke<void>("close_terminal_session", { sessionId });
|
||||
export const pasteImageToTerminal = (sessionId: string, imageData: number[]) =>
|
||||
invoke<string>("paste_image_to_terminal", { sessionId, imageData });
|
||||
export const startAudioBridge = (sessionId: string) =>
|
||||
invoke<void>("start_audio_bridge", { sessionId });
|
||||
export const sendAudioData = (sessionId: string, data: number[]) =>
|
||||
invoke<void>("send_audio_data", { sessionId, data });
|
||||
export const stopAudioBridge = (sessionId: string) =>
|
||||
invoke<void>("stop_audio_bridge", { sessionId });
|
||||
|
||||
// MCP Servers
|
||||
/** Fetch the configured MCP servers from the backend. */
export function listMcpServers(): Promise<McpServer[]> {
  return invoke<McpServer[]>("list_mcp_servers");
}
|
||||
|
||||
Reference in New Issue
Block a user