Compare commits

..

4 Commits

Author SHA1 Message Date
c023d80c86 Hide mic toggle UI until upstream /voice WSL detection is fixed
All checks were successful
Build App / build-macos (push) Successful in 2m22s
Build App / build-windows (push) Successful in 3m57s
Build App / build-linux (push) Successful in 5m17s
Build App / sync-to-github (push) Successful in 11s
Claude Code's /voice command incorrectly detects containers running
on WSL2 hosts as unsupported WSL environments. Remove the mic button
from project cards and microphone settings from the settings panel,
but keep useVoice hook and MicrophoneSettings component for re-enabling
once the upstream issue is resolved.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-05 07:12:35 -08:00
33f02e65c0 Move mic button from terminal overlay to project action buttons
All checks were successful
Build App / build-macos (push) Successful in 2m53s
Build App / build-windows (push) Successful in 3m26s
Build App / build-linux (push) Successful in 5m59s
Build App / sync-to-github (push) Successful in 11s
Relocates the voice/mic toggle from a floating overlay on the terminal
view to the project command row (alongside Stop, Terminal, Config) so
it no longer blocks access to the terminal window.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-05 06:58:03 -08:00
c5e28f9caa feat: add microphone selection to settings
All checks were successful
Build App / build-macos (push) Successful in 2m21s
Build App / build-windows (push) Successful in 3m28s
Build App / build-linux (push) Successful in 5m42s
Build App / sync-to-github (push) Successful in 18s
Adds a dropdown in Settings to choose which audio input device to
use for voice mode. Enumerates devices via the browser's
mediaDevices API and persists the selection in AppSettings.
The useVoice hook passes the selected deviceId to getUserMedia().

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-05 06:15:47 -08:00
86176d8830 feat: add voice mode support via mic passthrough to container
Some checks failed
Build App / build-macos (push) Successful in 2m21s
Build App / build-windows (push) Successful in 3m24s
Build App / sync-to-github (push) Has been cancelled
Build App / build-linux (push) Has been cancelled
Build Container / build-container (push) Successful in 54s
Enables Claude Code's /voice command inside Docker containers by
capturing microphone audio in the Tauri webview and streaming it
into the container via a FIFO pipe.

Container: fake rec/arecord shims read PCM from a FIFO instead of
a real mic. Audio bridge exec writes PCM from Tauri into the FIFO.
Frontend: getUserMedia() + AudioWorklet captures 16kHz mono PCM
and streams it to the container via invoke("send_audio_data").
UI: "Mic Off/On" toggle button in the terminal view.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-05 06:11:33 -08:00
12 changed files with 330 additions and 1 deletions

View File

@@ -0,0 +1,17 @@
class AudioCaptureProcessor extends AudioWorkletProcessor {
process(inputs, outputs, parameters) {
const input = inputs[0];
if (input && input.length > 0 && input[0].length > 0) {
const samples = input[0]; // Float32Array, mono channel
const int16 = new Int16Array(samples.length);
for (let i = 0; i < samples.length; i++) {
const s = Math.max(-1, Math.min(1, samples[i]));
int16[i] = s < 0 ? s * 0x8000 : s * 0x7FFF;
}
this.port.postMessage(int16.buffer, [int16.buffer]);
}
return true;
}
}
registerProcessor('audio-capture-processor', AudioCaptureProcessor);

View File

@@ -133,6 +133,10 @@ pub async fn close_terminal_session(
session_id: String, session_id: String,
state: State<'_, AppState>, state: State<'_, AppState>,
) -> Result<(), String> { ) -> Result<(), String> {
// Close audio bridge if it exists
let audio_session_id = format!("audio-{}", session_id);
state.exec_manager.close_session(&audio_session_id).await;
// Close terminal session
state.exec_manager.close_session(&session_id).await; state.exec_manager.close_session(&session_id).await;
Ok(()) Ok(())
} }
@@ -156,3 +160,53 @@ pub async fn paste_image_to_terminal(
.write_file_to_container(&container_id, &file_name, &image_data) .write_file_to_container(&container_id, &file_name, &image_data)
.await .await
} }
#[tauri::command]
pub async fn start_audio_bridge(
session_id: String,
state: State<'_, AppState>,
) -> Result<(), String> {
// Get container_id from the terminal session
let container_id = state.exec_manager.get_container_id(&session_id).await?;
// Create audio bridge exec session with ID "audio-{session_id}"
// The loop handles reconnection when the FIFO reader (fake rec) is killed and restarted
let audio_session_id = format!("audio-{}", session_id);
let cmd = vec![
"bash".to_string(),
"-c".to_string(),
"FIFO=/tmp/triple-c-audio-input; [ -p \"$FIFO\" ] || mkfifo \"$FIFO\"; trap '' PIPE; while true; do cat > \"$FIFO\" 2>/dev/null; sleep 0.1; done".to_string(),
];
state
.exec_manager
.create_session_with_tty(
&container_id,
&audio_session_id,
cmd,
false,
|_data| { /* ignore output from the audio bridge */ },
Box::new(|| { /* no exit handler needed */ }),
)
.await
}
#[tauri::command]
pub async fn send_audio_data(
session_id: String,
data: Vec<u8>,
state: State<'_, AppState>,
) -> Result<(), String> {
let audio_session_id = format!("audio-{}", session_id);
state.exec_manager.send_input(&audio_session_id, data).await
}
#[tauri::command]
pub async fn stop_audio_bridge(
session_id: String,
state: State<'_, AppState>,
) -> Result<(), String> {
let audio_session_id = format!("audio-{}", session_id);
state.exec_manager.close_session(&audio_session_id).await;
Ok(())
}

View File

@@ -60,6 +60,22 @@ impl ExecSessionManager {
on_output: F, on_output: F,
on_exit: Box<dyn FnOnce() + Send>, on_exit: Box<dyn FnOnce() + Send>,
) -> Result<(), String> ) -> Result<(), String>
where
F: Fn(Vec<u8>) + Send + 'static,
{
self.create_session_with_tty(container_id, session_id, cmd, true, on_output, on_exit)
.await
}
pub async fn create_session_with_tty<F>(
&self,
container_id: &str,
session_id: &str,
cmd: Vec<String>,
tty: bool,
on_output: F,
on_exit: Box<dyn FnOnce() + Send>,
) -> Result<(), String>
where where
F: Fn(Vec<u8>) + Send + 'static, F: Fn(Vec<u8>) + Send + 'static,
{ {
@@ -72,7 +88,7 @@ impl ExecSessionManager {
attach_stdin: Some(true), attach_stdin: Some(true),
attach_stdout: Some(true), attach_stdout: Some(true),
attach_stderr: Some(true), attach_stderr: Some(true),
tty: Some(true), tty: Some(tty),
cmd: Some(cmd), cmd: Some(cmd),
user: Some("claude".to_string()), user: Some("claude".to_string()),
working_dir: Some("/workspace".to_string()), working_dir: Some("/workspace".to_string()),

View File

@@ -101,6 +101,9 @@ pub fn run() {
commands::terminal_commands::terminal_resize, commands::terminal_commands::terminal_resize,
commands::terminal_commands::close_terminal_session, commands::terminal_commands::close_terminal_session,
commands::terminal_commands::paste_image_to_terminal, commands::terminal_commands::paste_image_to_terminal,
commands::terminal_commands::start_audio_bridge,
commands::terminal_commands::send_audio_data,
commands::terminal_commands::stop_audio_bridge,
// MCP // MCP
commands::mcp_commands::list_mcp_servers, commands::mcp_commands::list_mcp_servers,
commands::mcp_commands::add_mcp_server, commands::mcp_commands::add_mcp_server,

View File

@@ -70,6 +70,8 @@ pub struct AppSettings {
pub dismissed_update_version: Option<String>, pub dismissed_update_version: Option<String>,
#[serde(default)] #[serde(default)]
pub timezone: Option<String>, pub timezone: Option<String>,
#[serde(default)]
pub default_microphone: Option<String>,
} }
impl Default for AppSettings { impl Default for AppSettings {
@@ -87,6 +89,7 @@ impl Default for AppSettings {
auto_check_updates: true, auto_check_updates: true,
dismissed_update_version: None, dismissed_update_version: None,
timezone: None, timezone: None,
default_microphone: None,
} }
} }
} }

View File

@@ -869,3 +869,4 @@ function ActionButton({
</button> </button>
); );
} }

View File

@@ -0,0 +1,101 @@
import { useState, useEffect, useCallback } from "react";
import { useSettings } from "../../hooks/useSettings";
interface AudioDevice {
deviceId: string;
label: string;
}
export default function MicrophoneSettings() {
const { appSettings, saveSettings } = useSettings();
const [devices, setDevices] = useState<AudioDevice[]>([]);
const [selected, setSelected] = useState(appSettings?.default_microphone ?? "");
const [loading, setLoading] = useState(false);
const [permissionNeeded, setPermissionNeeded] = useState(false);
// Sync local state when appSettings change
useEffect(() => {
setSelected(appSettings?.default_microphone ?? "");
}, [appSettings?.default_microphone]);
const enumerateDevices = useCallback(async () => {
setLoading(true);
setPermissionNeeded(false);
try {
// Request mic permission first so device labels are available
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
stream.getTracks().forEach((t) => t.stop());
const allDevices = await navigator.mediaDevices.enumerateDevices();
const mics = allDevices
.filter((d) => d.kind === "audioinput")
.map((d) => ({
deviceId: d.deviceId,
label: d.label || `Microphone (${d.deviceId.slice(0, 8)}...)`,
}));
setDevices(mics);
} catch {
setPermissionNeeded(true);
} finally {
setLoading(false);
}
}, []);
// Enumerate devices on mount
useEffect(() => {
enumerateDevices();
}, [enumerateDevices]);
const handleChange = async (deviceId: string) => {
setSelected(deviceId);
if (appSettings) {
await saveSettings({ ...appSettings, default_microphone: deviceId || null });
}
};
return (
<div>
<label className="block text-sm font-medium mb-1">Microphone</label>
<p className="text-xs text-[var(--text-secondary)] mb-1.5">
Audio input device for Claude Code voice mode (/voice)
</p>
{permissionNeeded ? (
<div className="flex items-center gap-2">
<span className="text-xs text-[var(--text-secondary)]">
Microphone permission required
</span>
<button
onClick={enumerateDevices}
className="text-xs px-2 py-0.5 text-[var(--accent)] hover:text-[var(--accent-hover)] hover:bg-[var(--bg-primary)] rounded transition-colors"
>
Grant Access
</button>
</div>
) : (
<div className="flex items-center gap-2">
<select
value={selected}
onChange={(e) => handleChange(e.target.value)}
disabled={loading}
className="flex-1 px-2 py-1 text-sm bg-[var(--bg-primary)] border border-[var(--border-color)] rounded focus:outline-none focus:border-[var(--accent)]"
>
<option value="">System Default</option>
{devices.map((d) => (
<option key={d.deviceId} value={d.deviceId}>
{d.label}
</option>
))}
</select>
<button
onClick={enumerateDevices}
disabled={loading}
title="Refresh microphone list"
className="text-xs px-2 py-1 text-[var(--text-secondary)] hover:text-[var(--text-primary)] hover:bg-[var(--bg-primary)] rounded transition-colors disabled:opacity-50"
>
{loading ? "..." : "Refresh"}
</button>
</div>
)}
</div>
);
}

103
app/src/hooks/useVoice.ts Normal file
View File

@@ -0,0 +1,103 @@
import { useCallback, useRef, useState } from "react";
import * as commands from "../lib/tauri-commands";
type VoiceState = "inactive" | "starting" | "active" | "error";
export function useVoice(sessionId: string, deviceId?: string | null) {
const [state, setState] = useState<VoiceState>("inactive");
const [error, setError] = useState<string | null>(null);
const audioContextRef = useRef<AudioContext | null>(null);
const streamRef = useRef<MediaStream | null>(null);
const workletRef = useRef<AudioWorkletNode | null>(null);
const start = useCallback(async () => {
if (state === "active" || state === "starting") return;
setState("starting");
setError(null);
try {
// 1. Start the audio bridge in the container (creates FIFO writer)
await commands.startAudioBridge(sessionId);
// 2. Get microphone access (use specific device if configured)
const audioConstraints: MediaTrackConstraints = {
channelCount: 1,
echoCancellation: true,
noiseSuppression: true,
autoGainControl: true,
};
if (deviceId) {
audioConstraints.deviceId = { exact: deviceId };
}
const stream = await navigator.mediaDevices.getUserMedia({
audio: audioConstraints,
});
streamRef.current = stream;
// 3. Create AudioContext at 16kHz (browser handles resampling)
const audioContext = new AudioContext({ sampleRate: 16000 });
audioContextRef.current = audioContext;
// 4. Load AudioWorklet processor
await audioContext.audioWorklet.addModule("/audio-capture-processor.js");
// 5. Connect: mic → worklet → (silent) destination
const source = audioContext.createMediaStreamSource(stream);
const processor = new AudioWorkletNode(audioContext, "audio-capture-processor");
workletRef.current = processor;
// 6. Handle PCM chunks from the worklet
processor.port.onmessage = (event: MessageEvent<ArrayBuffer>) => {
const bytes = Array.from(new Uint8Array(event.data));
commands.sendAudioData(sessionId, bytes).catch(() => {
// Audio bridge may have been closed — ignore send errors
});
};
source.connect(processor);
processor.connect(audioContext.destination);
setState("active");
} catch (e) {
const msg = e instanceof Error ? e.message : String(e);
setError(msg);
setState("error");
// Clean up on failure
await commands.stopAudioBridge(sessionId).catch(() => {});
}
}, [sessionId, state, deviceId]);
const stop = useCallback(async () => {
// Tear down audio pipeline
workletRef.current?.disconnect();
workletRef.current = null;
if (audioContextRef.current) {
await audioContextRef.current.close().catch(() => {});
audioContextRef.current = null;
}
if (streamRef.current) {
streamRef.current.getTracks().forEach((t) => t.stop());
streamRef.current = null;
}
// Stop the container-side audio bridge
await commands.stopAudioBridge(sessionId).catch(() => {});
setState("inactive");
setError(null);
}, [sessionId]);
const toggle = useCallback(async () => {
if (state === "active") {
await stop();
} else {
await start();
}
}, [state, start, stop]);
return { state, error, start, stop, toggle };
}

View File

@@ -49,6 +49,12 @@ export const closeTerminalSession = (sessionId: string) =>
invoke<void>("close_terminal_session", { sessionId }); invoke<void>("close_terminal_session", { sessionId });
export const pasteImageToTerminal = (sessionId: string, imageData: number[]) => export const pasteImageToTerminal = (sessionId: string, imageData: number[]) =>
invoke<string>("paste_image_to_terminal", { sessionId, imageData }); invoke<string>("paste_image_to_terminal", { sessionId, imageData });
export const startAudioBridge = (sessionId: string) =>
invoke<void>("start_audio_bridge", { sessionId });
export const sendAudioData = (sessionId: string, data: number[]) =>
invoke<void>("send_audio_data", { sessionId, data });
export const stopAudioBridge = (sessionId: string) =>
invoke<void>("stop_audio_bridge", { sessionId });
// MCP Servers // MCP Servers
export const listMcpServers = () => invoke<McpServer[]>("list_mcp_servers"); export const listMcpServers = () => invoke<McpServer[]>("list_mcp_servers");

View File

@@ -100,6 +100,7 @@ export interface AppSettings {
auto_check_updates: boolean; auto_check_updates: boolean;
dismissed_update_version: string | null; dismissed_update_version: string | null;
timezone: string | null; timezone: string | null;
default_microphone: string | null;
} }
export interface UpdateInfo { export interface UpdateInfo {

View File

@@ -111,6 +111,14 @@ RUN chmod +x /usr/local/bin/osc52-clipboard \
&& ln -sf /usr/local/bin/osc52-clipboard /usr/local/bin/xsel \ && ln -sf /usr/local/bin/osc52-clipboard /usr/local/bin/xsel \
&& ln -sf /usr/local/bin/osc52-clipboard /usr/local/bin/pbcopy && ln -sf /usr/local/bin/osc52-clipboard /usr/local/bin/pbcopy
# ── Audio capture shim (voice mode) ────────────────────────────────────────
# Provides fake rec/arecord that read PCM from a FIFO instead of a real mic,
# allowing Claude Code voice mode to work inside the container.
COPY audio-shim /usr/local/bin/audio-shim
RUN chmod +x /usr/local/bin/audio-shim \
&& ln -sf /usr/local/bin/audio-shim /usr/local/bin/rec \
&& ln -sf /usr/local/bin/audio-shim /usr/local/bin/arecord
COPY entrypoint.sh /usr/local/bin/entrypoint.sh COPY entrypoint.sh /usr/local/bin/entrypoint.sh
RUN chmod +x /usr/local/bin/entrypoint.sh RUN chmod +x /usr/local/bin/entrypoint.sh
COPY triple-c-scheduler /usr/local/bin/triple-c-scheduler COPY triple-c-scheduler /usr/local/bin/triple-c-scheduler

16
container/audio-shim Normal file
View File

@@ -0,0 +1,16 @@
#!/bin/bash
# Audio capture shim for Triple-C voice mode.
# Claude Code spawns `rec` or `arecord` to capture mic audio.
# Inside Docker there is no mic, so this shim reads PCM data from a
# FIFO that the Tauri host app writes to, and outputs it on stdout.
FIFO=/tmp/triple-c-audio-input
# Create the FIFO if it doesn't already exist
[ -p "$FIFO" ] || mkfifo "$FIFO" 2>/dev/null
# Clean exit on SIGTERM (Claude Code sends this when recording stops)
trap 'exit 0' TERM INT
# Stream PCM from the FIFO to stdout until we get a signal or EOF
cat "$FIFO"