Compare commits
2 Commits
| Author | SHA1 | Date |
|---|---|---|
| | c5e28f9caa | |
| | 86176d8830 | |
17
app/public/audio-capture-processor.js
Normal file
17
app/public/audio-capture-processor.js
Normal file
@@ -0,0 +1,17 @@
|
||||
// AudioWorklet processor that captures mono microphone samples and forwards
// them to the main thread as 16-bit signed PCM in a transferable buffer.
class AudioCaptureProcessor extends AudioWorkletProcessor {
  /**
   * Invoked by the audio rendering thread once per render quantum.
   * Converts the first channel of the first input from Float32 [-1, 1]
   * into Int16 PCM and posts it through the message port (zero-copy via
   * buffer transfer). Returns true so the processor is kept alive.
   */
  process(inputs, outputs, parameters) {
    const channelData = inputs[0] && inputs[0][0];
    if (channelData && channelData.length > 0) {
      const pcm = new Int16Array(channelData.length);
      channelData.forEach((sample, idx) => {
        // Clamp to [-1, 1], then scale asymmetrically so both extremes
        // land exactly on the signed 16-bit limits (-0x8000 / 0x7FFF).
        const clamped = Math.min(1, Math.max(-1, sample));
        pcm[idx] = clamped < 0 ? clamped * 0x8000 : clamped * 0x7FFF;
      });
      // Transfer ownership of the buffer to avoid a copy.
      this.port.postMessage(pcm.buffer, [pcm.buffer]);
    }
    return true;
  }
}

registerProcessor('audio-capture-processor', AudioCaptureProcessor);
|
||||
@@ -133,6 +133,10 @@ pub async fn close_terminal_session(
|
||||
session_id: String,
|
||||
state: State<'_, AppState>,
|
||||
) -> Result<(), String> {
|
||||
// Close audio bridge if it exists
|
||||
let audio_session_id = format!("audio-{}", session_id);
|
||||
state.exec_manager.close_session(&audio_session_id).await;
|
||||
// Close terminal session
|
||||
state.exec_manager.close_session(&session_id).await;
|
||||
Ok(())
|
||||
}
|
||||
@@ -156,3 +160,53 @@ pub async fn paste_image_to_terminal(
|
||||
.write_file_to_container(&container_id, &file_name, &image_data)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Tauri command: start the host→container audio bridge for a terminal
/// session.
///
/// Spawns a long-lived exec session (id `audio-{session_id}`) inside the
/// session's container whose job is to copy its stdin into the
/// `/tmp/triple-c-audio-input` FIFO, where the container-side `rec`/`arecord`
/// shim reads it as fake microphone input.
///
/// # Errors
/// Returns `Err` if the terminal session is unknown or the exec session
/// cannot be created.
#[tauri::command]
pub async fn start_audio_bridge(
    session_id: String,
    state: State<'_, AppState>,
) -> Result<(), String> {
    // Get container_id from the terminal session
    let container_id = state.exec_manager.get_container_id(&session_id).await?;

    // Create audio bridge exec session with ID "audio-{session_id}"
    // The loop handles reconnection when the FIFO reader (fake rec) is killed and restarted
    let audio_session_id = format!("audio-{}", session_id);
    // `trap '' PIPE` keeps the writer alive when the FIFO reader goes away;
    // the while/cat loop then re-opens the FIFO and resumes streaming.
    let cmd = vec![
        "bash".to_string(),
        "-c".to_string(),
        "FIFO=/tmp/triple-c-audio-input; [ -p \"$FIFO\" ] || mkfifo \"$FIFO\"; trap '' PIPE; while true; do cat > \"$FIFO\" 2>/dev/null; sleep 0.1; done".to_string(),
    ];

    state
        .exec_manager
        .create_session_with_tty(
            &container_id,
            &audio_session_id,
            cmd,
            false, // no TTY: this carries a raw byte stream, not a terminal
            |_data| { /* ignore output from the audio bridge */ },
            Box::new(|| { /* no exit handler needed */ }),
        )
        .await
}
|
||||
|
||||
#[tauri::command]
|
||||
pub async fn send_audio_data(
|
||||
session_id: String,
|
||||
data: Vec<u8>,
|
||||
state: State<'_, AppState>,
|
||||
) -> Result<(), String> {
|
||||
let audio_session_id = format!("audio-{}", session_id);
|
||||
state.exec_manager.send_input(&audio_session_id, data).await
|
||||
}
|
||||
|
||||
#[tauri::command]
|
||||
pub async fn stop_audio_bridge(
|
||||
session_id: String,
|
||||
state: State<'_, AppState>,
|
||||
) -> Result<(), String> {
|
||||
let audio_session_id = format!("audio-{}", session_id);
|
||||
state.exec_manager.close_session(&audio_session_id).await;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -60,6 +60,22 @@ impl ExecSessionManager {
|
||||
on_output: F,
|
||||
on_exit: Box<dyn FnOnce() + Send>,
|
||||
) -> Result<(), String>
|
||||
where
|
||||
F: Fn(Vec<u8>) + Send + 'static,
|
||||
{
|
||||
self.create_session_with_tty(container_id, session_id, cmd, true, on_output, on_exit)
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn create_session_with_tty<F>(
|
||||
&self,
|
||||
container_id: &str,
|
||||
session_id: &str,
|
||||
cmd: Vec<String>,
|
||||
tty: bool,
|
||||
on_output: F,
|
||||
on_exit: Box<dyn FnOnce() + Send>,
|
||||
) -> Result<(), String>
|
||||
where
|
||||
F: Fn(Vec<u8>) + Send + 'static,
|
||||
{
|
||||
@@ -72,7 +88,7 @@ impl ExecSessionManager {
|
||||
attach_stdin: Some(true),
|
||||
attach_stdout: Some(true),
|
||||
attach_stderr: Some(true),
|
||||
tty: Some(true),
|
||||
tty: Some(tty),
|
||||
cmd: Some(cmd),
|
||||
user: Some("claude".to_string()),
|
||||
working_dir: Some("/workspace".to_string()),
|
||||
|
||||
@@ -101,6 +101,9 @@ pub fn run() {
|
||||
commands::terminal_commands::terminal_resize,
|
||||
commands::terminal_commands::close_terminal_session,
|
||||
commands::terminal_commands::paste_image_to_terminal,
|
||||
commands::terminal_commands::start_audio_bridge,
|
||||
commands::terminal_commands::send_audio_data,
|
||||
commands::terminal_commands::stop_audio_bridge,
|
||||
// MCP
|
||||
commands::mcp_commands::list_mcp_servers,
|
||||
commands::mcp_commands::add_mcp_server,
|
||||
|
||||
@@ -70,6 +70,8 @@ pub struct AppSettings {
|
||||
pub dismissed_update_version: Option<String>,
|
||||
#[serde(default)]
|
||||
pub timezone: Option<String>,
|
||||
#[serde(default)]
|
||||
pub default_microphone: Option<String>,
|
||||
}
|
||||
|
||||
impl Default for AppSettings {
|
||||
@@ -87,6 +89,7 @@ impl Default for AppSettings {
|
||||
auto_check_updates: true,
|
||||
dismissed_update_version: None,
|
||||
timezone: None,
|
||||
default_microphone: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
101
app/src/components/settings/MicrophoneSettings.tsx
Normal file
101
app/src/components/settings/MicrophoneSettings.tsx
Normal file
@@ -0,0 +1,101 @@
|
||||
import { useState, useEffect, useCallback } from "react";
|
||||
import { useSettings } from "../../hooks/useSettings";
|
||||
|
||||
interface AudioDevice {
|
||||
deviceId: string;
|
||||
label: string;
|
||||
}
|
||||
|
||||
export default function MicrophoneSettings() {
|
||||
const { appSettings, saveSettings } = useSettings();
|
||||
const [devices, setDevices] = useState<AudioDevice[]>([]);
|
||||
const [selected, setSelected] = useState(appSettings?.default_microphone ?? "");
|
||||
const [loading, setLoading] = useState(false);
|
||||
const [permissionNeeded, setPermissionNeeded] = useState(false);
|
||||
|
||||
// Sync local state when appSettings change
|
||||
useEffect(() => {
|
||||
setSelected(appSettings?.default_microphone ?? "");
|
||||
}, [appSettings?.default_microphone]);
|
||||
|
||||
const enumerateDevices = useCallback(async () => {
|
||||
setLoading(true);
|
||||
setPermissionNeeded(false);
|
||||
try {
|
||||
// Request mic permission first so device labels are available
|
||||
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
|
||||
stream.getTracks().forEach((t) => t.stop());
|
||||
|
||||
const allDevices = await navigator.mediaDevices.enumerateDevices();
|
||||
const mics = allDevices
|
||||
.filter((d) => d.kind === "audioinput")
|
||||
.map((d) => ({
|
||||
deviceId: d.deviceId,
|
||||
label: d.label || `Microphone (${d.deviceId.slice(0, 8)}...)`,
|
||||
}));
|
||||
setDevices(mics);
|
||||
} catch {
|
||||
setPermissionNeeded(true);
|
||||
} finally {
|
||||
setLoading(false);
|
||||
}
|
||||
}, []);
|
||||
|
||||
// Enumerate devices on mount
|
||||
useEffect(() => {
|
||||
enumerateDevices();
|
||||
}, [enumerateDevices]);
|
||||
|
||||
const handleChange = async (deviceId: string) => {
|
||||
setSelected(deviceId);
|
||||
if (appSettings) {
|
||||
await saveSettings({ ...appSettings, default_microphone: deviceId || null });
|
||||
}
|
||||
};
|
||||
|
||||
return (
|
||||
<div>
|
||||
<label className="block text-sm font-medium mb-1">Microphone</label>
|
||||
<p className="text-xs text-[var(--text-secondary)] mb-1.5">
|
||||
Audio input device for Claude Code voice mode (/voice)
|
||||
</p>
|
||||
{permissionNeeded ? (
|
||||
<div className="flex items-center gap-2">
|
||||
<span className="text-xs text-[var(--text-secondary)]">
|
||||
Microphone permission required
|
||||
</span>
|
||||
<button
|
||||
onClick={enumerateDevices}
|
||||
className="text-xs px-2 py-0.5 text-[var(--accent)] hover:text-[var(--accent-hover)] hover:bg-[var(--bg-primary)] rounded transition-colors"
|
||||
>
|
||||
Grant Access
|
||||
</button>
|
||||
</div>
|
||||
) : (
|
||||
<div className="flex items-center gap-2">
|
||||
<select
|
||||
value={selected}
|
||||
onChange={(e) => handleChange(e.target.value)}
|
||||
disabled={loading}
|
||||
className="flex-1 px-2 py-1 text-sm bg-[var(--bg-primary)] border border-[var(--border-color)] rounded focus:outline-none focus:border-[var(--accent)]"
|
||||
>
|
||||
<option value="">System Default</option>
|
||||
{devices.map((d) => (
|
||||
<option key={d.deviceId} value={d.deviceId}>
|
||||
{d.label}
|
||||
</option>
|
||||
))}
|
||||
</select>
|
||||
<button
|
||||
onClick={enumerateDevices}
|
||||
disabled={loading}
|
||||
title="Refresh microphone list"
|
||||
className="text-xs px-2 py-1 text-[var(--text-secondary)] hover:text-[var(--text-primary)] hover:bg-[var(--bg-primary)] rounded transition-colors disabled:opacity-50"
|
||||
>
|
||||
{loading ? "..." : "Refresh"}
|
||||
</button>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
@@ -2,6 +2,7 @@ import { useState, useEffect } from "react";
|
||||
import ApiKeyInput from "./ApiKeyInput";
|
||||
import DockerSettings from "./DockerSettings";
|
||||
import AwsSettings from "./AwsSettings";
|
||||
import MicrophoneSettings from "./MicrophoneSettings";
|
||||
import { useSettings } from "../../hooks/useSettings";
|
||||
import { useUpdates } from "../../hooks/useUpdates";
|
||||
import ClaudeInstructionsModal from "../projects/ClaudeInstructionsModal";
|
||||
@@ -59,6 +60,8 @@ export default function SettingsPanel() {
|
||||
<DockerSettings />
|
||||
<AwsSettings />
|
||||
|
||||
<MicrophoneSettings />
|
||||
|
||||
{/* Container Timezone */}
|
||||
<div>
|
||||
<label className="block text-sm font-medium mb-1">Container Timezone</label>
|
||||
|
||||
@@ -6,6 +6,8 @@ import { WebLinksAddon } from "@xterm/addon-web-links";
|
||||
import { openUrl } from "@tauri-apps/plugin-opener";
|
||||
import "@xterm/xterm/css/xterm.css";
|
||||
import { useTerminal } from "../../hooks/useTerminal";
|
||||
import { useSettings } from "../../hooks/useSettings";
|
||||
import { useVoice } from "../../hooks/useVoice";
|
||||
import { UrlDetector } from "../../lib/urlDetector";
|
||||
import UrlToast from "./UrlToast";
|
||||
|
||||
@@ -22,6 +24,9 @@ export default function TerminalView({ sessionId, active }: Props) {
|
||||
const webglRef = useRef<WebglAddon | null>(null);
|
||||
const detectorRef = useRef<UrlDetector | null>(null);
|
||||
const { sendInput, pasteImage, resize, onOutput, onExit } = useTerminal();
|
||||
const { appSettings } = useSettings();
|
||||
|
||||
const voice = useVoice(sessionId, appSettings?.default_microphone);
|
||||
|
||||
const [detectedUrl, setDetectedUrl] = useState<string | null>(null);
|
||||
const [imagePasteMsg, setImagePasteMsg] = useState<string | null>(null);
|
||||
@@ -200,6 +205,7 @@ export default function TerminalView({ sessionId, active }: Props) {
|
||||
try { webglRef.current?.dispose(); } catch { /* may already be disposed */ }
|
||||
webglRef.current = null;
|
||||
term.dispose();
|
||||
voice.stop();
|
||||
};
|
||||
}, [sessionId]); // eslint-disable-line react-hooks/exhaustive-deps
|
||||
|
||||
@@ -284,6 +290,32 @@ export default function TerminalView({ sessionId, active }: Props) {
|
||||
{imagePasteMsg}
|
||||
</div>
|
||||
)}
|
||||
<button
|
||||
onClick={voice.toggle}
|
||||
title={
|
||||
voice.state === "active"
|
||||
? "Voice active — click to stop"
|
||||
: voice.error
|
||||
? `Voice error: ${voice.error}`
|
||||
: "Enable voice input for /voice mode"
|
||||
}
|
||||
className={`absolute bottom-4 left-4 z-50 px-3 py-1.5 rounded-md text-xs font-medium border shadow-lg transition-colors cursor-pointer ${
|
||||
voice.state === "active"
|
||||
? "bg-[#1a3a2a] text-[#3fb950] border-[#238636] hover:bg-[#243b2a]"
|
||||
: voice.state === "starting"
|
||||
? "bg-[#1f2937] text-[#d29922] border-[#30363d] opacity-75"
|
||||
: voice.state === "error"
|
||||
? "bg-[#3a1a1a] text-[#ff7b72] border-[#da3633] hover:bg-[#4a2020]"
|
||||
: "bg-[#1f2937] text-[#b1bac4] border-[#30363d] hover:bg-[#2d3748] hover:text-[#e6edf3]"
|
||||
}`}
|
||||
disabled={voice.state === "starting"}
|
||||
>
|
||||
{voice.state === "active"
|
||||
? "Mic On"
|
||||
: voice.state === "starting"
|
||||
? "Mic..."
|
||||
: "Mic Off"}
|
||||
</button>
|
||||
{!isAtBottom && (
|
||||
<button
|
||||
onClick={handleScrollToBottom}
|
||||
|
||||
103
app/src/hooks/useVoice.ts
Normal file
103
app/src/hooks/useVoice.ts
Normal file
@@ -0,0 +1,103 @@
|
||||
import { useCallback, useRef, useState } from "react";
|
||||
import * as commands from "../lib/tauri-commands";
|
||||
|
||||
type VoiceState = "inactive" | "starting" | "active" | "error";
|
||||
|
||||
export function useVoice(sessionId: string, deviceId?: string | null) {
|
||||
const [state, setState] = useState<VoiceState>("inactive");
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
|
||||
const audioContextRef = useRef<AudioContext | null>(null);
|
||||
const streamRef = useRef<MediaStream | null>(null);
|
||||
const workletRef = useRef<AudioWorkletNode | null>(null);
|
||||
|
||||
const start = useCallback(async () => {
|
||||
if (state === "active" || state === "starting") return;
|
||||
setState("starting");
|
||||
setError(null);
|
||||
|
||||
try {
|
||||
// 1. Start the audio bridge in the container (creates FIFO writer)
|
||||
await commands.startAudioBridge(sessionId);
|
||||
|
||||
// 2. Get microphone access (use specific device if configured)
|
||||
const audioConstraints: MediaTrackConstraints = {
|
||||
channelCount: 1,
|
||||
echoCancellation: true,
|
||||
noiseSuppression: true,
|
||||
autoGainControl: true,
|
||||
};
|
||||
if (deviceId) {
|
||||
audioConstraints.deviceId = { exact: deviceId };
|
||||
}
|
||||
|
||||
const stream = await navigator.mediaDevices.getUserMedia({
|
||||
audio: audioConstraints,
|
||||
});
|
||||
streamRef.current = stream;
|
||||
|
||||
// 3. Create AudioContext at 16kHz (browser handles resampling)
|
||||
const audioContext = new AudioContext({ sampleRate: 16000 });
|
||||
audioContextRef.current = audioContext;
|
||||
|
||||
// 4. Load AudioWorklet processor
|
||||
await audioContext.audioWorklet.addModule("/audio-capture-processor.js");
|
||||
|
||||
// 5. Connect: mic → worklet → (silent) destination
|
||||
const source = audioContext.createMediaStreamSource(stream);
|
||||
const processor = new AudioWorkletNode(audioContext, "audio-capture-processor");
|
||||
workletRef.current = processor;
|
||||
|
||||
// 6. Handle PCM chunks from the worklet
|
||||
processor.port.onmessage = (event: MessageEvent<ArrayBuffer>) => {
|
||||
const bytes = Array.from(new Uint8Array(event.data));
|
||||
commands.sendAudioData(sessionId, bytes).catch(() => {
|
||||
// Audio bridge may have been closed — ignore send errors
|
||||
});
|
||||
};
|
||||
|
||||
source.connect(processor);
|
||||
processor.connect(audioContext.destination);
|
||||
|
||||
setState("active");
|
||||
} catch (e) {
|
||||
const msg = e instanceof Error ? e.message : String(e);
|
||||
setError(msg);
|
||||
setState("error");
|
||||
// Clean up on failure
|
||||
await commands.stopAudioBridge(sessionId).catch(() => {});
|
||||
}
|
||||
}, [sessionId, state, deviceId]);
|
||||
|
||||
const stop = useCallback(async () => {
|
||||
// Tear down audio pipeline
|
||||
workletRef.current?.disconnect();
|
||||
workletRef.current = null;
|
||||
|
||||
if (audioContextRef.current) {
|
||||
await audioContextRef.current.close().catch(() => {});
|
||||
audioContextRef.current = null;
|
||||
}
|
||||
|
||||
if (streamRef.current) {
|
||||
streamRef.current.getTracks().forEach((t) => t.stop());
|
||||
streamRef.current = null;
|
||||
}
|
||||
|
||||
// Stop the container-side audio bridge
|
||||
await commands.stopAudioBridge(sessionId).catch(() => {});
|
||||
|
||||
setState("inactive");
|
||||
setError(null);
|
||||
}, [sessionId]);
|
||||
|
||||
const toggle = useCallback(async () => {
|
||||
if (state === "active") {
|
||||
await stop();
|
||||
} else {
|
||||
await start();
|
||||
}
|
||||
}, [state, start, stop]);
|
||||
|
||||
return { state, error, start, stop, toggle };
|
||||
}
|
||||
@@ -49,6 +49,12 @@ export const closeTerminalSession = (sessionId: string) =>
|
||||
invoke<void>("close_terminal_session", { sessionId });
|
||||
export const pasteImageToTerminal = (sessionId: string, imageData: number[]) =>
|
||||
invoke<string>("paste_image_to_terminal", { sessionId, imageData });
|
||||
// Voice mode: host → container audio bridge.
// Starts the bridge exec session ("audio-{sessionId}") in the container.
export const startAudioBridge = (sessionId: string) =>
  invoke<void>("start_audio_bridge", { sessionId });
// Sends raw PCM bytes (Int16 samples serialized as a number array) to the bridge.
export const sendAudioData = (sessionId: string, data: number[]) =>
  invoke<void>("send_audio_data", { sessionId, data });
// Closes the bridge exec session; safe to call when it is not running.
export const stopAudioBridge = (sessionId: string) =>
  invoke<void>("stop_audio_bridge", { sessionId });
|
||||
|
||||
// MCP Servers
|
||||
export const listMcpServers = () => invoke<McpServer[]>("list_mcp_servers");
|
||||
|
||||
@@ -100,6 +100,7 @@ export interface AppSettings {
|
||||
auto_check_updates: boolean;
|
||||
dismissed_update_version: string | null;
|
||||
timezone: string | null;
|
||||
default_microphone: string | null;
|
||||
}
|
||||
|
||||
export interface UpdateInfo {
|
||||
|
||||
@@ -111,6 +111,14 @@ RUN chmod +x /usr/local/bin/osc52-clipboard \
|
||||
&& ln -sf /usr/local/bin/osc52-clipboard /usr/local/bin/xsel \
|
||||
&& ln -sf /usr/local/bin/osc52-clipboard /usr/local/bin/pbcopy
|
||||
|
||||
# ── Audio capture shim (voice mode) ────────────────────────────────────────
|
||||
# Provides fake rec/arecord that read PCM from a FIFO instead of a real mic,
|
||||
# allowing Claude Code voice mode to work inside the container.
|
||||
COPY audio-shim /usr/local/bin/audio-shim
|
||||
RUN chmod +x /usr/local/bin/audio-shim \
|
||||
&& ln -sf /usr/local/bin/audio-shim /usr/local/bin/rec \
|
||||
&& ln -sf /usr/local/bin/audio-shim /usr/local/bin/arecord
|
||||
|
||||
COPY entrypoint.sh /usr/local/bin/entrypoint.sh
|
||||
RUN chmod +x /usr/local/bin/entrypoint.sh
|
||||
COPY triple-c-scheduler /usr/local/bin/triple-c-scheduler
|
||||
|
||||
16
container/audio-shim
Normal file
16
container/audio-shim
Normal file
@@ -0,0 +1,16 @@
|
||||
#!/bin/bash
# Audio capture shim for Triple-C voice mode.
# Claude Code spawns `rec` or `arecord` to capture mic audio.
# Inside Docker there is no mic, so this shim reads PCM data from a
# FIFO that the Tauri host app writes to, and outputs it on stdout.
#
# NOTE(review): any CLI flags the caller passes (sample rate, format, …)
# are ignored — the host is presumably sending PCM in the format the
# caller expects; confirm against the host-side capture pipeline.

FIFO=/tmp/triple-c-audio-input

# Create the FIFO if it doesn't already exist
# (mkfifo errors are suppressed in case the host-side bridge created it
# first between our -p test and the mkfifo call).
[ -p "$FIFO" ] || mkfifo "$FIFO" 2>/dev/null

# Clean exit on SIGTERM (Claude Code sends this when recording stops)
trap 'exit 0' TERM INT

# Stream PCM from the FIFO to stdout until we get a signal or EOF
# (EOF occurs when the host-side FIFO writer closes its end).
cat "$FIFO"
||||
Reference in New Issue
Block a user