Add speech-to-text feature using Faster Whisper container
Some checks failed
Build App / compute-version (pull_request) Successful in 3s
Build App / build-macos (pull_request) Successful in 2m28s
Build STT Container / build-stt-container (pull_request) Successful in 3m18s
Build App / build-windows (pull_request) Successful in 4m40s
Build App / build-linux (pull_request) Failing after 1m46s
Build App / create-tag (pull_request) Has been skipped
Build App / sync-to-github (pull_request) Has been skipped
Some checks failed
Build App / compute-version (pull_request) Successful in 3s
Build App / build-macos (pull_request) Successful in 2m28s
Build STT Container / build-stt-container (pull_request) Successful in 3m18s
Build App / build-windows (pull_request) Successful in 4m40s
Build App / build-linux (pull_request) Failing after 1m46s
Build App / create-tag (pull_request) Has been skipped
Build App / sync-to-github (pull_request) Has been skipped
Adds a mic button to the terminal UI that captures speech, transcribes it via a Faster Whisper sidecar container, and injects the text into the terminal input. Includes settings panel for model selection (tiny/small/medium), port config, and container lifecycle management. - stt-container/: Dockerfile + FastAPI server for Whisper transcription - Rust backend: STT container management, transcribe_audio IPC command - Frontend: useSTT hook, SttButton, SttSettings, WAV encoder - CI: Gitea Actions workflow for multi-arch STT image builds Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -9,6 +9,7 @@ import { detectHostTimezone } from "../../lib/tauri-commands";
|
||||
import type { EnvVar } from "../../lib/types";
|
||||
import Tooltip from "../ui/Tooltip";
|
||||
import WebTerminalSettings from "./WebTerminalSettings";
|
||||
import SttSettings from "./SttSettings";
|
||||
|
||||
export default function SettingsPanel() {
|
||||
const { appSettings, saveSettings } = useSettings();
|
||||
@@ -120,6 +121,9 @@ export default function SettingsPanel() {
|
||||
{/* Web Terminal */}
|
||||
<WebTerminalSettings />
|
||||
|
||||
{/* Speech to Text */}
|
||||
<SttSettings />
|
||||
|
||||
{/* Updates section */}
|
||||
<div>
|
||||
<label className="block text-sm font-medium mb-2">Updates<Tooltip text="Check for new versions of the Triple-C app and container image." /></label>
|
||||
|
||||
249
app/src/components/settings/SttSettings.tsx
Normal file
249
app/src/components/settings/SttSettings.tsx
Normal file
@@ -0,0 +1,249 @@
|
||||
import { useState, useEffect } from "react";
|
||||
import { useSettings } from "../../hooks/useSettings";
|
||||
import { getSttStatus, startStt, stopStt, pullSttImage, buildSttImage } from "../../lib/tauri-commands";
|
||||
import { listen } from "@tauri-apps/api/event";
|
||||
import type { SttStatus } from "../../lib/types";
|
||||
import Tooltip from "../ui/Tooltip";
|
||||
|
||||
export default function SttSettings() {
|
||||
const { appSettings, saveSettings } = useSettings();
|
||||
const [status, setStatus] = useState<SttStatus | null>(null);
|
||||
const [loading, setLoading] = useState(false);
|
||||
const [pulling, setPulling] = useState(false);
|
||||
const [building, setBuilding] = useState(false);
|
||||
const [buildLog, setBuildLog] = useState<string | null>(null);
|
||||
const [model, setModel] = useState(appSettings?.stt?.model ?? "tiny");
|
||||
const [port, setPort] = useState(String(appSettings?.stt?.port ?? 9876));
|
||||
const [language, setLanguage] = useState(appSettings?.stt?.language ?? "");
|
||||
|
||||
useEffect(() => {
|
||||
setModel(appSettings?.stt?.model ?? "tiny");
|
||||
setPort(String(appSettings?.stt?.port ?? 9876));
|
||||
setLanguage(appSettings?.stt?.language ?? "");
|
||||
}, [appSettings?.stt?.model, appSettings?.stt?.port, appSettings?.stt?.language]);
|
||||
|
||||
useEffect(() => {
|
||||
refreshStatus();
|
||||
}, []);
|
||||
|
||||
const refreshStatus = () => {
|
||||
getSttStatus().then(setStatus).catch(console.error);
|
||||
};
|
||||
|
||||
const handleToggleEnabled = async () => {
|
||||
if (!appSettings) return;
|
||||
const newEnabled = !appSettings.stt.enabled;
|
||||
await saveSettings({
|
||||
...appSettings,
|
||||
stt: { ...appSettings.stt, enabled: newEnabled },
|
||||
});
|
||||
};
|
||||
|
||||
const handleSaveModel = async () => {
|
||||
if (!appSettings) return;
|
||||
await saveSettings({
|
||||
...appSettings,
|
||||
stt: { ...appSettings.stt, model },
|
||||
});
|
||||
};
|
||||
|
||||
const handleSavePort = async () => {
|
||||
if (!appSettings) return;
|
||||
const portNum = parseInt(port, 10);
|
||||
if (isNaN(portNum) || portNum < 1 || portNum > 65535) return;
|
||||
await saveSettings({
|
||||
...appSettings,
|
||||
stt: { ...appSettings.stt, port: portNum },
|
||||
});
|
||||
};
|
||||
|
||||
const handleSaveLanguage = async () => {
|
||||
if (!appSettings) return;
|
||||
await saveSettings({
|
||||
...appSettings,
|
||||
stt: { ...appSettings.stt, language: language || null },
|
||||
});
|
||||
};
|
||||
|
||||
const handleStartStop = async () => {
|
||||
setLoading(true);
|
||||
try {
|
||||
if (status?.running) {
|
||||
await stopStt();
|
||||
} else {
|
||||
await startStt();
|
||||
}
|
||||
refreshStatus();
|
||||
} catch (e) {
|
||||
console.error("STT toggle failed:", e);
|
||||
} finally {
|
||||
setLoading(false);
|
||||
}
|
||||
};
|
||||
|
||||
const handlePull = async () => {
|
||||
setPulling(true);
|
||||
setBuildLog(null);
|
||||
const unlisten = await listen<string>("stt-pull-progress", (event) => {
|
||||
setBuildLog(event.payload);
|
||||
});
|
||||
try {
|
||||
await pullSttImage();
|
||||
refreshStatus();
|
||||
} catch (e) {
|
||||
console.error("STT image pull failed:", e);
|
||||
setBuildLog(`Error: ${e}`);
|
||||
} finally {
|
||||
setPulling(false);
|
||||
unlisten();
|
||||
}
|
||||
};
|
||||
|
||||
const handleBuild = async () => {
|
||||
setBuilding(true);
|
||||
setBuildLog(null);
|
||||
const unlisten = await listen<string>("stt-build-progress", (event) => {
|
||||
setBuildLog(event.payload);
|
||||
});
|
||||
try {
|
||||
await buildSttImage();
|
||||
refreshStatus();
|
||||
} catch (e) {
|
||||
console.error("STT image build failed:", e);
|
||||
setBuildLog(`Error: ${e}`);
|
||||
} finally {
|
||||
setBuilding(false);
|
||||
unlisten();
|
||||
}
|
||||
};
|
||||
|
||||
return (
|
||||
<div>
|
||||
<label className="block text-sm font-medium mb-1">
|
||||
Speech to Text
|
||||
<Tooltip text="Transcribe speech to text using Faster Whisper in a Docker container. Adds a mic button to the terminal." />
|
||||
</label>
|
||||
<p className="text-xs text-[var(--text-secondary)] mb-2">
|
||||
Click the mic button in the terminal to dictate text via speech recognition.
|
||||
</p>
|
||||
|
||||
<div className="space-y-2">
|
||||
{/* Enable toggle */}
|
||||
<div className="flex items-center gap-2">
|
||||
<button
|
||||
onClick={handleToggleEnabled}
|
||||
className={`px-2 py-0.5 text-xs rounded transition-colors ${
|
||||
appSettings?.stt?.enabled
|
||||
? "bg-[var(--success)] text-white"
|
||||
: "bg-[var(--bg-primary)] border border-[var(--border-color)] text-[var(--text-secondary)]"
|
||||
}`}
|
||||
>
|
||||
{appSettings?.stt?.enabled ? "ON" : "OFF"}
|
||||
</button>
|
||||
<span className="text-xs text-[var(--text-secondary)]">
|
||||
{appSettings?.stt?.enabled ? "Enabled" : "Disabled"}
|
||||
</span>
|
||||
</div>
|
||||
|
||||
{appSettings?.stt?.enabled && (
|
||||
<>
|
||||
{/* Model selector */}
|
||||
<div>
|
||||
<label className="block text-xs text-[var(--text-secondary)] mb-1">Model</label>
|
||||
<select
|
||||
value={model}
|
||||
onChange={(e) => setModel(e.target.value)}
|
||||
onBlur={handleSaveModel}
|
||||
className="w-full px-2 py-1 text-sm bg-[var(--bg-primary)] border border-[var(--border-color)] rounded focus:outline-none focus:border-[var(--accent)]"
|
||||
>
|
||||
<option value="tiny">Tiny (fastest, ~75MB)</option>
|
||||
<option value="small">Small (balanced, ~500MB)</option>
|
||||
<option value="medium">Medium (most accurate, ~1.5GB)</option>
|
||||
</select>
|
||||
</div>
|
||||
|
||||
{/* Port */}
|
||||
<div>
|
||||
<label className="block text-xs text-[var(--text-secondary)] mb-1">Port</label>
|
||||
<input
|
||||
type="number"
|
||||
value={port}
|
||||
onChange={(e) => setPort(e.target.value)}
|
||||
onBlur={handleSavePort}
|
||||
min={1}
|
||||
max={65535}
|
||||
className="w-full px-2 py-1 text-sm bg-[var(--bg-primary)] border border-[var(--border-color)] rounded focus:outline-none focus:border-[var(--accent)]"
|
||||
/>
|
||||
</div>
|
||||
|
||||
{/* Language */}
|
||||
<div>
|
||||
<label className="block text-xs text-[var(--text-secondary)] mb-1">Language (optional)</label>
|
||||
<input
|
||||
type="text"
|
||||
value={language}
|
||||
onChange={(e) => setLanguage(e.target.value)}
|
||||
onBlur={handleSaveLanguage}
|
||||
placeholder="Auto-detect"
|
||||
className="w-full px-2 py-1 text-sm bg-[var(--bg-primary)] border border-[var(--border-color)] rounded focus:outline-none focus:border-[var(--accent)]"
|
||||
/>
|
||||
</div>
|
||||
|
||||
{/* Container status + controls */}
|
||||
<div className="pt-1">
|
||||
<label className="block text-xs text-[var(--text-secondary)] mb-1">STT Container</label>
|
||||
<div className="flex items-center gap-2 flex-wrap">
|
||||
<span className="text-xs text-[var(--text-secondary)]">
|
||||
{status?.image_exists
|
||||
? status.running
|
||||
? `Running (port ${status.port}, model: ${status.model})`
|
||||
: status.container_exists
|
||||
? "Stopped"
|
||||
: "Image ready"
|
||||
: "No image"}
|
||||
</span>
|
||||
{status?.image_exists && (
|
||||
<button
|
||||
onClick={handleStartStop}
|
||||
disabled={loading}
|
||||
className={`px-2 py-0.5 text-xs rounded transition-colors ${
|
||||
status?.running
|
||||
? "text-[var(--error)] hover:bg-[var(--bg-primary)]"
|
||||
: "text-[var(--success)] hover:bg-[var(--bg-primary)]"
|
||||
}`}
|
||||
>
|
||||
{loading ? "..." : status?.running ? "Stop" : "Start"}
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Image actions */}
|
||||
<div className="flex items-center gap-2 mt-2">
|
||||
<button
|
||||
onClick={handlePull}
|
||||
disabled={pulling || building}
|
||||
className="px-3 py-1 text-xs bg-[var(--bg-primary)] border border-[var(--border-color)] rounded hover:bg-[var(--border-color)] disabled:opacity-50 transition-colors"
|
||||
>
|
||||
{pulling ? "Pulling..." : "Pull Image"}
|
||||
</button>
|
||||
<button
|
||||
onClick={handleBuild}
|
||||
disabled={pulling || building}
|
||||
className="px-3 py-1 text-xs bg-[var(--bg-primary)] border border-[var(--border-color)] rounded hover:bg-[var(--border-color)] disabled:opacity-50 transition-colors"
|
||||
>
|
||||
{building ? "Building..." : "Build Locally"}
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{buildLog && (
|
||||
<pre className="mt-2 text-[10px] text-[var(--text-secondary)] bg-[var(--bg-primary)] border border-[var(--border-color)] rounded px-2 py-1 max-h-20 overflow-y-auto whitespace-pre-wrap">
|
||||
{buildLog}
|
||||
</pre>
|
||||
)}
|
||||
</div>
|
||||
</>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
107
app/src/components/terminal/SttButton.tsx
Normal file
107
app/src/components/terminal/SttButton.tsx
Normal file
@@ -0,0 +1,107 @@
|
||||
import { useCallback, useEffect, useRef, useState } from "react";
|
||||
import { useSTT } from "../../hooks/useSTT";
|
||||
import * as commands from "../../lib/tauri-commands";
|
||||
|
||||
interface Props {
|
||||
sessionId: string;
|
||||
sendInput: (sessionId: string, data: string) => Promise<void>;
|
||||
}
|
||||
|
||||
export default function SttButton({ sessionId, sendInput }: Props) {
|
||||
const { state, error, toggle, cancelRecording } = useSTT(sessionId, sendInput);
|
||||
const [elapsed, setElapsed] = useState(0);
|
||||
const timerRef = useRef<ReturnType<typeof setInterval> | null>(null);
|
||||
|
||||
// Track recording duration
|
||||
useEffect(() => {
|
||||
if (state === "recording") {
|
||||
setElapsed(0);
|
||||
timerRef.current = setInterval(() => setElapsed((e) => e + 1), 1000);
|
||||
} else {
|
||||
if (timerRef.current) {
|
||||
clearInterval(timerRef.current);
|
||||
timerRef.current = null;
|
||||
}
|
||||
}
|
||||
return () => {
|
||||
if (timerRef.current) clearInterval(timerRef.current);
|
||||
};
|
||||
}, [state]);
|
||||
|
||||
const handleClick = useCallback(async () => {
|
||||
// Auto-start STT container if not running
|
||||
if (state === "idle") {
|
||||
try {
|
||||
const status = await commands.getSttStatus();
|
||||
if (!status.running) {
|
||||
await commands.startStt();
|
||||
}
|
||||
} catch {
|
||||
// Container start failed, toggle will still attempt transcription
|
||||
}
|
||||
}
|
||||
await toggle();
|
||||
}, [state, toggle]);
|
||||
|
||||
const handleContextMenu = useCallback(
|
||||
(e: React.MouseEvent) => {
|
||||
e.preventDefault();
|
||||
if (state === "recording") {
|
||||
cancelRecording();
|
||||
}
|
||||
},
|
||||
[state, cancelRecording],
|
||||
);
|
||||
|
||||
const formatTime = (seconds: number) => {
|
||||
const m = Math.floor(seconds / 60);
|
||||
const s = seconds % 60;
|
||||
return `${m}:${s.toString().padStart(2, "0")}`;
|
||||
};
|
||||
|
||||
return (
|
||||
<div className="absolute bottom-4 left-4 z-50 flex items-center gap-2">
|
||||
<button
|
||||
onClick={handleClick}
|
||||
onContextMenu={handleContextMenu}
|
||||
disabled={state === "transcribing"}
|
||||
className={`w-8 h-8 rounded-full flex items-center justify-center transition-all cursor-pointer ${
|
||||
state === "recording"
|
||||
? "bg-[#f85149] text-white shadow-lg animate-pulse"
|
||||
: state === "transcribing"
|
||||
? "bg-[#1f2937] text-[#58a6ff] border border-[#30363d] opacity-80"
|
||||
: "bg-[#1f2937]/80 text-[#8b949e] border border-[#30363d] hover:text-[#e6edf3] hover:bg-[#2d3748]"
|
||||
}`}
|
||||
title={
|
||||
state === "recording"
|
||||
? "Click to stop and transcribe (right-click to cancel)"
|
||||
: state === "transcribing"
|
||||
? "Transcribing..."
|
||||
: "Speech to text"
|
||||
}
|
||||
>
|
||||
{state === "transcribing" ? (
|
||||
<svg className="w-4 h-4 animate-spin" viewBox="0 0 24 24" fill="none">
|
||||
<circle cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="2" opacity="0.25" />
|
||||
<path d="M12 2a10 10 0 0 1 10 10" stroke="currentColor" strokeWidth="2" strokeLinecap="round" />
|
||||
</svg>
|
||||
) : (
|
||||
<svg className="w-4 h-4" viewBox="0 0 24 24" fill="currentColor">
|
||||
<path d="M12 14c1.66 0 3-1.34 3-3V5c0-1.66-1.34-3-3-3S9 3.34 9 5v6c0 1.66 1.34 3 3 3z" />
|
||||
<path d="M17 11c0 2.76-2.24 5-5 5s-5-2.24-5-5H5c0 3.53 2.61 6.43 6 6.92V21h2v-3.08c3.39-.49 6-3.39 6-6.92h-2z" />
|
||||
</svg>
|
||||
)}
|
||||
</button>
|
||||
{state === "recording" && (
|
||||
<span className="text-xs text-[#f85149] font-mono bg-[#1f2937] px-2 py-0.5 rounded border border-[#30363d]">
|
||||
{formatTime(elapsed)}
|
||||
</span>
|
||||
)}
|
||||
{state === "error" && error && (
|
||||
<span className="text-xs text-[#f85149] bg-[#1f2937] px-2 py-0.5 rounded border border-[#30363d] max-w-[200px] truncate">
|
||||
{error}
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
@@ -7,6 +7,7 @@ import { openUrl } from "@tauri-apps/plugin-opener";
|
||||
import "@xterm/xterm/css/xterm.css";
|
||||
import { useTerminal } from "../../hooks/useTerminal";
|
||||
import { useAppState } from "../../store/appState";
|
||||
import SttButton from "./SttButton";
|
||||
import { awsSsoRefresh } from "../../lib/tauri-commands";
|
||||
import { UrlDetector } from "../../lib/urlDetector";
|
||||
import UrlToast from "./UrlToast";
|
||||
@@ -25,6 +26,7 @@ export default function TerminalView({ sessionId, active }: Props) {
|
||||
const detectorRef = useRef<UrlDetector | null>(null);
|
||||
const { sendInput, pasteImage, resize, onOutput, onExit } = useTerminal();
|
||||
const setTerminalHasSelection = useAppState(s => s.setTerminalHasSelection);
|
||||
const sttEnabled = useAppState(s => s.appSettings?.stt?.enabled);
|
||||
|
||||
const ssoBufferRef = useRef("");
|
||||
const ssoTriggeredRef = useRef(false);
|
||||
@@ -424,6 +426,8 @@ export default function TerminalView({ sessionId, active }: Props) {
|
||||
>
|
||||
{isAutoFollow ? "▼ Following" : "▽ Paused"}
|
||||
</button>
|
||||
{/* STT mic button - bottom left */}
|
||||
{sttEnabled && <SttButton sessionId={sessionId} sendInput={sendInput} />}
|
||||
{/* Jump to Current - bottom right, when scrolled up */}
|
||||
{!isAtBottom && (
|
||||
<button
|
||||
|
||||
145
app/src/hooks/useSTT.ts
Normal file
145
app/src/hooks/useSTT.ts
Normal file
@@ -0,0 +1,145 @@
|
||||
import { useCallback, useRef, useState } from "react";
|
||||
import * as commands from "../lib/tauri-commands";
|
||||
import { encodeWav } from "../lib/wav";
|
||||
import { useAppState } from "../store/appState";
|
||||
|
||||
export type SttState = "idle" | "recording" | "transcribing" | "error";
|
||||
|
||||
export function useSTT(sessionId: string, sendInput: (sessionId: string, data: string) => Promise<void>) {
|
||||
const [state, setState] = useState<SttState>("idle");
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
|
||||
const audioContextRef = useRef<AudioContext | null>(null);
|
||||
const streamRef = useRef<MediaStream | null>(null);
|
||||
const workletRef = useRef<AudioWorkletNode | null>(null);
|
||||
const chunksRef = useRef<Int16Array[]>([]);
|
||||
|
||||
const appSettings = useAppState((s) => s.appSettings);
|
||||
const deviceId = appSettings?.default_microphone;
|
||||
|
||||
const startRecording = useCallback(async () => {
|
||||
if (state === "recording" || state === "transcribing") return;
|
||||
setState("recording");
|
||||
setError(null);
|
||||
chunksRef.current = [];
|
||||
|
||||
try {
|
||||
const audioConstraints: MediaTrackConstraints = {
|
||||
channelCount: 1,
|
||||
echoCancellation: true,
|
||||
noiseSuppression: true,
|
||||
autoGainControl: true,
|
||||
};
|
||||
if (deviceId) {
|
||||
audioConstraints.deviceId = { exact: deviceId };
|
||||
}
|
||||
|
||||
const stream = await navigator.mediaDevices.getUserMedia({ audio: audioConstraints });
|
||||
streamRef.current = stream;
|
||||
|
||||
const audioContext = new AudioContext({ sampleRate: 16000 });
|
||||
audioContextRef.current = audioContext;
|
||||
|
||||
await audioContext.audioWorklet.addModule("/audio-capture-processor.js");
|
||||
|
||||
const source = audioContext.createMediaStreamSource(stream);
|
||||
const processor = new AudioWorkletNode(audioContext, "audio-capture-processor");
|
||||
workletRef.current = processor;
|
||||
|
||||
processor.port.onmessage = (event: MessageEvent<ArrayBuffer>) => {
|
||||
chunksRef.current.push(new Int16Array(event.data));
|
||||
};
|
||||
|
||||
source.connect(processor);
|
||||
processor.connect(audioContext.destination);
|
||||
} catch (e) {
|
||||
const msg = e instanceof Error ? e.message : String(e);
|
||||
setError(msg);
|
||||
setState("error");
|
||||
}
|
||||
}, [state, deviceId]);
|
||||
|
||||
const stopRecording = useCallback(async () => {
|
||||
if (state !== "recording") return;
|
||||
|
||||
// Stop audio capture
|
||||
workletRef.current?.disconnect();
|
||||
workletRef.current = null;
|
||||
|
||||
if (audioContextRef.current) {
|
||||
await audioContextRef.current.close().catch(() => {});
|
||||
audioContextRef.current = null;
|
||||
}
|
||||
|
||||
if (streamRef.current) {
|
||||
streamRef.current.getTracks().forEach((t) => t.stop());
|
||||
streamRef.current = null;
|
||||
}
|
||||
|
||||
// Concatenate PCM chunks
|
||||
const chunks = chunksRef.current;
|
||||
chunksRef.current = [];
|
||||
|
||||
if (chunks.length === 0) {
|
||||
setState("idle");
|
||||
return;
|
||||
}
|
||||
|
||||
const totalLength = chunks.reduce((sum, c) => sum + c.length, 0);
|
||||
const pcm = new Int16Array(totalLength);
|
||||
let offset = 0;
|
||||
for (const chunk of chunks) {
|
||||
pcm.set(chunk, offset);
|
||||
offset += chunk.length;
|
||||
}
|
||||
|
||||
// Encode to WAV and transcribe
|
||||
setState("transcribing");
|
||||
try {
|
||||
const wavBlob = encodeWav(pcm, 16000);
|
||||
const wavBuffer = await wavBlob.arrayBuffer();
|
||||
const audioData = Array.from(new Uint8Array(wavBuffer));
|
||||
|
||||
const text = await commands.transcribeAudio(audioData);
|
||||
if (text) {
|
||||
await sendInput(sessionId, text);
|
||||
}
|
||||
setState("idle");
|
||||
} catch (e) {
|
||||
const msg = e instanceof Error ? e.message : String(e);
|
||||
setError(msg);
|
||||
setState("error");
|
||||
// Reset to idle after a brief delay so the UI shows the error
|
||||
setTimeout(() => setState("idle"), 3000);
|
||||
}
|
||||
}, [state, sessionId, sendInput]);
|
||||
|
||||
const cancelRecording = useCallback(async () => {
|
||||
workletRef.current?.disconnect();
|
||||
workletRef.current = null;
|
||||
|
||||
if (audioContextRef.current) {
|
||||
await audioContextRef.current.close().catch(() => {});
|
||||
audioContextRef.current = null;
|
||||
}
|
||||
|
||||
if (streamRef.current) {
|
||||
streamRef.current.getTracks().forEach((t) => t.stop());
|
||||
streamRef.current = null;
|
||||
}
|
||||
|
||||
chunksRef.current = [];
|
||||
setState("idle");
|
||||
setError(null);
|
||||
}, []);
|
||||
|
||||
const toggle = useCallback(async () => {
|
||||
if (state === "recording") {
|
||||
await stopRecording();
|
||||
} else if (state === "idle" || state === "error") {
|
||||
await startRecording();
|
||||
}
|
||||
}, [state, startRecording, stopRecording]);
|
||||
|
||||
return { state, error, startRecording, stopRecording, cancelRecording, toggle };
|
||||
}
|
||||
@@ -1,5 +1,5 @@
|
||||
import { invoke } from "@tauri-apps/api/core";
|
||||
import type { Project, ProjectPath, ContainerInfo, SiblingContainer, AppSettings, UpdateInfo, ImageUpdateInfo, McpServer, FileEntry, WebTerminalInfo } from "./types";
|
||||
import type { Project, ProjectPath, ContainerInfo, SiblingContainer, AppSettings, UpdateInfo, ImageUpdateInfo, McpServer, FileEntry, WebTerminalInfo, SttStatus } from "./types";
|
||||
|
||||
// Docker
|
||||
export const checkDocker = () => invoke<boolean>("check_docker");
|
||||
@@ -98,3 +98,12 @@ export const getWebTerminalStatus = () =>
|
||||
invoke<WebTerminalInfo>("get_web_terminal_status");
|
||||
export const regenerateWebTerminalToken = () =>
|
||||
invoke<WebTerminalInfo>("regenerate_web_terminal_token");
|
||||
|
||||
// STT
// Thin wrappers around the Rust speech-to-text IPC commands.

/** Query image/container state for the STT sidecar. */
export const getSttStatus = () => invoke<SttStatus>("get_stt_status");
/** Start the STT container; resolves with the resulting status. */
export const startStt = () => invoke<SttStatus>("start_stt");
/** Stop the STT container. */
export const stopStt = () => invoke<void>("stop_stt");
/** Build the STT image locally (progress streamed via "stt-build-progress" events). */
export const buildSttImage = () => invoke<void>("build_stt_image");
/** Pull the STT image (progress streamed via "stt-pull-progress" events). */
export const pullSttImage = () => invoke<void>("pull_stt_image");
/** Transcribe WAV audio bytes; resolves with the recognized text. */
export const transcribeAudio = (audioData: number[]) =>
  invoke<string>("transcribe_audio", { audioData });
|
||||
|
||||
@@ -119,6 +119,22 @@ export interface AppSettings {
|
||||
default_microphone: string | null;
|
||||
dismissed_image_digest: string | null;
|
||||
web_terminal: WebTerminalSettings;
|
||||
stt: SttSettings;
|
||||
}
|
||||
|
||||
/** Persisted STT configuration (the `stt` section of AppSettings). */
export interface SttSettings {
  // Whether the STT feature (terminal mic button) is enabled.
  enabled: boolean;
  // Whisper model name; the settings UI offers "tiny" | "small" | "medium".
  model: string;
  // TCP port the STT container is exposed on (settings UI default: 9876).
  port: number;
  // Transcription language; null means auto-detect.
  language: string | null;
}
|
||||
|
||||
/** Runtime status of the STT container, as returned by `get_stt_status`. */
export interface SttStatus {
  // An STT container exists (running or stopped).
  container_exists: boolean;
  // The STT container is currently running.
  running: boolean;
  // Port the container is bound to.
  port: number;
  // Model the container reports.
  model: string;
  // The STT image is present locally.
  image_exists: boolean;
}
|
||||
|
||||
export interface WebTerminalSettings {
|
||||
|
||||
40
app/src/lib/wav.ts
Normal file
40
app/src/lib/wav.ts
Normal file
@@ -0,0 +1,40 @@
|
||||
/**
|
||||
* Encode PCM Int16 samples into a WAV file blob.
|
||||
* Assumes mono channel at the given sample rate.
|
||||
*/
|
||||
export function encodeWav(samples: Int16Array, sampleRate: number): Blob {
|
||||
const byteLength = samples.length * 2;
|
||||
const buffer = new ArrayBuffer(44 + byteLength);
|
||||
const view = new DataView(buffer);
|
||||
|
||||
// RIFF header
|
||||
writeString(view, 0, "RIFF");
|
||||
view.setUint32(4, 36 + byteLength, true);
|
||||
writeString(view, 8, "WAVE");
|
||||
|
||||
// fmt chunk
|
||||
writeString(view, 12, "fmt ");
|
||||
view.setUint32(16, 16, true); // chunk size
|
||||
view.setUint16(20, 1, true); // PCM format
|
||||
view.setUint16(22, 1, true); // mono
|
||||
view.setUint32(24, sampleRate, true);
|
||||
view.setUint32(28, sampleRate * 2, true); // byte rate
|
||||
view.setUint16(32, 2, true); // block align
|
||||
view.setUint16(34, 16, true); // bits per sample
|
||||
|
||||
// data chunk
|
||||
writeString(view, 36, "data");
|
||||
view.setUint32(40, byteLength, true);
|
||||
|
||||
// PCM samples
|
||||
const output = new Int16Array(buffer, 44);
|
||||
output.set(samples);
|
||||
|
||||
return new Blob([buffer], { type: "audio/wav" });
|
||||
}
|
||||
|
||||
function writeString(view: DataView, offset: number, str: string) {
|
||||
for (let i = 0; i < str.length; i++) {
|
||||
view.setUint8(offset + i, str.charCodeAt(i));
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user