Add Ctrl+Shift+M hotkey for speech-to-text toggle

Lifts useSTT hook from SttButton into TerminalView so both the hotkey
and the button share the same recording state. The hotkey keeps terminal
focus, so after transcription the user can just press Enter. The button
also no longer steals focus from the terminal: its new onMouseDown handler
calls preventDefault.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-13 05:55:52 -07:00
parent caf3e26816
commit 49d09e4447
2 changed files with 23 additions and 12 deletions

View File

@@ -1,14 +1,15 @@
import { useCallback, useEffect, useRef, useState } from "react"; import { useCallback, useEffect, useRef, useState } from "react";
import { useSTT } from "../../hooks/useSTT"; import type { SttState } from "../../hooks/useSTT";
import * as commands from "../../lib/tauri-commands"; import * as commands from "../../lib/tauri-commands";
interface Props { interface Props {
sessionId: string; state: SttState;
sendInput: (sessionId: string, data: string) => Promise<void>; error: string | null;
onToggle: () => Promise<void>;
onCancel: () => Promise<void>;
} }
export default function SttButton({ sessionId, sendInput }: Props) { export default function SttButton({ state, error, onToggle, onCancel }: Props) {
const { state, error, toggle, cancelRecording } = useSTT(sessionId, sendInput);
const [elapsed, setElapsed] = useState(0); const [elapsed, setElapsed] = useState(0);
const timerRef = useRef<ReturnType<typeof setInterval> | null>(null); const timerRef = useRef<ReturnType<typeof setInterval> | null>(null);
@@ -40,17 +41,17 @@ export default function SttButton({ sessionId, sendInput }: Props) {
// Container start failed, toggle will still attempt transcription // Container start failed, toggle will still attempt transcription
} }
} }
await toggle(); await onToggle();
}, [state, toggle]); }, [state, onToggle]);
const handleContextMenu = useCallback( const handleContextMenu = useCallback(
(e: React.MouseEvent) => { (e: React.MouseEvent) => {
e.preventDefault(); e.preventDefault();
if (state === "recording") { if (state === "recording") {
cancelRecording(); onCancel();
} }
}, },
[state, cancelRecording], [state, onCancel],
); );
const formatTime = (seconds: number) => { const formatTime = (seconds: number) => {
@@ -64,6 +65,7 @@ export default function SttButton({ sessionId, sendInput }: Props) {
<button <button
onClick={handleClick} onClick={handleClick}
onContextMenu={handleContextMenu} onContextMenu={handleContextMenu}
onMouseDown={(e) => e.preventDefault()} // prevent stealing focus from terminal
disabled={state === "transcribing"} disabled={state === "transcribing"}
className={`w-8 h-8 rounded-full flex items-center justify-center transition-all cursor-pointer ${ className={`w-8 h-8 rounded-full flex items-center justify-center transition-all cursor-pointer ${
state === "recording" state === "recording"
@@ -74,10 +76,10 @@ export default function SttButton({ sessionId, sendInput }: Props) {
}`} }`}
title={ title={
state === "recording" state === "recording"
? "Click to stop and transcribe (right-click to cancel)" ? "Click or Ctrl+Shift+M to stop and transcribe"
: state === "transcribing" : state === "transcribing"
? "Transcribing..." ? "Transcribing..."
: "Speech to text" : "Speech to text (Ctrl+Shift+M)"
} }
> >
{state === "transcribing" ? ( {state === "transcribing" ? (

View File

@@ -7,6 +7,7 @@ import { openUrl } from "@tauri-apps/plugin-opener";
import "@xterm/xterm/css/xterm.css"; import "@xterm/xterm/css/xterm.css";
import { useTerminal } from "../../hooks/useTerminal"; import { useTerminal } from "../../hooks/useTerminal";
import { useAppState } from "../../store/appState"; import { useAppState } from "../../store/appState";
import { useSTT } from "../../hooks/useSTT";
import SttButton from "./SttButton"; import SttButton from "./SttButton";
import { awsSsoRefresh } from "../../lib/tauri-commands"; import { awsSsoRefresh } from "../../lib/tauri-commands";
import { UrlDetector } from "../../lib/urlDetector"; import { UrlDetector } from "../../lib/urlDetector";
@@ -27,6 +28,9 @@ export default function TerminalView({ sessionId, active }: Props) {
const { sendInput, pasteImage, resize, onOutput, onExit } = useTerminal(); const { sendInput, pasteImage, resize, onOutput, onExit } = useTerminal();
const setTerminalHasSelection = useAppState(s => s.setTerminalHasSelection); const setTerminalHasSelection = useAppState(s => s.setTerminalHasSelection);
const sttEnabled = useAppState(s => s.appSettings?.stt?.enabled); const sttEnabled = useAppState(s => s.appSettings?.stt?.enabled);
const stt = useSTT(sessionId, sendInput);
const sttToggleRef = useRef(stt.toggle);
sttToggleRef.current = stt.toggle;
const ssoBufferRef = useRef(""); const ssoBufferRef = useRef("");
const ssoTriggeredRef = useRef(false); const ssoTriggeredRef = useRef(false);
@@ -102,6 +106,11 @@ export default function TerminalView({ sessionId, active }: Props) {
} }
return false; // prevent xterm from processing this key return false; // prevent xterm from processing this key
} }
// Ctrl+Shift+M toggles speech-to-text recording
if (event.type === "keydown" && event.ctrlKey && event.shiftKey && event.key === "M") {
sttToggleRef.current();
return false;
}
return true; return true;
}); });
@@ -427,7 +436,7 @@ export default function TerminalView({ sessionId, active }: Props) {
{isAutoFollow ? "▼ Following" : "▽ Paused"} {isAutoFollow ? "▼ Following" : "▽ Paused"}
</button> </button>
{/* STT mic button - bottom left */} {/* STT mic button - bottom left */}
{sttEnabled && <SttButton sessionId={sessionId} sendInput={sendInput} />} {sttEnabled && <SttButton state={stt.state} error={stt.error} onToggle={stt.toggle} onCancel={stt.cancelRecording} />}
{/* Jump to Current - bottom right, when scrolled up */} {/* Jump to Current - bottom right, when scrolled up */}
{!isAtBottom && ( {!isAtBottom && (
<button <button