STT improvements: hotkey, button position, and hover tooltip #2

Merged
jknapp merged 4 commits from feature/stt into main 2026-04-13 13:02:53 +00:00
2 changed files with 51 additions and 35 deletions

View File

@@ -1,15 +1,17 @@
import { useCallback, useEffect, useRef, useState } from "react"; import { useCallback, useEffect, useRef, useState } from "react";
import { useSTT } from "../../hooks/useSTT"; import type { SttState } from "../../hooks/useSTT";
import * as commands from "../../lib/tauri-commands"; import * as commands from "../../lib/tauri-commands";
interface Props { interface Props {
sessionId: string; state: SttState;
sendInput: (sessionId: string, data: string) => Promise<void>; error: string | null;
onToggle: () => Promise<void>;
onCancel: () => Promise<void>;
} }
export default function SttButton({ sessionId, sendInput }: Props) { export default function SttButton({ state, error, onToggle, onCancel }: Props) {
const { state, error, toggle, cancelRecording } = useSTT(sessionId, sendInput);
const [elapsed, setElapsed] = useState(0); const [elapsed, setElapsed] = useState(0);
const [hovered, setHovered] = useState(false);
const timerRef = useRef<ReturnType<typeof setInterval> | null>(null); const timerRef = useRef<ReturnType<typeof setInterval> | null>(null);
// Track recording duration // Track recording duration
@@ -40,17 +42,17 @@ export default function SttButton({ sessionId, sendInput }: Props) {
// Container start failed, toggle will still attempt transcription // Container start failed, toggle will still attempt transcription
} }
} }
await toggle(); await onToggle();
}, [state, toggle]); }, [state, onToggle]);
const handleContextMenu = useCallback( const handleContextMenu = useCallback(
(e: React.MouseEvent) => { (e: React.MouseEvent) => {
e.preventDefault(); e.preventDefault();
if (state === "recording") { if (state === "recording") {
cancelRecording(); onCancel();
} }
}, },
[state, cancelRecording], [state, onCancel],
); );
const formatTime = (seconds: number) => { const formatTime = (seconds: number) => {
@@ -60,10 +62,14 @@ export default function SttButton({ sessionId, sendInput }: Props) {
}; };
return ( return (
<div className="absolute bottom-4 left-4 z-50 flex items-center gap-2"> <div className="absolute bottom-1 left-1 z-50 flex items-center gap-2">
<div className="relative">
<button <button
onClick={handleClick} onClick={handleClick}
onContextMenu={handleContextMenu} onContextMenu={handleContextMenu}
onMouseDown={(e) => e.preventDefault()} // prevent stealing focus from terminal
onMouseEnter={() => setHovered(true)}
onMouseLeave={() => setHovered(false)}
disabled={state === "transcribing"} disabled={state === "transcribing"}
className={`w-8 h-8 rounded-full flex items-center justify-center transition-all cursor-pointer ${ className={`w-8 h-8 rounded-full flex items-center justify-center transition-all cursor-pointer ${
state === "recording" state === "recording"
@@ -72,13 +78,6 @@ export default function SttButton({ sessionId, sendInput }: Props) {
? "bg-[#1f2937] text-[#58a6ff] border border-[#30363d] opacity-80" ? "bg-[#1f2937] text-[#58a6ff] border border-[#30363d] opacity-80"
: "bg-[#1f2937]/80 text-[#8b949e] border border-[#30363d] hover:text-[#e6edf3] hover:bg-[#2d3748]" : "bg-[#1f2937]/80 text-[#8b949e] border border-[#30363d] hover:text-[#e6edf3] hover:bg-[#2d3748]"
}`} }`}
title={
state === "recording"
? "Click to stop and transcribe (right-click to cancel)"
: state === "transcribing"
? "Transcribing..."
: "Speech to text"
}
> >
{state === "transcribing" ? ( {state === "transcribing" ? (
<svg className="w-4 h-4 animate-spin" viewBox="0 0 24 24" fill="none"> <svg className="w-4 h-4 animate-spin" viewBox="0 0 24 24" fill="none">
@@ -92,6 +91,14 @@ export default function SttButton({ sessionId, sendInput }: Props) {
</svg> </svg>
)} )}
</button> </button>
{hovered && state !== "recording" && (
<div className="absolute bottom-full left-0 mb-1.5 px-2 py-1 text-[11px] leading-snug text-[#e6edf3] bg-[#21262d] border border-[#30363d] rounded shadow-lg whitespace-nowrap pointer-events-none">
{state === "transcribing" ? "Transcribing..." : (
<>Speech to text <kbd className="ml-1 px-1 py-0.5 text-[10px] bg-[#0d1117] border border-[#30363d] rounded font-mono">Ctrl+Shift+M</kbd></>
)}
</div>
)}
</div>
{state === "recording" && ( {state === "recording" && (
<span className="text-xs text-[#f85149] font-mono bg-[#1f2937] px-2 py-0.5 rounded border border-[#30363d]"> <span className="text-xs text-[#f85149] font-mono bg-[#1f2937] px-2 py-0.5 rounded border border-[#30363d]">
{formatTime(elapsed)} {formatTime(elapsed)}

View File

@@ -7,6 +7,7 @@ import { openUrl } from "@tauri-apps/plugin-opener";
import "@xterm/xterm/css/xterm.css"; import "@xterm/xterm/css/xterm.css";
import { useTerminal } from "../../hooks/useTerminal"; import { useTerminal } from "../../hooks/useTerminal";
import { useAppState } from "../../store/appState"; import { useAppState } from "../../store/appState";
import { useSTT } from "../../hooks/useSTT";
import SttButton from "./SttButton"; import SttButton from "./SttButton";
import { awsSsoRefresh } from "../../lib/tauri-commands"; import { awsSsoRefresh } from "../../lib/tauri-commands";
import { UrlDetector } from "../../lib/urlDetector"; import { UrlDetector } from "../../lib/urlDetector";
@@ -27,6 +28,9 @@ export default function TerminalView({ sessionId, active }: Props) {
const { sendInput, pasteImage, resize, onOutput, onExit } = useTerminal(); const { sendInput, pasteImage, resize, onOutput, onExit } = useTerminal();
const setTerminalHasSelection = useAppState(s => s.setTerminalHasSelection); const setTerminalHasSelection = useAppState(s => s.setTerminalHasSelection);
const sttEnabled = useAppState(s => s.appSettings?.stt?.enabled); const sttEnabled = useAppState(s => s.appSettings?.stt?.enabled);
const stt = useSTT(sessionId, sendInput);
const sttToggleRef = useRef(stt.toggle);
sttToggleRef.current = stt.toggle;
const ssoBufferRef = useRef(""); const ssoBufferRef = useRef("");
const ssoTriggeredRef = useRef(false); const ssoTriggeredRef = useRef(false);
@@ -102,6 +106,11 @@ export default function TerminalView({ sessionId, active }: Props) {
} }
return false; // prevent xterm from processing this key return false; // prevent xterm from processing this key
} }
// Ctrl+Shift+M toggles speech-to-text recording
if (event.type === "keydown" && event.ctrlKey && event.shiftKey && event.key === "M") {
sttToggleRef.current();
return false;
}
return true; return true;
}); });
@@ -427,7 +436,7 @@ export default function TerminalView({ sessionId, active }: Props) {
{isAutoFollow ? "▼ Following" : "▽ Paused"} {isAutoFollow ? "▼ Following" : "▽ Paused"}
</button> </button>
{/* STT mic button - bottom left */} {/* STT mic button - bottom left */}
{sttEnabled && <SttButton sessionId={sessionId} sendInput={sendInput} />} {sttEnabled && <SttButton state={stt.state} error={stt.error} onToggle={stt.toggle} onCancel={stt.cancelRecording} />}
{/* Jump to Current - bottom right, when scrolled up */} {/* Jump to Current - bottom right, when scrolled up */}
{!isAtBottom && ( {!isAtBottom && (
<button <button