Add speech-to-text feature using Faster Whisper container
Some checks failed
Build App / compute-version (pull_request) Successful in 3s
Build App / build-macos (pull_request) Successful in 2m28s
Build STT Container / build-stt-container (pull_request) Successful in 3m18s
Build App / build-windows (pull_request) Successful in 4m40s
Build App / build-linux (pull_request) Failing after 1m46s
Build App / create-tag (pull_request) Has been skipped
Build App / sync-to-github (pull_request) Has been skipped
Some checks failed
Build App / compute-version (pull_request) Successful in 3s
Build App / build-macos (pull_request) Successful in 2m28s
Build STT Container / build-stt-container (pull_request) Successful in 3m18s
Build App / build-windows (pull_request) Successful in 4m40s
Build App / build-linux (pull_request) Failing after 1m46s
Build App / create-tag (pull_request) Has been skipped
Build App / sync-to-github (pull_request) Has been skipped
Adds a mic button to the terminal UI that captures speech, transcribes it via a Faster Whisper sidecar container, and injects the text into the terminal input. Includes settings panel for model selection (tiny/small/medium), port config, and container lifecycle management.

- stt-container/: Dockerfile + FastAPI server for Whisper transcription
- Rust backend: STT container management, transcribe_audio IPC command
- Frontend: useSTT hook, SttButton, SttSettings, WAV encoder
- CI: Gitea Actions workflow for multi-arch STT image builds

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
40
app/src/lib/wav.ts
Normal file
40
app/src/lib/wav.ts
Normal file
@@ -0,0 +1,40 @@
|
||||
/**
 * Encode PCM Int16 samples into a WAV file blob.
 *
 * The result is a 44-byte RIFF/WAVE header followed by the samples stored as
 * little-endian signed 16-bit integers. The header always declares a single
 * (mono) channel of uncompressed PCM.
 *
 * @param samples - PCM samples, one signed 16-bit value per frame.
 * @param sampleRate - Sample rate in Hz; written to the header as an unsigned
 *   32-bit value.
 * @returns A Blob of type `audio/wav` containing the header and sample data.
 */
export function encodeWav(samples: Int16Array, sampleRate: number): Blob {
  const headerBytes = 44;
  const channelCount = 1; // mono
  const bitsPerSample = 16;
  const bytesPerSample = bitsPerSample / 8;
  const blockAlign = channelCount * bytesPerSample;
  const dataBytes = samples.length * bytesPerSample;

  const buffer = new ArrayBuffer(headerBytes + dataBytes);
  const view = new DataView(buffer);

  // RIFF header. The size field counts everything after itself, i.e. the
  // whole file minus the 8 bytes taken by "RIFF" and the size field.
  writeString(view, 0, "RIFF");
  view.setUint32(4, headerBytes - 8 + dataBytes, true);
  writeString(view, 8, "WAVE");

  // fmt chunk
  writeString(view, 12, "fmt ");
  view.setUint32(16, 16, true); // chunk size
  view.setUint16(20, 1, true); // PCM format
  view.setUint16(22, channelCount, true);
  view.setUint32(24, sampleRate, true);
  view.setUint32(28, sampleRate * blockAlign, true); // byte rate
  view.setUint16(32, blockAlign, true);
  view.setUint16(34, bitsPerSample, true);

  // data chunk
  writeString(view, 36, "data");
  view.setUint32(40, dataBytes, true);

  // PCM samples. Written through the DataView with an explicit little-endian
  // flag: a typed-array copy would use the host's native byte order, which
  // does not match the WAV format on big-endian platforms.
  samples.forEach((sample, index) => {
    view.setInt16(headerBytes + index * bytesPerSample, sample, true);
  });

  return new Blob([buffer], { type: "audio/wav" });
}
|
||||
|
||||
/**
 * Write a string into a DataView, one byte per UTF-16 code unit.
 *
 * Each code unit of `str` is stored with `setUint8` at consecutive byte
 * positions beginning at `offset`. Used here for the four-character ASCII
 * chunk tags of the WAV header.
 *
 * @param view - Destination view; it must have room for `str.length` bytes
 *   from `offset`, otherwise `setUint8` throws a RangeError.
 * @param offset - Byte position of the first character.
 * @param str - Text to write.
 */
function writeString(view: DataView, offset: number, str: string) {
  // split("") yields one entry per UTF-16 code unit, matching charCodeAt(i).
  str.split("").forEach((unit, position) => {
    view.setUint8(offset + position, unit.charCodeAt(0));
  });
}
|
||||
Reference in New Issue
Block a user