Initial commit: Alfred Mobile - AI Assistant Android App
- OAuth authentication via Authentik - WebSocket connection to OpenClaw gateway - Configurable gateway URL with first-run setup - User preferences sync across devices - Multi-user support with custom assistant names - ElevenLabs TTS integration (local + remote) - FCM push notifications for alarms - Voice input via Google Speech API - No hardcoded secrets or internal IPs in tracked files
This commit is contained in:
207
app/src/main/java/com/openclaw/alfred/voice/VoiceInputManager.kt
Normal file
207
app/src/main/java/com/openclaw/alfred/voice/VoiceInputManager.kt
Normal file
@@ -0,0 +1,207 @@
|
||||
package com.openclaw.alfred.voice
|
||||
|
||||
import android.content.Context
|
||||
import android.content.Intent
|
||||
import android.os.Bundle
|
||||
import android.speech.RecognitionListener
|
||||
import android.speech.RecognizerIntent
|
||||
import android.speech.SpeechRecognizer
|
||||
import android.util.Log
|
||||
import java.util.*
|
||||
|
||||
/**
 * Manages on-device voice-to-text using the Android [SpeechRecognizer] API.
 *
 * Results, errors, and listening-state changes are delivered through the
 * constructor callbacks. All recognizer interaction is posted to the main
 * thread via [handler], as the SpeechRecognizer contract requires.
 *
 * @param context context used to create the recognizer.
 * @param onResult invoked with the top recognition hypothesis.
 * @param onError invoked with a human-readable error description.
 * @param onListening invoked with `true` when listening starts, `false` when it stops.
 */
class VoiceInputManager(
    private val context: Context,
    private val onResult: (String) -> Unit,
    private val onError: (String) -> Unit,
    private val onListening: (Boolean) -> Unit
) {

    private var speechRecognizer: SpeechRecognizer? = null
    private var isListening = false

    // Main-looper handler: SpeechRecognizer must be driven from the main
    // thread, and startListening() uses it for the re-init delay.
    private val handler = android.os.Handler(android.os.Looper.getMainLooper())

    init {
        if (SpeechRecognizer.isRecognitionAvailable(context)) {
            speechRecognizer = SpeechRecognizer.createSpeechRecognizer(context)
            speechRecognizer?.setRecognitionListener(createRecognitionListener())
        } else {
            Log.e(TAG, "Speech recognition not available on this device")
            onError("Speech recognition not available")
        }
    }

    /** Builds the [RecognitionListener] that forwards recognizer events to the callbacks. */
    private fun createRecognitionListener() = object : RecognitionListener {
        override fun onReadyForSpeech(params: Bundle?) {
            Log.d(TAG, "Ready for speech")
            isListening = true
            onListening(true)
        }

        override fun onBeginningOfSpeech() {
            Log.d(TAG, "Speech started")
        }

        override fun onRmsChanged(rmsdB: Float) {
            // Audio level changed - could show visual feedback
        }

        override fun onBufferReceived(buffer: ByteArray?) {
            // Partial audio buffer
        }

        override fun onEndOfSpeech() {
            Log.d(TAG, "Speech ended")
            isListening = false
            onListening(false)
        }

        override fun onError(error: Int) {
            Log.e(TAG, "Recognition error: $error")
            isListening = false
            onListening(false)

            // Map recognizer error codes to user-facing messages.
            val errorMsg = when (error) {
                SpeechRecognizer.ERROR_AUDIO -> "Audio recording error (microphone busy or unavailable)"
                SpeechRecognizer.ERROR_CLIENT -> "Client error (recognizer not ready - try again)"
                SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS -> "Missing permissions"
                SpeechRecognizer.ERROR_NETWORK -> "Network error"
                SpeechRecognizer.ERROR_NETWORK_TIMEOUT -> "Network timeout"
                SpeechRecognizer.ERROR_NO_MATCH -> "No speech detected - try again"
                SpeechRecognizer.ERROR_RECOGNIZER_BUSY -> "Microphone busy - please wait and try again"
                SpeechRecognizer.ERROR_SERVER -> "Server error"
                SpeechRecognizer.ERROR_SPEECH_TIMEOUT -> "Speech timeout"
                // 11 == SpeechRecognizer.ERROR_SERVER_DISCONNECTED (the named
                // constant only exists on API 31+); kept as a raw value so the
                // file compiles against older SDK levels.
                11 -> "Recognizer initialization error (try again in a moment)"
                else -> "Unknown error: $error"
            }
            onError(errorMsg)
        }

        override fun onResults(results: Bundle?) {
            Log.d(TAG, "Got results")
            // EXTRA_MAX_RESULTS is 1, so only the top hypothesis is delivered.
            val matches = results?.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION)
            if (!matches.isNullOrEmpty()) {
                val text = matches[0]
                Log.d(TAG, "Recognized: $text")
                onResult(text)
            }
            isListening = false
            onListening(false)
        }

        override fun onPartialResults(partialResults: Bundle?) {
            // Partial recognition results (if enabled)
        }

        override fun onEvent(eventType: Int, params: Bundle?) {
            // Recognition event
        }
    }

    /**
     * Start listening for voice input.
     *
     * The previous recognizer instance is destroyed and a fresh one created
     * after a short delay; this works around error 11 races in the Android
     * speech service when a recognizer is recreated too quickly.
     * No-op if already listening.
     */
    fun startListening() {
        if (isListening) {
            Log.w(TAG, "Already listening")
            return
        }

        // Destroy the previous SpeechRecognizer instance so the speech
        // service releases the microphone before a new one is created.
        try {
            speechRecognizer?.destroy()
            speechRecognizer = null
        } catch (e: Exception) {
            Log.w(TAG, "Error destroying previous recognizer", e)
        }

        // Delay re-creation to ensure the Android speech service has fully
        // released resources; prevents error 11 caused by a race condition.
        handler.postDelayed({
            if (!SpeechRecognizer.isRecognitionAvailable(context)) {
                Log.e(TAG, "Speech recognition not available on this device")
                onError("Speech recognition not available")
                return@postDelayed
            }

            // Create new SpeechRecognizer instance
            try {
                speechRecognizer = SpeechRecognizer.createSpeechRecognizer(context)
                speechRecognizer?.setRecognitionListener(createRecognitionListener())
            } catch (e: Exception) {
                Log.e(TAG, "Failed to create speech recognizer", e)
                onError("Failed to initialize: ${e.message}")
                return@postDelayed
            }

            // Free-form recognition with extended silence timeouts so the
            // user can pause mid-sentence without ending the session.
            val intent = Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH).apply {
                putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL, RecognizerIntent.LANGUAGE_MODEL_FREE_FORM)
                // FIX: EXTRA_LANGUAGE expects a BCP-47 language-tag String
                // (e.g. "en-US"), not a serialized Locale object.
                putExtra(RecognizerIntent.EXTRA_LANGUAGE, Locale.getDefault().toLanguageTag())
                putExtra(RecognizerIntent.EXTRA_MAX_RESULTS, 1)
                putExtra(RecognizerIntent.EXTRA_PARTIAL_RESULTS, false)

                // Extend silence detection timeouts for longer pauses
                putExtra(RecognizerIntent.EXTRA_SPEECH_INPUT_COMPLETE_SILENCE_LENGTH_MILLIS, 6500L)
                putExtra(RecognizerIntent.EXTRA_SPEECH_INPUT_POSSIBLY_COMPLETE_SILENCE_LENGTH_MILLIS, 5000L)
                putExtra(RecognizerIntent.EXTRA_SPEECH_INPUT_MINIMUM_LENGTH_MILLIS, 12000L)
            }

            try {
                speechRecognizer?.startListening(intent)
                Log.d(TAG, "Started listening")
            } catch (e: Exception) {
                Log.e(TAG, "Failed to start listening", e)
                isListening = false
                onListening(false)
                onError("Failed to start: ${e.message}")
            }
        }, RESTART_DELAY_MS)
    }

    /** Stop listening and let the recognizer deliver any final result. No-op when not listening. */
    fun stopListening() {
        if (isListening) {
            speechRecognizer?.stopListening()
            isListening = false
            onListening(false)
            Log.d(TAG, "Stopped listening")
        }
    }

    /** Cancel the current recognition session, discarding results. No-op when not listening. */
    fun cancel() {
        if (isListening) {
            speechRecognizer?.cancel()
            isListening = false
            onListening(false)
            Log.d(TAG, "Cancelled listening")
        }
    }

    /**
     * Release the recognizer and any pending handler callbacks.
     * Call from the owner's teardown (e.g. Activity.onDestroy).
     */
    fun destroy() {
        speechRecognizer?.destroy()
        speechRecognizer = null
        handler.removeCallbacksAndMessages(null)
        // FIX: reset state so a stale `true` is not reported after teardown.
        isListening = false
        Log.d(TAG, "Destroyed")
    }

    /** @return whether a recognition session is currently active. */
    fun isListening(): Boolean = isListening

    private companion object {
        const val TAG = "VoiceInputManager"

        // Delay before recreating the recognizer in startListening();
        // 150 ms gives the speech service time to release its resources.
        const val RESTART_DELAY_MS = 150L
    }
}
|
||||
Reference in New Issue
Block a user