package com.openclaw.alfred.voice

import android.content.Context
import android.content.Intent
import android.os.Bundle
import android.os.Handler
import android.os.Looper
import android.speech.RecognitionListener
import android.speech.RecognizerIntent
import android.speech.SpeechRecognizer
import android.util.Log
import java.util.Locale

/**
 * Manages on-device voice-to-text using Android SpeechRecognizer.
 */
class VoiceInputManager(
    private val context: Context,
    private val onResult: (String) -> Unit,
    private val onError: (String) -> Unit,
    private val onListening: (Boolean) -> Unit
) {

    private val TAG = "VoiceInputManager"
    private var speechRecognizer: SpeechRecognizer? = null
    private var isListening = false
    private val handler = Handler(Looper.getMainLooper())

    /**
     * Create RecognitionListener for SpeechRecognizer.
     */
    private fun createRecognitionListener() = object : RecognitionListener {

        override fun onReadyForSpeech(params: Bundle?) {
            Log.d(TAG, "Ready for speech")
            isListening = true
            onListening(true)
        }

        override fun onBeginningOfSpeech() {
            Log.d(TAG, "Speech started")
        }

        override fun onRmsChanged(rmsdB: Float) {
            // Audio level changed - could show visual feedback
        }

        override fun onBufferReceived(buffer: ByteArray?) {
            // Partial audio buffer
        }

        override fun onEndOfSpeech() {
            Log.d(TAG, "Speech ended")
            isListening = false
            onListening(false)
        }

        override fun onError(error: Int) {
            Log.e(TAG, "Recognition error: $error")
            isListening = false
            onListening(false)

            val errorMsg = when (error) {
                SpeechRecognizer.ERROR_AUDIO -> "Audio recording error (microphone busy or unavailable)"
                SpeechRecognizer.ERROR_CLIENT -> "Client error (recognizer not ready - try again)"
                SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS -> "Missing permissions"
                SpeechRecognizer.ERROR_NETWORK -> "Network error"
                SpeechRecognizer.ERROR_NETWORK_TIMEOUT -> "Network timeout"
                SpeechRecognizer.ERROR_NO_MATCH -> "No speech detected - try again"
                SpeechRecognizer.ERROR_RECOGNIZER_BUSY -> "Microphone busy - please wait and try again"
                SpeechRecognizer.ERROR_SERVER -> "Server error"
                SpeechRecognizer.ERROR_SPEECH_TIMEOUT -> "Speech timeout"
                11 -> "Recognizer initialization error (try again in a moment)"
                else -> "Unknown error: $error"
            }
            onError(errorMsg)
        }

        override fun onResults(results: Bundle?) {
            Log.d(TAG, "Got results")
            val matches = results?.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION)
            if (!matches.isNullOrEmpty()) {
                val text = matches[0]
                Log.d(TAG, "Recognized: $text")
                onResult(text)
            }
            isListening = false
            onListening(false)
        }

        override fun onPartialResults(partialResults: Bundle?) {
            // Partial recognition results (if enabled)
        }

        override fun onEvent(eventType: Int, params: Bundle?) {
            // Recognition event
        }
    }

    init {
        if (SpeechRecognizer.isRecognitionAvailable(context)) {
            speechRecognizer = SpeechRecognizer.createSpeechRecognizer(context)
            speechRecognizer?.setRecognitionListener(createRecognitionListener())
        } else {
            Log.e(TAG, "Speech recognition not available on this device")
            onError("Speech recognition not available")
        }
    }

    /**
     * Start listening for voice input.
     */
    fun startListening() {
        if (isListening) {
            Log.w(TAG, "Already listening")
            return
        }

        // Destroy previous SpeechRecognizer instance
        try {
            speechRecognizer?.destroy()
            speechRecognizer = null
        } catch (e: Exception) {
            Log.w(TAG, "Error destroying previous recognizer", e)
        }

        // Add a delay to ensure the Android speech service has fully released resources.
        // This prevents error 11 (initialization error) caused by a race condition.
        handler.postDelayed({
            if (!SpeechRecognizer.isRecognitionAvailable(context)) {
                Log.e(TAG, "Speech recognition not available on this device")
                onError("Speech recognition not available")
                return@postDelayed
            }

            // Create new SpeechRecognizer instance
            try {
                speechRecognizer = SpeechRecognizer.createSpeechRecognizer(context)
                speechRecognizer?.setRecognitionListener(createRecognitionListener())
            } catch (e: Exception) {
                Log.e(TAG, "Failed to create speech recognizer", e)
                onError("Failed to initialize: ${e.message}")
                return@postDelayed
            }

            // Create intent with extended timeouts
            val intent = Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH).apply {
                putExtra(
                    RecognizerIntent.EXTRA_LANGUAGE_MODEL,
                    RecognizerIntent.LANGUAGE_MODEL_FREE_FORM
                )
                // EXTRA_LANGUAGE expects a BCP 47 language tag string, not a Locale object
                putExtra(RecognizerIntent.EXTRA_LANGUAGE, Locale.getDefault().toLanguageTag())
                putExtra(RecognizerIntent.EXTRA_MAX_RESULTS, 1)
                putExtra(RecognizerIntent.EXTRA_PARTIAL_RESULTS, false)
                // Extend silence detection timeouts for longer pauses
                putExtra(RecognizerIntent.EXTRA_SPEECH_INPUT_COMPLETE_SILENCE_LENGTH_MILLIS, 6500L)
                putExtra(RecognizerIntent.EXTRA_SPEECH_INPUT_POSSIBLY_COMPLETE_SILENCE_LENGTH_MILLIS, 5000L)
                putExtra(RecognizerIntent.EXTRA_SPEECH_INPUT_MINIMUM_LENGTH_MILLIS, 12000L)
            }

            // Start listening
            try {
                speechRecognizer?.startListening(intent)
                Log.d(TAG, "Started listening")
            } catch (e: Exception) {
                Log.e(TAG, "Failed to start listening", e)
                isListening = false
                onListening(false)
                onError("Failed to start: ${e.message}")
            }
        }, 150) // 150ms delay to avoid race condition
    }

    /**
     * Stop listening.
     */
    fun stopListening() {
        if (isListening) {
            speechRecognizer?.stopListening()
            isListening = false
            onListening(false)
            Log.d(TAG, "Stopped listening")
        }
    }

    /**
     * Cancel listening.
     */
    fun cancel() {
        if (isListening) {
            speechRecognizer?.cancel()
            isListening = false
            onListening(false)
            Log.d(TAG, "Cancelled listening")
        }
    }

    /**
     * Cleanup resources.
     */
    fun destroy() {
        speechRecognizer?.destroy()
        speechRecognizer = null
        handler.removeCallbacksAndMessages(null)
        Log.d(TAG, "Destroyed")
    }

    /**
     * Check if currently listening.
     */
    fun isListening(): Boolean = isListening
}
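
// Usage sketch (illustrative only; the host component, permission handling, and the
// callback helpers below are assumptions, not part of this file):
//
//   // In an Activity or Service, after the RECORD_AUDIO permission has been granted
//   // (SpeechRecognizer requires it; see ERROR_INSUFFICIENT_PERMISSIONS above):
//   val voiceInput = VoiceInputManager(
//       context = this,
//       onResult = { text -> showTranscript(text) },        // showTranscript() is hypothetical
//       onError = { message -> showError(message) },        // showError() is hypothetical
//       onListening = { active -> updateMicIcon(active) }   // updateMicIcon() is hypothetical
//   )
//   voiceInput.startListening()   // begins a recognition session after the 150ms re-init delay
//   // ...
//   voiceInput.destroy()          // call from onDestroy() to release the recognizer and handler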