Initial commit: Alfred Mobile - AI Assistant Android App
- OAuth authentication via Authentik - WebSocket connection to OpenClaw gateway - Configurable gateway URL with first-run setup - User preferences sync across devices - Multi-user support with custom assistant names - ElevenLabs TTS integration (local + remote) - FCM push notifications for alarms - Voice input via Google Speech API - No hardcoded secrets or internal IPs in tracked files
This commit is contained in:
207
app/src/main/java/com/openclaw/alfred/voice/VoiceInputManager.kt
Normal file
207
app/src/main/java/com/openclaw/alfred/voice/VoiceInputManager.kt
Normal file
@@ -0,0 +1,207 @@
|
||||
package com.openclaw.alfred.voice
|
||||
|
||||
import android.content.Context
|
||||
import android.content.Intent
|
||||
import android.os.Bundle
|
||||
import android.speech.RecognitionListener
|
||||
import android.speech.RecognizerIntent
|
||||
import android.speech.SpeechRecognizer
|
||||
import android.util.Log
|
||||
import java.util.*
|
||||
|
||||
/**
 * Manages on-device voice-to-text using the Android [SpeechRecognizer] API.
 *
 * Results, errors, and listening-state changes are delivered through the
 * constructor callbacks. All recognizer interaction is posted to the main
 * thread via [handler], as the SpeechRecognizer contract requires.
 *
 * @param context context used to create the recognizer.
 * @param onResult invoked with the top recognition hypothesis.
 * @param onError invoked with a human-readable error description.
 * @param onListening invoked with `true` when listening starts, `false` when it stops.
 */
class VoiceInputManager(
    private val context: Context,
    private val onResult: (String) -> Unit,
    private val onError: (String) -> Unit,
    private val onListening: (Boolean) -> Unit
) {

    private var speechRecognizer: SpeechRecognizer? = null
    private var isListening = false

    // Main-looper handler: SpeechRecognizer must be driven from the main
    // thread, and startListening() uses it for the re-init delay.
    private val handler = android.os.Handler(android.os.Looper.getMainLooper())

    init {
        if (SpeechRecognizer.isRecognitionAvailable(context)) {
            speechRecognizer = SpeechRecognizer.createSpeechRecognizer(context)
            speechRecognizer?.setRecognitionListener(createRecognitionListener())
        } else {
            Log.e(TAG, "Speech recognition not available on this device")
            onError("Speech recognition not available")
        }
    }

    /** Builds the [RecognitionListener] that forwards recognizer events to the callbacks. */
    private fun createRecognitionListener() = object : RecognitionListener {
        override fun onReadyForSpeech(params: Bundle?) {
            Log.d(TAG, "Ready for speech")
            isListening = true
            onListening(true)
        }

        override fun onBeginningOfSpeech() {
            Log.d(TAG, "Speech started")
        }

        override fun onRmsChanged(rmsdB: Float) {
            // Audio level changed - could show visual feedback
        }

        override fun onBufferReceived(buffer: ByteArray?) {
            // Partial audio buffer
        }

        override fun onEndOfSpeech() {
            Log.d(TAG, "Speech ended")
            isListening = false
            onListening(false)
        }

        override fun onError(error: Int) {
            Log.e(TAG, "Recognition error: $error")
            isListening = false
            onListening(false)

            // Map recognizer error codes to user-facing messages.
            val errorMsg = when (error) {
                SpeechRecognizer.ERROR_AUDIO -> "Audio recording error (microphone busy or unavailable)"
                SpeechRecognizer.ERROR_CLIENT -> "Client error (recognizer not ready - try again)"
                SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS -> "Missing permissions"
                SpeechRecognizer.ERROR_NETWORK -> "Network error"
                SpeechRecognizer.ERROR_NETWORK_TIMEOUT -> "Network timeout"
                SpeechRecognizer.ERROR_NO_MATCH -> "No speech detected - try again"
                SpeechRecognizer.ERROR_RECOGNIZER_BUSY -> "Microphone busy - please wait and try again"
                SpeechRecognizer.ERROR_SERVER -> "Server error"
                SpeechRecognizer.ERROR_SPEECH_TIMEOUT -> "Speech timeout"
                // 11 == SpeechRecognizer.ERROR_SERVER_DISCONNECTED (the named
                // constant only exists on API 31+); kept as a raw value so the
                // file compiles against older SDK levels.
                11 -> "Recognizer initialization error (try again in a moment)"
                else -> "Unknown error: $error"
            }
            onError(errorMsg)
        }

        override fun onResults(results: Bundle?) {
            Log.d(TAG, "Got results")
            // EXTRA_MAX_RESULTS is 1, so only the top hypothesis is delivered.
            val matches = results?.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION)
            if (!matches.isNullOrEmpty()) {
                val text = matches[0]
                Log.d(TAG, "Recognized: $text")
                onResult(text)
            }
            isListening = false
            onListening(false)
        }

        override fun onPartialResults(partialResults: Bundle?) {
            // Partial recognition results (if enabled)
        }

        override fun onEvent(eventType: Int, params: Bundle?) {
            // Recognition event
        }
    }

    /**
     * Start listening for voice input.
     *
     * The previous recognizer instance is destroyed and a fresh one created
     * after a short delay; this works around error 11 races in the Android
     * speech service when a recognizer is recreated too quickly.
     * No-op if already listening.
     */
    fun startListening() {
        if (isListening) {
            Log.w(TAG, "Already listening")
            return
        }

        // Destroy the previous SpeechRecognizer instance so the speech
        // service releases the microphone before a new one is created.
        try {
            speechRecognizer?.destroy()
            speechRecognizer = null
        } catch (e: Exception) {
            Log.w(TAG, "Error destroying previous recognizer", e)
        }

        // Delay re-creation to ensure the Android speech service has fully
        // released resources; prevents error 11 caused by a race condition.
        handler.postDelayed({
            if (!SpeechRecognizer.isRecognitionAvailable(context)) {
                Log.e(TAG, "Speech recognition not available on this device")
                onError("Speech recognition not available")
                return@postDelayed
            }

            // Create new SpeechRecognizer instance
            try {
                speechRecognizer = SpeechRecognizer.createSpeechRecognizer(context)
                speechRecognizer?.setRecognitionListener(createRecognitionListener())
            } catch (e: Exception) {
                Log.e(TAG, "Failed to create speech recognizer", e)
                onError("Failed to initialize: ${e.message}")
                return@postDelayed
            }

            // Free-form recognition with extended silence timeouts so the
            // user can pause mid-sentence without ending the session.
            val intent = Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH).apply {
                putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL, RecognizerIntent.LANGUAGE_MODEL_FREE_FORM)
                // FIX: EXTRA_LANGUAGE expects a BCP-47 language-tag String
                // (e.g. "en-US"), not a serialized Locale object.
                putExtra(RecognizerIntent.EXTRA_LANGUAGE, Locale.getDefault().toLanguageTag())
                putExtra(RecognizerIntent.EXTRA_MAX_RESULTS, 1)
                putExtra(RecognizerIntent.EXTRA_PARTIAL_RESULTS, false)

                // Extend silence detection timeouts for longer pauses
                putExtra(RecognizerIntent.EXTRA_SPEECH_INPUT_COMPLETE_SILENCE_LENGTH_MILLIS, 6500L)
                putExtra(RecognizerIntent.EXTRA_SPEECH_INPUT_POSSIBLY_COMPLETE_SILENCE_LENGTH_MILLIS, 5000L)
                putExtra(RecognizerIntent.EXTRA_SPEECH_INPUT_MINIMUM_LENGTH_MILLIS, 12000L)
            }

            try {
                speechRecognizer?.startListening(intent)
                Log.d(TAG, "Started listening")
            } catch (e: Exception) {
                Log.e(TAG, "Failed to start listening", e)
                isListening = false
                onListening(false)
                onError("Failed to start: ${e.message}")
            }
        }, RESTART_DELAY_MS)
    }

    /** Stop listening and let the recognizer deliver any final result. No-op when not listening. */
    fun stopListening() {
        if (isListening) {
            speechRecognizer?.stopListening()
            isListening = false
            onListening(false)
            Log.d(TAG, "Stopped listening")
        }
    }

    /** Cancel the current recognition session, discarding results. No-op when not listening. */
    fun cancel() {
        if (isListening) {
            speechRecognizer?.cancel()
            isListening = false
            onListening(false)
            Log.d(TAG, "Cancelled listening")
        }
    }

    /**
     * Release the recognizer and any pending handler callbacks.
     * Call from the owner's teardown (e.g. Activity.onDestroy).
     */
    fun destroy() {
        speechRecognizer?.destroy()
        speechRecognizer = null
        handler.removeCallbacksAndMessages(null)
        // FIX: reset state so a stale `true` is not reported after teardown.
        isListening = false
        Log.d(TAG, "Destroyed")
    }

    /** @return whether a recognition session is currently active. */
    fun isListening(): Boolean = isListening

    private companion object {
        const val TAG = "VoiceInputManager"

        // Delay before recreating the recognizer in startListening();
        // 150 ms gives the speech service time to release its resources.
        const val RESTART_DELAY_MS = 150L
    }
}
|
||||
Reference in New Issue
Block a user