alfred-mobile/app/src/main/java/com/openclaw/alfred/voice/VoiceInputManager.kt

package com.openclaw.alfred.voice

import android.content.Context
import android.content.Intent
import android.os.Bundle
import android.speech.RecognitionListener
import android.speech.RecognizerIntent
import android.speech.SpeechRecognizer
import android.util.Log
import java.util.*

/**
 * Manages voice-to-text input through Android's [SpeechRecognizer].
 *
 * Every listening session uses a freshly created recognizer: [startListening] destroys the
 * previous instance, waits a short delay on the main looper, then creates a new one and
 * starts recognition. At most one delayed start is queued at a time, and [stopListening],
 * [cancel] and [destroy] all drop a start that is still queued.
 *
 * NOTE(review): the platform documents SpeechRecognizer as main-thread only; this class does
 * no thread hopping of its own apart from the delayed start, so callers are presumably
 * expected to use it from the main thread — confirm.
 *
 * @param context used to query recognition availability and to create recognizers.
 * @param onResult invoked with the first recognition result when a session produces one.
 * @param onError invoked with a human-readable message when recognition fails or is
 *   unavailable (this can happen during construction).
 * @param onListening invoked with `true` once the recognizer is ready for speech and with
 *   `false` when the session ends, fails, is stopped or is cancelled.
 */
class VoiceInputManager(
    private val context: Context,
    private val onResult: (String) -> Unit,
    private val onError: (String) -> Unit,
    private val onListening: (Boolean) -> Unit
) {

    private var speechRecognizer: SpeechRecognizer? = null
    private var isListening = false
    private val handler = android.os.Handler(android.os.Looper.getMainLooper())

    // True from the moment startListening() queues the delayed start until that start runs
    // or is cancelled. Prevents several starts from being queued at once.
    private var isStartPending = false

    init {
        if (SpeechRecognizer.isRecognitionAvailable(context)) {
            speechRecognizer = SpeechRecognizer.createSpeechRecognizer(context)
            speechRecognizer?.setRecognitionListener(createRecognitionListener())
        } else {
            Log.e(TAG, "Speech recognition not available on this device")
            onError("Speech recognition not available")
        }
    }

    /**
     * Builds the listener that translates recognizer callbacks into [isListening] updates
     * and the [onResult] / [onError] / [onListening] callbacks.
     */
    private fun createRecognitionListener() = object : RecognitionListener {
        override fun onReadyForSpeech(params: Bundle?) {
            Log.d(TAG, "Ready for speech")
            isListening = true
            onListening(true)
        }

        override fun onBeginningOfSpeech() {
            Log.d(TAG, "Speech started")
        }

        override fun onRmsChanged(rmsdB: Float) {
            // Audio level changed - could show visual feedback
        }

        override fun onBufferReceived(buffer: ByteArray?) {
            // Partial audio buffer
        }

        override fun onEndOfSpeech() {
            Log.d(TAG, "Speech ended")
            markNotListening()
        }

        override fun onError(error: Int) {
            Log.e(TAG, "Recognition error: $error")
            markNotListening()
            // Qualified on purpose: inside this object a bare `onError` also names this override.
            this@VoiceInputManager.onError(describeError(error))
        }

        override fun onResults(results: Bundle?) {
            Log.d(TAG, "Got results")
            val text = results
                ?.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION)
                ?.firstOrNull()
            if (text != null) {
                Log.d(TAG, "Recognized: $text")
                onResult(text)
            }
            // An empty result set is not reported as an error; the session just ends.
            markNotListening()
        }

        override fun onPartialResults(partialResults: Bundle?) {
            // Partial recognition results (if enabled)
        }

        override fun onEvent(eventType: Int, params: Bundle?) {
            // Recognition event
        }
    }

    /**
     * Start listening for voice input.
     *
     * Does nothing if a session is already active or a start is already queued. Otherwise the
     * previous recognizer is destroyed and a new session is started after
     * [RECOGNIZER_RESTART_DELAY_MS] on the main looper. Failures are reported through [onError].
     */
    fun startListening() {
        if (isListening) {
            Log.w(TAG, "Already listening")
            return
        }
        // isListening only flips in onReadyForSpeech, so without this guard rapid repeated
        // calls would each queue a start and leak all but the last recognizer.
        if (isStartPending) {
            Log.w(TAG, "Start already pending")
            return
        }

        releaseRecognizer()

        // Add delay to ensure Android speech service has fully released resources
        // This prevents error 11 (initialization error) caused by race condition
        isStartPending = true
        handler.postDelayed({
            isStartPending = false
            beginRecognition()
        }, RECOGNIZER_RESTART_DELAY_MS)
    }

    /**
     * Stop listening. Drops a queued start if there is one; if a session is active, asks the
     * recognizer to stop and reports `onListening(false)`.
     */
    fun stopListening() {
        cancelPendingStart()
        if (isListening) {
            speechRecognizer?.stopListening()
            markNotListening()
            Log.d(TAG, "Stopped listening")
        }
    }

    /**
     * Cancel listening. Drops a queued start if there is one; if a session is active, cancels
     * it on the recognizer and reports `onListening(false)`.
     */
    fun cancel() {
        cancelPendingStart()
        if (isListening) {
            speechRecognizer?.cancel()
            markNotListening()
            Log.d(TAG, "Cancelled listening")
        }
    }

    /**
     * Cleanup resources: drops any queued start, destroys the recognizer and clears the
     * listening flag. [onListening] is not invoked from here.
     */
    fun destroy() {
        // Clear the queue first so a queued start cannot run even if releasing the recognizer fails.
        handler.removeCallbacksAndMessages(null)
        isStartPending = false
        releaseRecognizer()
        // No further recognizer callbacks will reset the flag, so clear it here.
        isListening = false
        Log.d(TAG, "Destroyed")
    }

    /**
     * Check if currently listening.
     *
     * @return `true` between the recognizer reporting ready-for-speech and the session
     *   ending, failing, or being stopped/cancelled.
     */
    fun isListening(): Boolean = isListening

    /** Creates a fresh recognizer and starts a session; runs as the delayed part of [startListening]. */
    private fun beginRecognition() {
        if (!SpeechRecognizer.isRecognitionAvailable(context)) {
            Log.e(TAG, "Speech recognition not available on this device")
            onError("Speech recognition not available")
            return
        }

        // Create new SpeechRecognizer instance
        val recognizer = try {
            SpeechRecognizer.createSpeechRecognizer(context).also { created ->
                // Store before attaching the listener so a failure below can still release it.
                speechRecognizer = created
                created.setRecognitionListener(createRecognitionListener())
            }
        } catch (e: Exception) {
            Log.e(TAG, "Failed to create speech recognizer", e)
            releaseRecognizer()
            onError("Failed to initialize: ${e.message}")
            return
        }

        val intent = buildRecognizerIntent()

        // Start listening
        try {
            recognizer.startListening(intent)
            Log.d(TAG, "Started listening")
        } catch (e: Exception) {
            Log.e(TAG, "Failed to start listening", e)
            markNotListening()
            onError("Failed to start: ${e.message}")
        }
    }

    /** Builds the free-form recognition intent with extended silence timeouts. */
    private fun buildRecognizerIntent(): Intent =
        Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH).apply {
            putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL, RecognizerIntent.LANGUAGE_MODEL_FREE_FORM)
            // EXTRA_LANGUAGE is documented as a BCP 47 tag string (e.g. "en-US"), not a Locale object.
            putExtra(RecognizerIntent.EXTRA_LANGUAGE, Locale.getDefault().toLanguageTag())
            putExtra(RecognizerIntent.EXTRA_MAX_RESULTS, 1)
            putExtra(RecognizerIntent.EXTRA_PARTIAL_RESULTS, false)

            // Extend silence detection timeouts for longer pauses
            putExtra(RecognizerIntent.EXTRA_SPEECH_INPUT_COMPLETE_SILENCE_LENGTH_MILLIS, 6500L)
            putExtra(RecognizerIntent.EXTRA_SPEECH_INPUT_POSSIBLY_COMPLETE_SILENCE_LENGTH_MILLIS, 5000L)
            putExtra(RecognizerIntent.EXTRA_SPEECH_INPUT_MINIMUM_LENGTH_MILLIS, 12000L)
        }

    /** Maps a [SpeechRecognizer] error code to the message passed to [onError]. */
    private fun describeError(error: Int): String = when (error) {
        SpeechRecognizer.ERROR_AUDIO -> "Audio recording error (microphone busy or unavailable)"
        SpeechRecognizer.ERROR_CLIENT -> "Client error (recognizer not ready - try again)"
        SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS -> "Missing permissions"
        SpeechRecognizer.ERROR_NETWORK -> "Network error"
        SpeechRecognizer.ERROR_NETWORK_TIMEOUT -> "Network timeout"
        SpeechRecognizer.ERROR_NO_MATCH -> "No speech detected - try again"
        SpeechRecognizer.ERROR_RECOGNIZER_BUSY -> "Microphone busy - please wait and try again"
        SpeechRecognizer.ERROR_SERVER -> "Server error"
        SpeechRecognizer.ERROR_SPEECH_TIMEOUT -> "Speech timeout"
        ERROR_CODE_RECOGNIZER_INIT -> "Recognizer initialization error (try again in a moment)"
        else -> "Unknown error: $error"
    }

    /** Clears the listening flag and notifies [onListening]. */
    private fun markNotListening() {
        isListening = false
        onListening(false)
    }

    /** Removes a queued delayed start, if any, so recognition does not begin after a stop/cancel. */
    private fun cancelPendingStart() {
        if (isStartPending) {
            // The handler is private and only ever carries the delayed start.
            handler.removeCallbacksAndMessages(null)
            isStartPending = false
            Log.d(TAG, "Cancelled pending start")
        }
    }

    /** Destroys the current recognizer (best effort) and always drops the reference to it. */
    private fun releaseRecognizer() {
        try {
            speechRecognizer?.destroy()
        } catch (e: Exception) {
            Log.w(TAG, "Error destroying previous recognizer", e)
        } finally {
            speechRecognizer = null
        }
    }

    companion object {
        private const val TAG = "VoiceInputManager"

        // Delay between destroying the old recognizer and creating the next one.
        private const val RECOGNIZER_RESTART_DELAY_MS = 150L

        // Raw error code 11, reported here as an initialization error.
        // NOTE(review): this appears to match SpeechRecognizer.ERROR_SERVER_DISCONNECTED
        // (API 31+); kept as a literal so the file does not depend on that SDK level — confirm.
        private const val ERROR_CODE_RECOGNIZER_INIT = 11
    }
}