// alfred-mobile/app/src/main/java/com/openclaw/alfred/voice/TTSManager.kt

package com.openclaw.alfred.voice

import android.content.Context
import android.media.MediaPlayer
import android.speech.tts.TextToSpeech
import android.util.Log
import com.openclaw.alfred.BuildConfig
import kotlinx.coroutines.*
import okhttp3.MediaType.Companion.toMediaType
import okhttp3.OkHttpClient
import okhttp3.Request
import okhttp3.RequestBody.Companion.toRequestBody
import org.json.JSONObject
import java.io.File
import java.io.FileOutputStream
import java.util.*
import java.util.concurrent.TimeUnit

/**
 * Text-to-speech front end.
 *
 * Text is sanitized, POSTed to the gateway's `/api/tts` proxy (derived from
 * [BuildConfig.GATEWAY_URL]) and the returned audio URL is streamed through a
 * [MediaPlayer]. Android's built-in [TextToSpeech] engine is the fallback when no
 * ElevenLabs API key is configured, when the proxy call fails, or when streaming
 * playback reports an error.
 *
 * Call [destroy] when the owner goes away to release the player, the TTS engine and
 * the coroutine scope.
 */
class TTSManager(private val context: Context) {

    // Long read timeout: the proxy may take a while to answer for long responses.
    private val client = OkHttpClient.Builder()
        .connectTimeout(30, TimeUnit.SECONDS)
        .readTimeout(120, TimeUnit.SECONDS)
        .writeTimeout(30, TimeUnit.SECONDS)
        .build()
    private var mediaPlayer: MediaPlayer? = null
    private val scope = CoroutineScope(Dispatchers.IO + SupervisorJob())

    // Only used as a switch: an empty key means "skip the proxy, use Android TTS".
    private val apiKey = BuildConfig.ELEVENLABS_API_KEY

    /** HTTP(S) form of the gateway URL, which is configured with a ws:// or wss:// scheme. */
    private val gatewayHttpUrl: String
        get() = BuildConfig.GATEWAY_URL.replace("wss://", "https://").replace("ws://", "http://")

    /**
     * Returns the voice ID stored under `tts_voice_id` in the `alfred_settings`
     * preferences, or [BuildConfig.ELEVENLABS_VOICE_ID] when none is stored.
     */
    private fun getVoiceId(): String {
        val defaultVoice = BuildConfig.ELEVENLABS_VOICE_ID
        return context.getSharedPreferences("alfred_settings", Context.MODE_PRIVATE)
            .getString("tts_voice_id", defaultVoice)
            ?: defaultVoice
    }

    // Fallback Android TTS. These must be declared before the init block below so
    // their initializers do not overwrite what init assigns.
    private var androidTTS: TextToSpeech? = null

    // Written from the TextToSpeech init callback and read from whichever thread
    // calls speakWithAndroidTTS, hence volatile.
    @Volatile
    private var ttsReady = false

    init {
        // Initialize Android TTS as fallback
        androidTTS = TextToSpeech(context) { status ->
            if (status == TextToSpeech.SUCCESS) {
                androidTTS?.language = Locale.US
                ttsReady = true
                Log.d(TAG, "Android TTS initialized successfully")
            } else {
                Log.e(TAG, "Android TTS initialization failed")
            }
        }
    }

    /**
     * Strips markdown and other unspeakable characters from [text].
     *
     * @return the cleaned text, trimmed and with whitespace runs collapsed to a single
     * space; may be empty if nothing speakable remains.
     */
    private fun sanitizeTextForSpeech(text: String): String {
        val cleaned = text
            // Fenced code blocks go first: the inline-code rule would otherwise strip
            // one backtick pair from the fence and leave the code to be read aloud.
            .replace(CODE_FENCE, "")
            // Emphasis and inline code: keep the inner text.
            .replace(BOLD_ASTERISKS, "$1")
            .replace(ITALIC_ASTERISK, "$1")
            .replace(BOLD_UNDERSCORES, "$1")
            .replace(ITALIC_UNDERSCORE, "$1")
            .replace(STRIKETHROUGH, "$1")
            .replace(INLINE_CODE, "$1")
            // Links: keep the label of [label](url), drop bare URLs entirely.
            .replace(MARKDOWN_LINK, "$1")
            .replace(BARE_URL, "")
            // Line-leading markers: bullets, numbered items, headers, blockquotes.
            .replace(BULLET_MARKER, "")
            .replace(NUMBER_MARKER, "")
            .replace(HEADER_MARKER, "")
            .replace(QUOTE_MARKER, "")
            // Emoji shortcodes such as :smile:
            .replace(EMOJI_SHORTCODE, "")
            // Brackets and parentheses are dropped, their content stays.
            .replace(BRACKETS, "")
            // "..." -> ".", "?!" -> last mark of the run
            .replace(REPEATED_PUNCTUATION, "$1")
            // Everything that is not a letter, digit, whitespace or basic punctuation.
            // Unicode classes are used so accented and non-Latin letters survive.
            .replace(UNSPEAKABLE, "")
            .replace(WHITESPACE_RUN, " ")
            .trim()

        Log.d(TAG, "Sanitized for TTS: '$text' -> '$cleaned'")
        return cleaned
    }

    /**
     * Converts [text] to speech and plays it.
     *
     * The text is sanitized first; if nothing speakable remains, [onComplete] is
     * invoked without playing anything. With an API key configured the work runs on a
     * background coroutine and falls back to Android TTS on any failure.
     *
     * @param text raw (possibly markdown) text to speak.
     * @param onComplete invoked once playback has finished or was skipped.
     * @param onError invoked with a message when neither streaming nor the Android
     * TTS fallback could speak the text.
     */
    fun speak(text: String, onComplete: () -> Unit = {}, onError: (String) -> Unit = {}) {
        if (apiKey.isEmpty()) {
            Log.w(TAG, "ElevenLabs API key not configured, using Android TTS")
            // Sanitize here too so the fallback engine never reads markdown aloud.
            val spoken = sanitizeTextForSpeech(text)
            if (spoken.isBlank()) {
                Log.w(TAG, "Text became empty after sanitization, skipping TTS")
                onComplete()
            } else {
                speakWithAndroidTTS(spoken, onComplete, onError)
            }
            return
        }

        scope.launch {
            // Sanitized once up front so the error path below can reuse it.
            val cleanText = sanitizeTextForSpeech(text)
            try {
                if (cleanText.isBlank()) {
                    Log.w(TAG, "Text became empty after sanitization, skipping TTS")
                    withContext(Dispatchers.Main) { onComplete() }
                    return@launch
                }

                Log.d(TAG, "Converting text to speech: ${cleanText.take(50)}...")

                val audioUrl = callTTSProxy(cleanText, getVoiceId())

                // MediaPlayer and the TTS fallback are both driven from the main thread.
                withContext(Dispatchers.Main) {
                    if (audioUrl == null) {
                        Log.w(TAG, "TTS proxy failed, falling back to Android TTS")
                        speakWithAndroidTTS(cleanText, onComplete, onError)
                    } else {
                        Log.d(TAG, "TTS audio URL: $audioUrl")
                        playStreamingAudio("$gatewayHttpUrl$audioUrl", cleanText, onComplete, onError)
                    }
                }
            } catch (e: CancellationException) {
                // Cancellation (e.g. destroy()) is not a TTS failure; let it propagate.
                throw e
            } catch (e: Exception) {
                Log.e(TAG, "TTS error, falling back to Android TTS", e)
                withContext(Dispatchers.Main) {
                    speakWithAndroidTTS(cleanText, onComplete, onError)
                }
            }
        }
    }

    /**
     * POSTs `{text, voiceId}` as JSON to the gateway's `/api/tts` endpoint.
     *
     * Blocking; must be called off the main thread.
     *
     * @return the `audioUrl` field of the JSON response, or `null` on a non-2xx
     * status, a missing body, or any exception (which is logged).
     */
    private fun callTTSProxy(text: String, voiceId: String): String? {
        return try {
            val payload = JSONObject().apply {
                put("text", text)
                put("voiceId", voiceId)
            }

            val request = Request.Builder()
                .url("$gatewayHttpUrl/api/tts")
                .post(payload.toString().toRequestBody("application/json".toMediaType()))
                .build()

            client.newCall(request).execute().use { response ->
                if (response.isSuccessful) {
                    response.body?.string()?.let { JSONObject(it).getString("audioUrl") }
                } else {
                    val errorBody = response.body?.string() ?: "no body"
                    Log.e(TAG, "TTS proxy error: ${response.code} ${response.message}")
                    Log.e(TAG, "Error body: $errorBody")
                    null
                }
            }
        } catch (e: Exception) {
            // Best effort: any failure here just means the caller uses the fallback.
            Log.e(TAG, "Failed to call TTS proxy", e)
            null
        }
    }

    /**
     * Speaks [text] with Android's built-in TTS engine.
     *
     * [onError] is called synchronously when the engine is not ready, cannot queue
     * the utterance, or throws. Results reported by the engine's progress listener
     * are re-posted to the main thread before [onComplete] / [onError] run.
     */
    private fun speakWithAndroidTTS(text: String, onComplete: () -> Unit, onError: (String) -> Unit) {
        val engine = androidTTS
        if (!ttsReady || engine == null) {
            onError("Android TTS not ready")
            return
        }

        // Aliases keep the listener's own onError(String?) override from being
        // confused with the callback parameter of the same name.
        val notifyDone = onComplete
        val notifyError = onError

        try {
            engine.setOnUtteranceProgressListener(object : android.speech.tts.UtteranceProgressListener() {
                override fun onStart(utteranceId: String?) {
                    Log.d(TAG, "Android TTS started")
                }

                override fun onDone(utteranceId: String?) {
                    Log.d(TAG, "Android TTS completed")
                    // Listener callbacks arrive on an engine thread; hop to main.
                    scope.launch(Dispatchers.Main) { notifyDone() }
                }

                override fun onError(utteranceId: String?) {
                    Log.e(TAG, "Android TTS error")
                    scope.launch(Dispatchers.Main) { notifyError("Android TTS error") }
                }
            })

            val queued = engine.speak(
                text,
                TextToSpeech.QUEUE_FLUSH,
                null,
                "alfred-${System.currentTimeMillis()}"
            )
            if (queued == TextToSpeech.ERROR) {
                // Nothing was queued, so the listener will never fire for this request.
                Log.e(TAG, "Android TTS failed to queue utterance")
                notifyError("Android TTS failed to queue utterance")
            } else {
                Log.d(TAG, "Speaking with Android TTS")
            }
        } catch (e: Exception) {
            Log.e(TAG, "Failed to use Android TTS", e)
            notifyError("Android TTS failed: ${e.message}")
        }
    }

    /**
     * Streams and plays the audio at [streamUrl], replacing any current playback.
     *
     * If the player reports an error during preparation or playback, [fallbackText]
     * is spoken with Android TTS instead. A failure while setting the player up is
     * reported through [onError].
     */
    private fun playStreamingAudio(
        streamUrl: String,
        fallbackText: String,
        onComplete: () -> Unit,
        onError: (String) -> Unit
    ) {
        try {
            // Stop any existing playback
            stopPlayback()

            // Store the player before configuring it so a failure below can release it.
            val player = MediaPlayer().also { mediaPlayer = it }

            player.setDataSource(streamUrl)
            player.setOnPreparedListener { prepared ->
                Log.d(TAG, "Stream prepared, starting playback")
                prepared.start()
            }
            player.setOnCompletionListener {
                Log.d(TAG, "Playback completed")
                stopPlayback()
                onComplete()
            }
            player.setOnErrorListener { _, what, extra ->
                Log.e(TAG, "MediaPlayer error: what=$what extra=$extra")
                stopPlayback()

                Log.w(TAG, "Streaming failed, falling back to Android TTS")
                speakWithAndroidTTS(fallbackText, onComplete, onError)
                true  // handled: suppresses the completion listener for this error
            }
            player.setOnInfoListener { _, what, extra ->
                Log.d(TAG, "MediaPlayer info: what=$what extra=$extra")
                false
            }

            // Prepare async to avoid blocking
            player.prepareAsync()

            Log.d(TAG, "Streaming audio from: $streamUrl")
        } catch (e: Exception) {
            Log.e(TAG, "Failed to stream audio", e)
            releasePlayer()
            onError("Failed to stream audio: ${e.message}")
        }
    }

    /** Stops (if playing) and releases the current [MediaPlayer], if any. */
    private fun releasePlayer() {
        val player = mediaPlayer ?: return
        mediaPlayer = null
        try {
            if (player.isPlaying) {
                player.stop()
            }
        } catch (e: IllegalStateException) {
            // The player was in a state where it cannot be queried or stopped;
            // it still has to be released.
            Log.w(TAG, "MediaPlayer could not be stopped cleanly", e)
        } finally {
            player.release()
        }
    }

    /**
     * Stops current playback of both the streaming player and Android TTS.
     */
    fun stopPlayback() {
        releasePlayer()
        androidTTS?.stop()
    }

    /**
     * Returns `true` while either the streaming player or Android TTS is producing audio.
     */
    fun isPlaying(): Boolean =
        mediaPlayer?.isPlaying == true || androidTTS?.isSpeaking == true

    /**
     * Releases all resources. The instance must not be used afterwards.
     */
    fun destroy() {
        // Cancel first so an in-flight request cannot start playback after teardown.
        scope.cancel()
        stopPlayback()
        ttsReady = false
        androidTTS?.shutdown()
        androidTTS = null
    }

    private companion object {
        const val TAG = "TTSManager"

        // Compiled once; sanitizeTextForSpeech runs for every spoken message.
        val CODE_FENCE = Regex("```[\\s\\S]*?```")
        val BOLD_ASTERISKS = Regex("\\*\\*([^*]+)\\*\\*")
        val ITALIC_ASTERISK = Regex("\\*([^*]+)\\*")
        val BOLD_UNDERSCORES = Regex("__([^_]+)__")
        val ITALIC_UNDERSCORE = Regex("_([^_]+)_")
        val STRIKETHROUGH = Regex("~~([^~]+)~~")
        val INLINE_CODE = Regex("`([^`]+)`")
        val MARKDOWN_LINK = Regex("\\[([^]]+)]\\([^)]+\\)")
        val BARE_URL = Regex("https?://\\S+")
        val BULLET_MARKER = Regex("^[\\s]*[-*+•]\\s+", RegexOption.MULTILINE)
        val NUMBER_MARKER = Regex("^[\\s]*\\d+\\.\\s+", RegexOption.MULTILINE)
        val HEADER_MARKER = Regex("^#+\\s+", RegexOption.MULTILINE)
        val QUOTE_MARKER = Regex("^>\\s+", RegexOption.MULTILINE)
        val EMOJI_SHORTCODE = Regex(":[a-z_]+:")
        val BRACKETS = Regex("[\\[\\]()]")
        val REPEATED_PUNCTUATION = Regex("([.!?]){2,}")
        val UNSPEAKABLE = Regex("[^\\p{L}\\p{M}\\p{N}\\s.,!?;:'-]")
        val WHITESPACE_RUN = Regex("\\s+")
    }
}