Rewrite to use chat.completions with manual MCP tool execution
All checks were successful
OpenWebUI Discord Bot / Build-and-Push (push) Successful in 55s
Major refactor to fix Bedrock + MCP compatibility issues:

- Removed Responses API approach (doesn't work with Bedrock)
- Added execute_mcp_tool() to manually call tools via /mcp/call_tool
- Rewrote get_available_mcp_tools() to return OpenAI function format
- Implemented manual tool execution loop with max 5 iterations
- Tool results are sent back to model in standard tool response format
- Removed query_needs_tools() function (no longer needed)

How it works:

1. Fetch MCP tools from /v1/mcp/tools and convert to OpenAI format
2. Call chat.completions.create() with tools array
3. When model requests tool calls, execute via POST /mcp/call_tool
4. Send results back to model with role="tool"
5. Loop until model provides final response

This bypasses the broken Responses API and uses working endpoints that are compatible with AWS Bedrock + LiteLLM MCP integration.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
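The sketch below condenses the loop described in the commit message into one place. It is illustrative only, not the bot's actual code (that is in the diff that follows): it assumes a LiteLLM proxy exposing the /v1/mcp/tools and /mcp/call_tool endpoints this commit relies on, the base URL, API key, and model name are placeholders, and the real implementation's error handling, debug logging, and image support are omitted.

import json
import httpx
from openai import OpenAI

LITELLM_API_BASE = "http://localhost:4000"  # placeholder LiteLLM proxy URL
LITELLM_API_KEY = "sk-placeholder"          # placeholder proxy key
MODEL_NAME = "bedrock/claude"               # placeholder Bedrock model routed via LiteLLM

client = OpenAI(base_url=LITELLM_API_BASE, api_key=LITELLM_API_KEY)
headers = {"Authorization": f"Bearer {LITELLM_API_KEY}"}

def fetch_mcp_tools() -> list:
    # Step 1: fetch MCP tools and convert them to OpenAI function-calling format
    data = httpx.get(f"{LITELLM_API_BASE}/v1/mcp/tools", headers=headers).json()
    tools = data.get("tools", data) if isinstance(data, dict) else data
    return [{"type": "function",
             "function": {"name": t["name"],
                          "description": t.get("description", ""),
                          "parameters": t.get("inputSchema", {"type": "object", "properties": {}})}}
            for t in tools]

def call_mcp_tool(name: str, arguments: dict) -> str:
    # Step 3: execute one tool call through LiteLLM's /mcp/call_tool endpoint
    resp = httpx.post(f"{LITELLM_API_BASE}/mcp/call_tool", headers=headers,
                      json={"name": name, "arguments": arguments})
    return resp.text

def answer(messages: list) -> str:
    # Steps 2-5: call chat.completions, run requested tools, feed results back, repeat
    tools = fetch_mcp_tools()
    for _ in range(5):  # max 5 iterations to avoid an infinite tool loop
        response = client.chat.completions.create(
            model=MODEL_NAME, messages=messages, tools=tools, tool_choice="auto")
        msg = response.choices[0].message
        if not msg.tool_calls:
            return msg.content  # no more tool calls: this is the final answer
        # Echo the assistant turn (with its tool_calls), then one role="tool" message per call
        messages.append({"role": "assistant", "content": msg.content,
                         "tool_calls": [{"id": tc.id, "type": "function",
                                         "function": {"name": tc.function.name,
                                                      "arguments": tc.function.arguments}}
                                        for tc in msg.tool_calls]})
        for tc in msg.tool_calls:
            result = call_mcp_tool(tc.function.name, json.loads(tc.function.arguments))
            messages.append({"role": "tool", "tool_call_id": tc.id, "content": result})
    return "Reached max tool iterations without a final answer."

This mirrors the structure in the diff below: get_available_mcp_tools(), execute_mcp_tool(), and the iteration-capped tool loop inside get_ai_response().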
@@ -80,72 +80,100 @@ async def download_image(url: str) -> str | None:
        print(f"Error downloading image from {url}: {e}")
        return None

def query_needs_tools(message: str) -> bool:
    """
    Detect if a query likely needs MCP tool access.
async def execute_mcp_tool(tool_name: str, arguments: dict) -> str:
    """Execute an MCP tool via LiteLLM's /mcp/call_tool endpoint"""
    import json

    Returns True if the message contains keywords suggesting the need for:
    - Real-time data (time, weather, current events)
    - GitHub operations (repos, code, files, issues, PRs)
    - Search/lookup operations
    - File system access
    """
    tool_keywords = [
        # Time-related
        'time', 'clock', 'date', 'today', 'now', 'current', 'when',
        # Weather
        'weather', 'temperature', 'forecast',
        # GitHub operations
        'github', 'repo', 'repository', 'repositories', 'code', 'file', 'files',
        'commit', 'commits', 'branch', 'branches', 'pr', 'pull request',
        'issue', 'issues', 'merge', 'fork', 'clone',
        # Search/lookup
        'search', 'look up', 'find', 'locate', 'get', 'fetch', 'retrieve',
        'show me', 'tell me about', 'what is', 'how many',
        # File operations
        'read', 'open', 'check', 'list', 'contents'
    ]

    message_lower = message.lower()
    needs_tools = any(keyword in message_lower for keyword in tool_keywords)

    if DEBUG_LOGGING and needs_tools:
        matched = [kw for kw in tool_keywords if kw in message_lower]
        debug_log(f"Query needs tools - matched keywords: {matched}")

    return needs_tools

async def get_available_mcp_servers():
    """Query LiteLLM for available MCP servers (used with Responses API)"""
    try:
        base_url = LITELLM_API_BASE.rstrip('/')
        headers = {"x-litellm-api-key": LITELLM_API_KEY}
        headers = {
            "Authorization": f"Bearer {LITELLM_API_KEY}",
            "Content-Type": "application/json"
        }

        debug_log(f"Executing MCP tool: {tool_name} with args: {arguments}")

        async with httpx.AsyncClient(timeout=60.0) as http_client:
            response = await http_client.post(
                f"{base_url}/mcp/call_tool",
                headers=headers,
                json={
                    "name": tool_name,
                    "arguments": arguments
                }
            )

            debug_log(f"MCP call_tool response status: {response.status_code}")

            if response.status_code == 200:
                result = response.json()
                debug_log(f"MCP tool result: {str(result)[:200]}...")

                # MCP returns content in various formats, extract the text
                if isinstance(result, dict):
                    if "content" in result:
                        content = result["content"]
                        if isinstance(content, list) and len(content) > 0:
                            # Handle text content blocks
                            first_content = content[0]
                            if isinstance(first_content, dict) and "text" in first_content:
                                return first_content["text"]
                            return json.dumps(content)
                        return json.dumps(content) if content else "Tool executed successfully"
                    return json.dumps(result)
                return str(result)
            else:
                error_text = response.text
                debug_log(f"MCP call_tool error: {response.status_code} - {error_text}")
                return f"Error executing tool: {response.status_code} - {error_text}"

    except Exception as e:
        debug_log(f"Exception calling MCP tool: {e}")
        import traceback
        debug_log(f"Traceback: {traceback.format_exc()}")
        return f"Error executing tool: {str(e)}"

async def get_available_mcp_tools():
    """Query LiteLLM for available MCP tools and convert to OpenAI function format"""
    try:
        base_url = LITELLM_API_BASE.rstrip('/')
        headers = {"Authorization": f"Bearer {LITELLM_API_KEY}"}

        async with httpx.AsyncClient(timeout=30.0) as http_client:
            # Get MCP server configuration
            server_response = await http_client.get(
                f"{base_url}/v1/mcp/server",
            # Get available MCP tools
            tools_response = await http_client.get(
                f"{base_url}/v1/mcp/tools",
                headers=headers
            )

            if server_response.status_code == 200:
                server_info = server_response.json()
                server_count = len(server_info) if isinstance(server_info, list) else 0
                debug_log(f"MCP server info: found {server_count} servers")
            if tools_response.status_code == 200:
                tools_data = tools_response.json()
                mcp_tools = tools_data.get("tools", []) if isinstance(tools_data, dict) else tools_data
                debug_log(f"Found {len(mcp_tools)} MCP tools")

                if server_count > 0:
                    # Log server names for visibility
                    server_names = [s.get("server_name") for s in server_info if isinstance(s, dict) and s.get("server_name")]
                    debug_log(f"Available MCP servers: {server_names}")
                # Convert MCP tools to OpenAI function calling format
                openai_tools = []
                for tool in mcp_tools:
                    if isinstance(tool, dict) and tool.get("name") and tool.get("description"):
                        openai_tool = {
                            "type": "function",
                            "function": {
                                "name": tool["name"],
                                "description": tool.get("description", ""),
                                "parameters": tool.get("inputSchema", {"type": "object", "properties": {}})
                            }
                        }
                        openai_tools.append(openai_tool)

                return {"server": server_info}
                debug_log(f"Converted {len(openai_tools)} tools to OpenAI format")
                return openai_tools
            else:
                debug_log(f"MCP server endpoint returned {server_response.status_code}: {server_response.text}")
                debug_log(f"MCP tools endpoint returned {tools_response.status_code}")

    except Exception as e:
        debug_log(f"Error fetching MCP servers: {e}")
        debug_log(f"Error fetching MCP tools: {e}")

    return None
    return []

async def get_chat_history(channel, bot_user_id: int, limit: int = 50) -> List[Dict[str, Any]]:
    """
@@ -291,7 +319,9 @@ async def get_chat_history(channel, bot_user_id: int, limit: int = 50) -> List[D

async def get_ai_response(history_messages: List[Dict[str, Any]], user_message: str, image_urls: List[str] = None) -> str:
    """
    Get AI response using LiteLLM Responses API with automatic MCP tool execution.
    Get AI response using LiteLLM chat.completions with manual MCP tool execution.

    Uses manual tool execution loop since Responses API doesn't work with Bedrock + MCP.

    Args:
        history_messages: List of previous conversation messages with roles
@@ -301,133 +331,115 @@ async def get_ai_response(history_messages: List[Dict[str, Any]], user_message:
    Returns:
        AI response string
    """
    import json

    # Build messages array
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    messages.extend(history_messages)

    # Build current user message
    if image_urls:
        content_parts = [{"type": "text", "text": user_message}]
        for url in image_urls:
            base64_image = await download_image(url)
            if base64_image:
                content_parts.append({
                    "type": "image_url",
                    "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}
                })
        messages.append({"role": "user", "content": content_parts})
    else:
        messages.append({"role": "user", "content": user_message})

    try:
        # When tools are enabled, use Responses API with MCP for automatic tool execution
        # Build request parameters
        request_params = {
            "model": MODEL_NAME,
            "messages": messages,
            "temperature": 0.7,
        }

        # Add MCP tools if enabled
        tools = []
        if ENABLE_TOOLS:
            debug_log("Tools enabled - using Responses API with MCP auto-execution")
            debug_log("Tools enabled - fetching MCP tools")
            tools = await get_available_mcp_tools()

            # Query MCP server info to get server_label
            mcp_info = await get_available_mcp_servers()
            if tools:
                request_params["tools"] = tools
                request_params["tool_choice"] = "auto"
                debug_log(f"Added {len(tools)} tools to request")

            # Build input array with system prompt, history, and current message
            input_messages = []
        debug_log(f"Calling chat.completions with {len(tools)} tools")
        response = client.chat.completions.create(**request_params)

            # Add system prompt as developer role for newer models
            input_messages.append({
                "role": "developer",
                "content": SYSTEM_PROMPT
        # Handle tool calls if present
        response_message = response.choices[0].message
        tool_calls = getattr(response_message, 'tool_calls', None)

        # Tool execution loop (max 5 iterations to prevent infinite loops)
        max_iterations = 5
        iteration = 0

        while tool_calls and len(tool_calls) > 0 and iteration < max_iterations:
            iteration += 1
            debug_log(f"Tool call iteration {iteration}: Model requested {len(tool_calls)} tool calls")

            # Add assistant's response with tool calls to messages
            messages.append({
                "role": "assistant",
                "content": response_message.content,
                "tool_calls": [
                    {
                        "id": tc.id,
                        "type": "function",
                        "function": {
                            "name": tc.function.name,
                            "arguments": tc.function.arguments
                        }
                    }
                    for tc in tool_calls
                ]
            })

            # Add conversation history
            for msg in history_messages:
                input_messages.append({
                    "role": msg["role"],
                    "content": msg["content"]
            # Execute each tool call via MCP
            for tool_call in tool_calls:
                function_name = tool_call.function.name
                function_args_str = tool_call.function.arguments

                debug_log(f"Executing tool: {function_name}")

                # Parse arguments
                try:
                    args_dict = json.loads(function_args_str) if isinstance(function_args_str, str) else function_args_str
                except json.JSONDecodeError:
                    args_dict = {}
                    debug_log(f"Failed to parse tool arguments: {function_args_str}")

                # Execute the tool via MCP
                tool_result = await execute_mcp_tool(function_name, args_dict)

                # Add tool result to messages
                messages.append({
                    "role": "tool",
                    "tool_call_id": tool_call.id,
                    "content": tool_result
                })

            # Build current user message
            if image_urls:
                # Multi-modal message with text and images
                content_parts = [{"type": "text", "text": user_message}]
                for url in image_urls:
                    base64_image = await download_image(url)
                    if base64_image:
                        content_parts.append({
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/jpeg;base64,{base64_image}"
                            }
                        })
                input_messages.append({"role": "user", "content": content_parts})
            else:
                input_messages.append({"role": "user", "content": user_message})
            # Get next response from model
            debug_log("Getting model response after tool execution")
            request_params["messages"] = messages
            response = client.chat.completions.create(**request_params)

            # Build MCP tools configuration
            tools_config = []
            if mcp_info and isinstance(mcp_info, dict):
                server_list = mcp_info.get("server", [])
                if isinstance(server_list, list) and len(server_list) > 0:
                    for server_info in server_list:
                        server_name = server_info.get("server_name")
                        if server_name:
                            tools_config.append({
                                "type": "mcp",
                                "server_label": server_name,
                                "server_url": "litellm_proxy", # Use LiteLLM as MCP gateway
                                "require_approval": "never" # Automatic tool execution
                            })
                            debug_log(f"Added MCP server '{server_name}' with auto-execution")
            response_message = response.choices[0].message
            tool_calls = getattr(response_message, 'tool_calls', None)

            if not tools_config:
                debug_log("No MCP servers found, falling back to standard chat completions")
                # Fall through to standard chat completions below
            else:
                # Determine if we should force tool usage based on query content
                tool_choice_value = "required" if query_needs_tools(user_message) else "auto"
                debug_log(f"Tool choice mode: {tool_choice_value}")
        if iteration >= max_iterations:
            debug_log(f"Warning: Reached max tool iterations ({max_iterations})")

                # Use Responses API with MCP tools
                debug_log(f"Calling Responses API with {len(tools_config)} MCP servers")
                debug_log(f"Input messages: {len(input_messages)} messages")

                response = client.responses.create(
                    model=MODEL_NAME,
                    input=input_messages,
                    tools=tools_config,
                    tool_choice=tool_choice_value,
                    stream=False
                )

                debug_log(f"Response received, status: {getattr(response, 'status', 'unknown')}")

                # Extract text from Responses API format
                # Try the shorthand first
                response_text = getattr(response, 'output_text', None)

                if response_text:
                    debug_log(f"Got response via output_text shorthand: {response_text[:100]}...")
                    return response_text

                # Otherwise navigate the structure
                if hasattr(response, 'output') and len(response.output) > 0:
                    for output in response.output:
                        if hasattr(output, 'type') and output.type == "message":
                            if hasattr(output, 'content') and len(output.content) > 0:
                                for content in output.content:
                                    if hasattr(content, 'type') and content.type == "output_text":
                                        debug_log(f"Got response via structure navigation: {content.text[:100]}...")
                                        return content.text

                debug_log(f"Unexpected response format: {response}")
                debug_log(f"Response attributes: {dir(response)}")
                return "I received a response but couldn't extract the text. Please try again."

        # Standard chat completions (when tools disabled or MCP not available)
        debug_log("Using standard chat completions")

        messages = [{"role": "system", "content": SYSTEM_PROMPT}]
        messages.extend(history_messages)

        if image_urls:
            content_parts = [{"type": "text", "text": user_message}]
            for url in image_urls:
                base64_image = await download_image(url)
                if base64_image:
                    content_parts.append({
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}
                    })
            messages.append({"role": "user", "content": content_parts})
        else:
            messages.append({"role": "user", "content": user_message})

        response = client.chat.completions.create(
            model=MODEL_NAME,
            messages=messages,
            temperature=0.7
        )

        return response.choices[0].message.content
        final_content = response.choices[0].message.content
        debug_log(f"Final response: {final_content[:100] if final_content else 'None'}...")
        return final_content or "I received a response but it was empty. Please try again."

    except Exception as e:
        error_msg = f"Error calling LiteLLM API: {str(e)}"
