Rewrite to use chat.completions with manual MCP tool execution
All checks were successful
OpenWebUI Discord Bot / Build-and-Push (push) Successful in 55s

Major refactor to fix Bedrock + MCP compatibility issues:

- Removed the Responses API approach (it doesn't work with Bedrock)
- Added execute_mcp_tool() to manually call tools via /mcp/call_tool
- Rewrote get_available_mcp_tools() to return the OpenAI function format (see the sketch after this list)
- Implemented a manual tool-execution loop capped at 5 iterations
- Tool results are sent back to the model in the standard tool response format
- Removed the query_needs_tools() function (no longer needed)
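
For illustration, a minimal sketch of what that conversion produces. The `get_current_time` tool and its schema are hypothetical, but the field mapping mirrors the code in this commit: MCP's `inputSchema` is already JSON Schema, so it maps directly onto the function's `parameters`.

```python
# Hypothetical MCP tool entry, as returned by GET /v1/mcp/tools
mcp_tool = {
    "name": "get_current_time",
    "description": "Return the current time for a timezone",
    "inputSchema": {
        "type": "object",
        "properties": {"timezone": {"type": "string"}},
    },
}

# The OpenAI function-calling entry built from it
openai_tool = {
    "type": "function",
    "function": {
        "name": mcp_tool["name"],
        "description": mcp_tool.get("description", ""),
        "parameters": mcp_tool.get("inputSchema", {"type": "object", "properties": {}}),
    },
}
```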

How it works:
1. Fetch MCP tools from /v1/mcp/tools and convert them to the OpenAI function format
2. Call chat.completions.create() with the tools array
3. When the model requests tool calls, execute each one via POST /mcp/call_tool
4. Send the results back to the model as role="tool" messages
5. Loop until the model returns a final response

This bypasses the broken Responses API and uses working endpoints
that are compatible with the AWS Bedrock + LiteLLM MCP integration.
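
In code, the loop is roughly the sketch below. It assumes the module-level `client` and `MODEL_NAME` plus the `get_available_mcp_tools()` and `execute_mcp_tool()` helpers from the diff further down; the surrounding error handling and image handling are omitted.

```python
import json

async def run_with_tools(messages: list[dict]) -> str:
    """Minimal sketch of the manual MCP tool-execution loop."""
    params = {"model": MODEL_NAME, "messages": messages}
    tools = await get_available_mcp_tools()               # step 1: fetch + convert
    if tools:
        params["tools"] = tools
        params["tool_choice"] = "auto"
    response = client.chat.completions.create(**params)   # step 2
    message = response.choices[0].message
    for _ in range(5):  # bounded, mirroring the max-5-iterations guard
        tool_calls = getattr(message, "tool_calls", None)
        if not tool_calls:
            break  # step 5: model produced a final answer
        # Echo the assistant turn (with its tool_calls) back into history
        messages.append({
            "role": "assistant",
            "content": message.content,
            "tool_calls": [
                {"id": tc.id, "type": "function",
                 "function": {"name": tc.function.name,
                              "arguments": tc.function.arguments}}
                for tc in tool_calls
            ],
        })
        for tc in tool_calls:
            args = json.loads(tc.function.arguments or "{}")
            result = await execute_mcp_tool(tc.function.name, args)  # step 3
            messages.append({"role": "tool",                         # step 4
                             "tool_call_id": tc.id,
                             "content": result})
        response = client.chat.completions.create(**params)
        message = response.choices[0].message
    return message.content or ""
```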

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-12 11:34:24 -08:00
parent aca70dbd0b
commit 94651b6ec1


@@ -80,72 +80,100 @@ async def download_image(url: str) -> str | None:
         print(f"Error downloading image from {url}: {e}")
         return None
-def query_needs_tools(message: str) -> bool:
-    """
-    Detect if a query likely needs MCP tool access.
-    Returns True if the message contains keywords suggesting the need for:
-    - Real-time data (time, weather, current events)
-    - GitHub operations (repos, code, files, issues, PRs)
-    - Search/lookup operations
-    - File system access
-    """
-    tool_keywords = [
-        # Time-related
-        'time', 'clock', 'date', 'today', 'now', 'current', 'when',
-        # Weather
-        'weather', 'temperature', 'forecast',
-        # GitHub operations
-        'github', 'repo', 'repository', 'repositories', 'code', 'file', 'files',
-        'commit', 'commits', 'branch', 'branches', 'pr', 'pull request',
-        'issue', 'issues', 'merge', 'fork', 'clone',
-        # Search/lookup
-        'search', 'look up', 'find', 'locate', 'get', 'fetch', 'retrieve',
-        'show me', 'tell me about', 'what is', 'how many',
-        # File operations
-        'read', 'open', 'check', 'list', 'contents'
-    ]
-    message_lower = message.lower()
-    needs_tools = any(keyword in message_lower for keyword in tool_keywords)
-    if DEBUG_LOGGING and needs_tools:
-        matched = [kw for kw in tool_keywords if kw in message_lower]
-        debug_log(f"Query needs tools - matched keywords: {matched}")
-    return needs_tools
-async def get_available_mcp_servers():
-    """Query LiteLLM for available MCP servers (used with Responses API)"""
+async def execute_mcp_tool(tool_name: str, arguments: dict) -> str:
+    """Execute an MCP tool via LiteLLM's /mcp/call_tool endpoint"""
+    import json
     try:
         base_url = LITELLM_API_BASE.rstrip('/')
-        headers = {"x-litellm-api-key": LITELLM_API_KEY}
+        headers = {
+            "Authorization": f"Bearer {LITELLM_API_KEY}",
+            "Content-Type": "application/json"
+        }
+        debug_log(f"Executing MCP tool: {tool_name} with args: {arguments}")
+        async with httpx.AsyncClient(timeout=60.0) as http_client:
+            response = await http_client.post(
+                f"{base_url}/mcp/call_tool",
+                headers=headers,
+                json={
+                    "name": tool_name,
+                    "arguments": arguments
+                }
+            )
+            debug_log(f"MCP call_tool response status: {response.status_code}")
+            if response.status_code == 200:
+                result = response.json()
+                debug_log(f"MCP tool result: {str(result)[:200]}...")
+                # MCP returns content in various formats, extract the text
+                if isinstance(result, dict):
+                    if "content" in result:
+                        content = result["content"]
+                        if isinstance(content, list) and len(content) > 0:
+                            # Handle text content blocks
+                            first_content = content[0]
+                            if isinstance(first_content, dict) and "text" in first_content:
+                                return first_content["text"]
+                            return json.dumps(content)
+                        return json.dumps(content) if content else "Tool executed successfully"
+                    return json.dumps(result)
+                return str(result)
+            else:
+                error_text = response.text
+                debug_log(f"MCP call_tool error: {response.status_code} - {error_text}")
+                return f"Error executing tool: {response.status_code} - {error_text}"
+    except Exception as e:
+        debug_log(f"Exception calling MCP tool: {e}")
+        import traceback
+        debug_log(f"Traceback: {traceback.format_exc()}")
+        return f"Error executing tool: {str(e)}"
+async def get_available_mcp_tools():
+    """Query LiteLLM for available MCP tools and convert to OpenAI function format"""
+    try:
+        base_url = LITELLM_API_BASE.rstrip('/')
+        headers = {"Authorization": f"Bearer {LITELLM_API_KEY}"}
         async with httpx.AsyncClient(timeout=30.0) as http_client:
-            # Get MCP server configuration
-            server_response = await http_client.get(
-                f"{base_url}/v1/mcp/server",
+            # Get available MCP tools
+            tools_response = await http_client.get(
                f"{base_url}/v1/mcp/tools",
                 headers=headers
             )
-            if server_response.status_code == 200:
-                server_info = server_response.json()
-                server_count = len(server_info) if isinstance(server_info, list) else 0
-                debug_log(f"MCP server info: found {server_count} servers")
-                if server_count > 0:
-                    # Log server names for visibility
-                    server_names = [s.get("server_name") for s in server_info if isinstance(s, dict) and s.get("server_name")]
-                    debug_log(f"Available MCP servers: {server_names}")
-                return {"server": server_info}
+            if tools_response.status_code == 200:
+                tools_data = tools_response.json()
+                mcp_tools = tools_data.get("tools", []) if isinstance(tools_data, dict) else tools_data
+                debug_log(f"Found {len(mcp_tools)} MCP tools")
+                # Convert MCP tools to OpenAI function calling format
+                openai_tools = []
+                for tool in mcp_tools:
+                    if isinstance(tool, dict) and tool.get("name") and tool.get("description"):
+                        openai_tool = {
+                            "type": "function",
+                            "function": {
+                                "name": tool["name"],
+                                "description": tool.get("description", ""),
+                                "parameters": tool.get("inputSchema", {"type": "object", "properties": {}})
+                            }
+                        }
+                        openai_tools.append(openai_tool)
+                debug_log(f"Converted {len(openai_tools)} tools to OpenAI format")
+                return openai_tools
             else:
-                debug_log(f"MCP server endpoint returned {server_response.status_code}: {server_response.text}")
+                debug_log(f"MCP tools endpoint returned {tools_response.status_code}")
     except Exception as e:
-        debug_log(f"Error fetching MCP servers: {e}")
-    return None
+        debug_log(f"Error fetching MCP tools: {e}")
+    return []
 async def get_chat_history(channel, bot_user_id: int, limit: int = 50) -> List[Dict[str, Any]]:
     """
@@ -291,7 +319,9 @@ async def get_chat_history(channel, bot_user_id: int, limit: int = 50) -> List[Dict[str, Any]]:
 async def get_ai_response(history_messages: List[Dict[str, Any]], user_message: str, image_urls: List[str] = None) -> str:
     """
-    Get AI response using LiteLLM Responses API with automatic MCP tool execution.
+    Get AI response using LiteLLM chat.completions with manual MCP tool execution.
+    Uses manual tool execution loop since Responses API doesn't work with Bedrock + MCP.
     Args:
         history_messages: List of previous conversation messages with roles
@@ -301,113 +331,13 @@ async def get_ai_response(history_messages: List[Dict[str, Any]], user_message:
     Returns:
         AI response string
     """
-    try:
-        # When tools are enabled, use Responses API with MCP for automatic tool execution
-        if ENABLE_TOOLS:
-            debug_log("Tools enabled - using Responses API with MCP auto-execution")
-            # Query MCP server info to get server_label
-            mcp_info = await get_available_mcp_servers()
-            # Build input array with system prompt, history, and current message
-            input_messages = []
-            # Add system prompt as developer role for newer models
-            input_messages.append({
-                "role": "developer",
-                "content": SYSTEM_PROMPT
-            })
-            # Add conversation history
-            for msg in history_messages:
-                input_messages.append({
-                    "role": msg["role"],
-                    "content": msg["content"]
-                })
-            # Build current user message
-            if image_urls:
-                # Multi-modal message with text and images
-                content_parts = [{"type": "text", "text": user_message}]
-                for url in image_urls:
-                    base64_image = await download_image(url)
-                    if base64_image:
-                        content_parts.append({
-                            "type": "image_url",
-                            "image_url": {
-                                "url": f"data:image/jpeg;base64,{base64_image}"
-                            }
-                        })
-                input_messages.append({"role": "user", "content": content_parts})
-            else:
-                input_messages.append({"role": "user", "content": user_message})
-            # Build MCP tools configuration
-            tools_config = []
-            if mcp_info and isinstance(mcp_info, dict):
-                server_list = mcp_info.get("server", [])
-                if isinstance(server_list, list) and len(server_list) > 0:
-                    for server_info in server_list:
-                        server_name = server_info.get("server_name")
-                        if server_name:
-                            tools_config.append({
-                                "type": "mcp",
-                                "server_label": server_name,
-                                "server_url": "litellm_proxy",  # Use LiteLLM as MCP gateway
-                                "require_approval": "never"  # Automatic tool execution
-                            })
-                            debug_log(f"Added MCP server '{server_name}' with auto-execution")
-            if not tools_config:
-                debug_log("No MCP servers found, falling back to standard chat completions")
-                # Fall through to standard chat completions below
-            else:
-                # Determine if we should force tool usage based on query content
-                tool_choice_value = "required" if query_needs_tools(user_message) else "auto"
-                debug_log(f"Tool choice mode: {tool_choice_value}")
-                # Use Responses API with MCP tools
-                debug_log(f"Calling Responses API with {len(tools_config)} MCP servers")
-                debug_log(f"Input messages: {len(input_messages)} messages")
-                response = client.responses.create(
-                    model=MODEL_NAME,
-                    input=input_messages,
-                    tools=tools_config,
-                    tool_choice=tool_choice_value,
-                    stream=False
-                )
-                debug_log(f"Response received, status: {getattr(response, 'status', 'unknown')}")
-                # Extract text from Responses API format
-                # Try the shorthand first
-                response_text = getattr(response, 'output_text', None)
-                if response_text:
-                    debug_log(f"Got response via output_text shorthand: {response_text[:100]}...")
-                    return response_text
-                # Otherwise navigate the structure
-                if hasattr(response, 'output') and len(response.output) > 0:
-                    for output in response.output:
-                        if hasattr(output, 'type') and output.type == "message":
-                            if hasattr(output, 'content') and len(output.content) > 0:
-                                for content in output.content:
-                                    if hasattr(content, 'type') and content.type == "output_text":
-                                        debug_log(f"Got response via structure navigation: {content.text[:100]}...")
-                                        return content.text
-                debug_log(f"Unexpected response format: {response}")
-                debug_log(f"Response attributes: {dir(response)}")
-                return "I received a response but couldn't extract the text. Please try again."
-        # Standard chat completions (when tools disabled or MCP not available)
-        debug_log("Using standard chat completions")
-        # Build messages array
+    import json
     messages = [{"role": "system", "content": SYSTEM_PROMPT}]
     messages.extend(history_messages)
-    # Build current user message
     if image_urls:
         content_parts = [{"type": "text", "text": user_message}]
         for url in image_urls:
@@ -421,13 +351,95 @@ async def get_ai_response(history_messages: List[Dict[str, Any]], user_message:
     else:
         messages.append({"role": "user", "content": user_message})
-    response = client.chat.completions.create(
-        model=MODEL_NAME,
-        messages=messages,
-        temperature=0.7
-    )
-    return response.choices[0].message.content
+    try:
+        # Build request parameters
+        request_params = {
+            "model": MODEL_NAME,
+            "messages": messages,
+            "temperature": 0.7,
+        }
+        # Add MCP tools if enabled
+        tools = []
+        if ENABLE_TOOLS:
+            debug_log("Tools enabled - fetching MCP tools")
+            tools = await get_available_mcp_tools()
+            if tools:
+                request_params["tools"] = tools
+                request_params["tool_choice"] = "auto"
+                debug_log(f"Added {len(tools)} tools to request")
+        debug_log(f"Calling chat.completions with {len(tools)} tools")
+        response = client.chat.completions.create(**request_params)
+        # Handle tool calls if present
+        response_message = response.choices[0].message
+        tool_calls = getattr(response_message, 'tool_calls', None)
+        # Tool execution loop (max 5 iterations to prevent infinite loops)
+        max_iterations = 5
+        iteration = 0
+        while tool_calls and len(tool_calls) > 0 and iteration < max_iterations:
+            iteration += 1
+            debug_log(f"Tool call iteration {iteration}: Model requested {len(tool_calls)} tool calls")
+            # Add assistant's response with tool calls to messages
+            messages.append({
+                "role": "assistant",
+                "content": response_message.content,
+                "tool_calls": [
+                    {
+                        "id": tc.id,
+                        "type": "function",
+                        "function": {
+                            "name": tc.function.name,
+                            "arguments": tc.function.arguments
+                        }
+                    }
+                    for tc in tool_calls
+                ]
+            })
+            # Execute each tool call via MCP
+            for tool_call in tool_calls:
+                function_name = tool_call.function.name
+                function_args_str = tool_call.function.arguments
+                debug_log(f"Executing tool: {function_name}")
+                # Parse arguments
+                try:
+                    args_dict = json.loads(function_args_str) if isinstance(function_args_str, str) else function_args_str
+                except json.JSONDecodeError:
+                    args_dict = {}
+                    debug_log(f"Failed to parse tool arguments: {function_args_str}")
+                # Execute the tool via MCP
+                tool_result = await execute_mcp_tool(function_name, args_dict)
+                # Add tool result to messages
+                messages.append({
+                    "role": "tool",
+                    "tool_call_id": tool_call.id,
+                    "content": tool_result
+                })
+            # Get next response from model
+            debug_log("Getting model response after tool execution")
+            request_params["messages"] = messages
+            response = client.chat.completions.create(**request_params)
+            response_message = response.choices[0].message
+            tool_calls = getattr(response_message, 'tool_calls', None)
+        if iteration >= max_iterations:
+            debug_log(f"Warning: Reached max tool iterations ({max_iterations})")
+        final_content = response.choices[0].message.content
+        debug_log(f"Final response: {final_content[:100] if final_content else 'None'}...")
+        return final_content or "I received a response but it was empty. Please try again."
     except Exception as e:
         error_msg = f"Error calling LiteLLM API: {str(e)}"