Rewrite to use chat.completions with manual MCP tool execution
All checks were successful
OpenWebUI Discord Bot / Build-and-Push (push) Successful in 55s

Major refactor to fix Bedrock + MCP compatibility issues:

- Removed the Responses API approach (it doesn't work with Bedrock)
- Added execute_mcp_tool() to manually call tools via /mcp/call_tool
- Rewrote get_available_mcp_tools() to return the OpenAI function format (see the sketch after this list)
- Implemented a manual tool-execution loop capped at 5 iterations
- Tool results are sent back to the model in the standard tool response format
- Removed the query_needs_tools() function (no longer needed)
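
For illustration, a minimal sketch of what that conversion produces. The `get_current_time` tool and its schema are hypothetical, but the field mapping mirrors the code in this commit: MCP's `inputSchema` is already JSON Schema, so it maps directly onto the function's `parameters`.

```python
# Hypothetical MCP tool entry, as returned by GET /v1/mcp/tools
mcp_tool = {
    "name": "get_current_time",
    "description": "Return the current time for a timezone",
    "inputSchema": {
        "type": "object",
        "properties": {"timezone": {"type": "string"}},
    },
}

# The OpenAI function-calling entry built from it
openai_tool = {
    "type": "function",
    "function": {
        "name": mcp_tool["name"],
        "description": mcp_tool.get("description", ""),
        "parameters": mcp_tool.get("inputSchema", {"type": "object", "properties": {}}),
    },
}
```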

How it works:
1. Fetch MCP tools from /v1/mcp/tools and convert them to the OpenAI function format
2. Call chat.completions.create() with the tools array
3. When the model requests tool calls, execute each one via POST /mcp/call_tool
4. Send the results back to the model as role="tool" messages
5. Loop until the model returns a final response

This bypasses the broken Responses API and uses working endpoints
that are compatible with the AWS Bedrock + LiteLLM MCP integration.
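
In code, the loop is roughly the sketch below. It assumes the module-level `client` and `MODEL_NAME` plus the `get_available_mcp_tools()` and `execute_mcp_tool()` helpers from the diff further down; the surrounding error handling and image handling are omitted.

```python
import json

async def run_with_tools(messages: list[dict]) -> str:
    """Minimal sketch of the manual MCP tool-execution loop."""
    params = {"model": MODEL_NAME, "messages": messages}
    tools = await get_available_mcp_tools()               # step 1: fetch + convert
    if tools:
        params["tools"] = tools
        params["tool_choice"] = "auto"
    response = client.chat.completions.create(**params)   # step 2
    message = response.choices[0].message
    for _ in range(5):  # bounded, mirroring the max-5-iterations guard
        tool_calls = getattr(message, "tool_calls", None)
        if not tool_calls:
            break  # step 5: model produced a final answer
        # Echo the assistant turn (with its tool_calls) back into history
        messages.append({
            "role": "assistant",
            "content": message.content,
            "tool_calls": [
                {"id": tc.id, "type": "function",
                 "function": {"name": tc.function.name,
                              "arguments": tc.function.arguments}}
                for tc in tool_calls
            ],
        })
        for tc in tool_calls:
            args = json.loads(tc.function.arguments or "{}")
            result = await execute_mcp_tool(tc.function.name, args)  # step 3
            messages.append({"role": "tool",                         # step 4
                             "tool_call_id": tc.id,
                             "content": result})
        response = client.chat.completions.create(**params)
        message = response.choices[0].message
    return message.content or ""
```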

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-12 11:34:24 -08:00
parent aca70dbd0b
commit 94651b6ec1


@@ -80,72 +80,100 @@ async def download_image(url: str) -> str | None:
         print(f"Error downloading image from {url}: {e}")
         return None
-def query_needs_tools(message: str) -> bool:
-    """
-    Detect if a query likely needs MCP tool access.
-    Returns True if the message contains keywords suggesting the need for:
-    - Real-time data (time, weather, current events)
-    - GitHub operations (repos, code, files, issues, PRs)
-    - Search/lookup operations
-    - File system access
-    """
-    tool_keywords = [
-        # Time-related
-        'time', 'clock', 'date', 'today', 'now', 'current', 'when',
-        # Weather
-        'weather', 'temperature', 'forecast',
-        # GitHub operations
-        'github', 'repo', 'repository', 'repositories', 'code', 'file', 'files',
-        'commit', 'commits', 'branch', 'branches', 'pr', 'pull request',
-        'issue', 'issues', 'merge', 'fork', 'clone',
-        # Search/lookup
-        'search', 'look up', 'find', 'locate', 'get', 'fetch', 'retrieve',
-        'show me', 'tell me about', 'what is', 'how many',
-        # File operations
-        'read', 'open', 'check', 'list', 'contents'
-    ]
-    message_lower = message.lower()
-    needs_tools = any(keyword in message_lower for keyword in tool_keywords)
-    if DEBUG_LOGGING and needs_tools:
-        matched = [kw for kw in tool_keywords if kw in message_lower]
-        debug_log(f"Query needs tools - matched keywords: {matched}")
-    return needs_tools
-async def get_available_mcp_servers():
-    """Query LiteLLM for available MCP servers (used with Responses API)"""
+async def execute_mcp_tool(tool_name: str, arguments: dict) -> str:
+    """Execute an MCP tool via LiteLLM's /mcp/call_tool endpoint"""
+    import json
     try:
         base_url = LITELLM_API_BASE.rstrip('/')
-        headers = {"x-litellm-api-key": LITELLM_API_KEY}
+        headers = {
+            "Authorization": f"Bearer {LITELLM_API_KEY}",
+            "Content-Type": "application/json"
+        }
+        debug_log(f"Executing MCP tool: {tool_name} with args: {arguments}")
+        async with httpx.AsyncClient(timeout=60.0) as http_client:
+            response = await http_client.post(
+                f"{base_url}/mcp/call_tool",
+                headers=headers,
+                json={
+                    "name": tool_name,
+                    "arguments": arguments
+                }
+            )
+            debug_log(f"MCP call_tool response status: {response.status_code}")
+            if response.status_code == 200:
+                result = response.json()
+                debug_log(f"MCP tool result: {str(result)[:200]}...")
+                # MCP returns content in various formats, extract the text
+                if isinstance(result, dict):
+                    if "content" in result:
+                        content = result["content"]
+                        if isinstance(content, list) and len(content) > 0:
+                            # Handle text content blocks
+                            first_content = content[0]
+                            if isinstance(first_content, dict) and "text" in first_content:
+                                return first_content["text"]
+                            return json.dumps(content)
+                        return json.dumps(content) if content else "Tool executed successfully"
+                    return json.dumps(result)
+                return str(result)
+            else:
+                error_text = response.text
+                debug_log(f"MCP call_tool error: {response.status_code} - {error_text}")
+                return f"Error executing tool: {response.status_code} - {error_text}"
+    except Exception as e:
+        debug_log(f"Exception calling MCP tool: {e}")
+        import traceback
+        debug_log(f"Traceback: {traceback.format_exc()}")
+        return f"Error executing tool: {str(e)}"
+async def get_available_mcp_tools():
+    """Query LiteLLM for available MCP tools and convert to OpenAI function format"""
+    try:
+        base_url = LITELLM_API_BASE.rstrip('/')
+        headers = {"Authorization": f"Bearer {LITELLM_API_KEY}"}
         async with httpx.AsyncClient(timeout=30.0) as http_client:
-            # Get MCP server configuration
-            server_response = await http_client.get(
-                f"{base_url}/v1/mcp/server",
+            # Get available MCP tools
+            tools_response = await http_client.get(
                f"{base_url}/v1/mcp/tools",
                 headers=headers
             )
-            if server_response.status_code == 200:
-                server_info = server_response.json()
-                server_count = len(server_info) if isinstance(server_info, list) else 0
-                debug_log(f"MCP server info: found {server_count} servers")
-                if server_count > 0:
-                    # Log server names for visibility
-                    server_names = [s.get("server_name") for s in server_info if isinstance(s, dict) and s.get("server_name")]
-                    debug_log(f"Available MCP servers: {server_names}")
-                return {"server": server_info}
+            if tools_response.status_code == 200:
+                tools_data = tools_response.json()
+                mcp_tools = tools_data.get("tools", []) if isinstance(tools_data, dict) else tools_data
+                debug_log(f"Found {len(mcp_tools)} MCP tools")
+                # Convert MCP tools to OpenAI function calling format
+                openai_tools = []
+                for tool in mcp_tools:
+                    if isinstance(tool, dict) and tool.get("name") and tool.get("description"):
+                        openai_tool = {
+                            "type": "function",
+                            "function": {
+                                "name": tool["name"],
+                                "description": tool.get("description", ""),
+                                "parameters": tool.get("inputSchema", {"type": "object", "properties": {}})
+                            }
+                        }
+                        openai_tools.append(openai_tool)
+                debug_log(f"Converted {len(openai_tools)} tools to OpenAI format")
+                return openai_tools
             else:
-                debug_log(f"MCP server endpoint returned {server_response.status_code}: {server_response.text}")
+                debug_log(f"MCP tools endpoint returned {tools_response.status_code}")
     except Exception as e:
-        debug_log(f"Error fetching MCP servers: {e}")
-    return None
+        debug_log(f"Error fetching MCP tools: {e}")
+    return []
 async def get_chat_history(channel, bot_user_id: int, limit: int = 50) -> List[Dict[str, Any]]:
     """
@@ -291,7 +319,9 @@ async def get_chat_history(channel, bot_user_id: int, limit: int = 50) -> List[Dict[str, Any]]:
 async def get_ai_response(history_messages: List[Dict[str, Any]], user_message: str, image_urls: List[str] = None) -> str:
     """
-    Get AI response using LiteLLM Responses API with automatic MCP tool execution.
+    Get AI response using LiteLLM chat.completions with manual MCP tool execution.
+    Uses manual tool execution loop since Responses API doesn't work with Bedrock + MCP.
     Args:
         history_messages: List of previous conversation messages with roles
@@ -301,113 +331,13 @@ async def get_ai_response(history_messages: List[Dict[str, Any]], user_message:
     Returns:
         AI response string
     """
-    try:
-        # When tools are enabled, use Responses API with MCP for automatic tool execution
-        if ENABLE_TOOLS:
-            debug_log("Tools enabled - using Responses API with MCP auto-execution")
-            # Query MCP server info to get server_label
-            mcp_info = await get_available_mcp_servers()
-            # Build input array with system prompt, history, and current message
-            input_messages = []
-            # Add system prompt as developer role for newer models
-            input_messages.append({
-                "role": "developer",
-                "content": SYSTEM_PROMPT
-            })
-            # Add conversation history
-            for msg in history_messages:
-                input_messages.append({
-                    "role": msg["role"],
-                    "content": msg["content"]
-                })
-            # Build current user message
-            if image_urls:
-                # Multi-modal message with text and images
-                content_parts = [{"type": "text", "text": user_message}]
-                for url in image_urls:
-                    base64_image = await download_image(url)
-                    if base64_image:
-                        content_parts.append({
-                            "type": "image_url",
-                            "image_url": {
-                                "url": f"data:image/jpeg;base64,{base64_image}"
-                            }
-                        })
-                input_messages.append({"role": "user", "content": content_parts})
-            else:
-                input_messages.append({"role": "user", "content": user_message})
-            # Build MCP tools configuration
-            tools_config = []
-            if mcp_info and isinstance(mcp_info, dict):
-                server_list = mcp_info.get("server", [])
-                if isinstance(server_list, list) and len(server_list) > 0:
-                    for server_info in server_list:
-                        server_name = server_info.get("server_name")
-                        if server_name:
-                            tools_config.append({
-                                "type": "mcp",
-                                "server_label": server_name,
-                                "server_url": "litellm_proxy",  # Use LiteLLM as MCP gateway
-                                "require_approval": "never"  # Automatic tool execution
-                            })
-                            debug_log(f"Added MCP server '{server_name}' with auto-execution")
-            if not tools_config:
-                debug_log("No MCP servers found, falling back to standard chat completions")
-                # Fall through to standard chat completions below
-            else:
-                # Determine if we should force tool usage based on query content
-                tool_choice_value = "required" if query_needs_tools(user_message) else "auto"
-                debug_log(f"Tool choice mode: {tool_choice_value}")
-                # Use Responses API with MCP tools
-                debug_log(f"Calling Responses API with {len(tools_config)} MCP servers")
-                debug_log(f"Input messages: {len(input_messages)} messages")
-                response = client.responses.create(
-                    model=MODEL_NAME,
-                    input=input_messages,
-                    tools=tools_config,
-                    tool_choice=tool_choice_value,
-                    stream=False
-                )
-                debug_log(f"Response received, status: {getattr(response, 'status', 'unknown')}")
-                # Extract text from Responses API format
-                # Try the shorthand first
-                response_text = getattr(response, 'output_text', None)
-                if response_text:
-                    debug_log(f"Got response via output_text shorthand: {response_text[:100]}...")
-                    return response_text
-                # Otherwise navigate the structure
-                if hasattr(response, 'output') and len(response.output) > 0:
-                    for output in response.output:
-                        if hasattr(output, 'type') and output.type == "message":
-                            if hasattr(output, 'content') and len(output.content) > 0:
-                                for content in output.content:
-                                    if hasattr(content, 'type') and content.type == "output_text":
-                                        debug_log(f"Got response via structure navigation: {content.text[:100]}...")
-                                        return content.text
-                debug_log(f"Unexpected response format: {response}")
-                debug_log(f"Response attributes: {dir(response)}")
-                return "I received a response but couldn't extract the text. Please try again."
-        # Standard chat completions (when tools disabled or MCP not available)
-        debug_log("Using standard chat completions")
-        # Build messages array
+    import json
     messages = [{"role": "system", "content": SYSTEM_PROMPT}]
     messages.extend(history_messages)
-    # Build current user message
     if image_urls:
         content_parts = [{"type": "text", "text": user_message}]
         for url in image_urls:
@@ -421,13 +351,95 @@ async def get_ai_response(history_messages: List[Dict[str, Any]], user_message:
     else:
         messages.append({"role": "user", "content": user_message})
-    response = client.chat.completions.create(
-        model=MODEL_NAME,
-        messages=messages,
-        temperature=0.7
-    )
-    return response.choices[0].message.content
+    try:
+        # Build request parameters
+        request_params = {
+            "model": MODEL_NAME,
+            "messages": messages,
+            "temperature": 0.7,
+        }
+        # Add MCP tools if enabled
+        tools = []
+        if ENABLE_TOOLS:
+            debug_log("Tools enabled - fetching MCP tools")
+            tools = await get_available_mcp_tools()
+            if tools:
+                request_params["tools"] = tools
+                request_params["tool_choice"] = "auto"
+                debug_log(f"Added {len(tools)} tools to request")
+        debug_log(f"Calling chat.completions with {len(tools)} tools")
+        response = client.chat.completions.create(**request_params)
+        # Handle tool calls if present
+        response_message = response.choices[0].message
+        tool_calls = getattr(response_message, 'tool_calls', None)
+        # Tool execution loop (max 5 iterations to prevent infinite loops)
+        max_iterations = 5
+        iteration = 0
+        while tool_calls and len(tool_calls) > 0 and iteration < max_iterations:
+            iteration += 1
+            debug_log(f"Tool call iteration {iteration}: Model requested {len(tool_calls)} tool calls")
+            # Add assistant's response with tool calls to messages
+            messages.append({
+                "role": "assistant",
+                "content": response_message.content,
+                "tool_calls": [
+                    {
+                        "id": tc.id,
+                        "type": "function",
+                        "function": {
+                            "name": tc.function.name,
+                            "arguments": tc.function.arguments
+                        }
+                    }
+                    for tc in tool_calls
+                ]
+            })
+            # Execute each tool call via MCP
+            for tool_call in tool_calls:
+                function_name = tool_call.function.name
+                function_args_str = tool_call.function.arguments
+                debug_log(f"Executing tool: {function_name}")
+                # Parse arguments
+                try:
+                    args_dict = json.loads(function_args_str) if isinstance(function_args_str, str) else function_args_str
+                except json.JSONDecodeError:
+                    args_dict = {}
+                    debug_log(f"Failed to parse tool arguments: {function_args_str}")
+                # Execute the tool via MCP
+                tool_result = await execute_mcp_tool(function_name, args_dict)
+                # Add tool result to messages
+                messages.append({
+                    "role": "tool",
+                    "tool_call_id": tool_call.id,
+                    "content": tool_result
+                })
+            # Get next response from model
+            debug_log("Getting model response after tool execution")
+            request_params["messages"] = messages
+            response = client.chat.completions.create(**request_params)
+            response_message = response.choices[0].message
+            tool_calls = getattr(response_message, 'tool_calls', None)
+        if iteration >= max_iterations:
+            debug_log(f"Warning: Reached max tool iterations ({max_iterations})")
+        final_content = response.choices[0].message.content
+        debug_log(f"Final response: {final_content[:100] if final_content else 'None'}...")
+        return final_content or "I received a response but it was empty. Please try again."
     except Exception as e:
         error_msg = f"Error calling LiteLLM API: {str(e)}"