From e1901f5987938bb99acd6fed265ba1a71595a4c2 Mon Sep 17 00:00:00 2001
From: mcpflow
Date: Thu, 24 Apr 2025 22:36:52 +0800
Subject: [PATCH] feat: add GLM models and a Zhipu agent example with
 configuration and implementation

---
 examples/mcp_basic_zhipu_agent/README.md      |  60 ++
 examples/mcp_basic_zhipu_agent/main.py        |  89 +++
 .../mcp_agent.config.yaml                     |  43 ++
 pyproject.toml                                |   4 +
 src/mcp_agent/config.py                       |  20 +
 .../artificial_analysis_llm_benchmarks.json   | 134 +++-
 .../workflows/llm/augmented_llm_azure.py      |   4 +-
 .../workflows/llm/augmented_llm_openai.py     |   4 +-
 .../workflows/llm/augmented_llm_zhipu.py      | 591 ++++++++++++++++++
 src/mcp_agent/workflows/llm/llm_selector.py   |  19 +-
 10 files changed, 959 insertions(+), 9 deletions(-)
 create mode 100644 examples/mcp_basic_zhipu_agent/README.md
 create mode 100644 examples/mcp_basic_zhipu_agent/main.py
 create mode 100644 examples/mcp_basic_zhipu_agent/mcp_agent.config.yaml
 create mode 100644 src/mcp_agent/workflows/llm/augmented_llm_zhipu.py

diff --git a/examples/mcp_basic_zhipu_agent/README.md b/examples/mcp_basic_zhipu_agent/README.md
new file mode 100644
index 000000000..c4533f8db
--- /dev/null
+++ b/examples/mcp_basic_zhipu_agent/README.md
@@ -0,0 +1,60 @@
+# MCP Zhipu AI Agent Example - "Finder" Agent
+
+This example demonstrates how to create and run a basic "Finder" agent using Zhipu AI's GLM-4 models with MCP. The agent can access the `fetch` and `filesystem` MCP servers defined in `mcp_agent.config.yaml`, allowing it to retrieve information from URLs and the local file system.
+
+## Prerequisites
+
+- A valid Zhipu AI API key
+- A Python 3.10+ environment
+
+## Setup
+
+Before running the agent, complete the following steps:
+
+1. Register and obtain a Zhipu AI API key:
+   - Visit the [Zhipu AI website](https://open.bigmodel.cn/) to register an account
+   - Create an API key in the console
+
+2. Configure the API key:
+   - Create a `mcp_agent.secrets.yaml` file and add your API key:
+     ```yaml
+     zhipu:
+       api_key: "your-zhipu-api-key-here"
+     ```
+
+## Running the Example
+
+Install dependencies and run the example:
+
+```bash
+# Install dependencies
+pip install -e ..
+
+# Run the example
+python main.py
+```
+
+## Example Features
+
+This example demonstrates:
+
+1. Using Zhipu AI's GLM-4 models within the MCP architecture
+2. Retrieving web content via the fetch server
+3. Reading local files via the filesystem server
+4. Multi-turn conversation support
+5. Support for prompts and responses in both English and Chinese
+
+## Supported Models
+
+Zhipu AI provides various large language models, including:
+
+- `glm-4` - Zhipu's base large model
+- `glm-4-plus` - Enhanced large model with stronger capabilities
+- `glm-4-long` - Large model with longer context support
+- `glm-4-flashx-250414` - High-performance Flash model
+- `glm-4-flash-250414` - Standard Flash model
+- `glm-4-air-250414` - Lightweight large model
+- `glm-4v` - Zhipu's vision large model
+- `glm-3-turbo` - Zhipu's basic conversation model
+
+This example uses the `glm-4-flashx-250414` model by default, but you can switch to any supported model in the configuration.
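+
+## Overriding the Model Per Request
+
+Besides setting `default_model` in the configuration, you can pin a model for an individual call. The following is a minimal sketch mirroring what `main.py` does; it assumes an initialized agent named `finder_agent` inside a running `MCPApp` async context:
+
+```python
+from mcp_agent.workflows.llm.augmented_llm import RequestParams
+from mcp_agent.workflows.llm.augmented_llm_zhipu import ZhipuAugmentedLLM
+
+# Attach a Zhipu-backed LLM to the agent, then specify the model per call
+llm = await finder_agent.attach_llm(ZhipuAugmentedLLM)
+result = await llm.generate_str(
+    message="Fetch https://example.com and summarize it in one paragraph.",
+    request_params=RequestParams(model="glm-4-plus", temperature=0.1, maxTokens=4096),
+)
+```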
\ No newline at end of file
diff --git a/examples/mcp_basic_zhipu_agent/main.py b/examples/mcp_basic_zhipu_agent/main.py
new file mode 100644
index 000000000..4a1b6a66a
--- /dev/null
+++ b/examples/mcp_basic_zhipu_agent/main.py
@@ -0,0 +1,89 @@
+import asyncio
+
+from mcp_agent.config import (
+    Settings,
+    MCPSettings,
+    MCPServerSettings,
+    ZhipuSettings,
+    LoggerSettings,
+)
+from mcp_agent.app import MCPApp
+from mcp_agent.agents.agent import Agent
+from mcp_agent.workflows.llm.augmented_llm_zhipu import ZhipuAugmentedLLM
+from mcp_agent.workflows.llm.augmented_llm import RequestParams
+
+
+async def run():
+    """Run the finder agent example."""
+
+    # Create settings
+    settings = Settings(
+        mcp=MCPSettings(
+            servers={
+                "fetch": MCPServerSettings(
+                    command="uvx",
+                    args=["mcp-server-fetch"],
+                ),
+                "hotnews": MCPServerSettings(
+                    command="npx",
+                    args=["-y", "@mcpflow.io/mcp-hotnews-mcp-server"],
+                ),
+                "time": MCPServerSettings(
+                    command="uvx",
+                    args=["mcp-server-time", "--local-timezone=America/New_York"],
+                ),
+            }
+        ),
+        execution_engine="asyncio",
+        logger=LoggerSettings(type="console", level="info"),
+        zhipu=ZhipuSettings(
+            api_key="",  # Fill in your Zhipu AI API key (see the README for setup)
+            default_model="glm-4-flashx-250414",  # Same default as augmented_llm_zhipu.py
+        ),
+    )
+
+    # Initialize the app with settings
+    app = MCPApp(name="mcp_basic_zhipu_agent", settings=settings)
+
+    # Run the app
+    async with app.run():
+        # Create the finder agent with a concise instruction
+        finder_agent = Agent(
+            name="finder",
+            instruction="""You are an assistant that can use tools to answer questions.
+            """,
+            server_names=["time"],
+        )
+
+        # List the tools exposed by the agent's servers
+        tools = await finder_agent.list_tools()
+        print("Tools available:", tools)
+
+        # Initialize the agent
+        async with finder_agent:
+            # Attach the Zhipu-backed LLM to the agent
+            llm = await finder_agent.attach_llm(ZhipuAugmentedLLM)
+
+            # Create request parameters and specify the model explicitly
+            request_params = RequestParams(
+                model="glm-4-flashx-250414",  # Matches the default in augmented_llm_zhipu.py
+                temperature=0.1,
+                maxTokens=4096,
+                systemPrompt=None,  # Avoid duplicating the agent instruction
+            )
+
+            try:
+                # Use an explicit query that names the tool to call
+                result = await llm.generate_str(
+                    message="What time is it in New York? 
Use the time_get_current_time tool with timezone parameter set to 'America/New_York'.", + request_params=request_params, + force_tools=True, + ) + print("\n==== Response using tool ====") + print(result) + except Exception as e: + print(f"Error during model generation: {e}") + + +if __name__ == "__main__": + asyncio.run(run()) diff --git a/examples/mcp_basic_zhipu_agent/mcp_agent.config.yaml b/examples/mcp_basic_zhipu_agent/mcp_agent.config.yaml new file mode 100644 index 000000000..60f14d2b5 --- /dev/null +++ b/examples/mcp_basic_zhipu_agent/mcp_agent.config.yaml @@ -0,0 +1,43 @@ +$schema: ../../schema/mcp-agent.config.schema.json + +execution_engine: asyncio +logger: + type: "console" + level: "info" + progress_display: true + path_settings: + path_pattern: "logs/mcp-agent-{unique_id}.jsonl" + unique_id: "timestamp" # Options: "timestamp" or "session_id" + timestamp_format: "%Y%m%d_%H%M%S" + +mcp: + servers: + filesystem: + transport: "stdio" + command: "npx" + args: + - "--yes" + - "@mtp-devtools/mcp-fs-server" + - "--stdio" + fetch: + transport: "stdio" + command: "npx" + args: + - "--yes" + - "@mtp-devtools/mcp-server-fetch" + - "--stdio" + +# Zhipu AI configuration +# Note: The actual API key should be in mcp_agent.secrets.yaml, this is just an example +zhipu: + api_key: "" + # Default model can be set to any of the following: + # - glm-4: Base large model + # - glm-4-plus: Enhanced large model with stronger capabilities + # - glm-4-long: Large model with longer context support + # - glm-4-flashx-250414: High-performance Flash model + # - glm-4-flash-250414: Standard Flash model + # - glm-4-air-250414: Lightweight large model + # - glm-4v: Zhipu vision large model + # - glm-3-turbo: Zhipu basic conversation model + default_model: "glm-4-flashx-250414" \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 99f94f10d..11314397e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,6 +29,7 @@ dependencies = [ "prompt-toolkit>=3.0.50", "aiohttp>=3.11.13", "websockets>=12.0", + "zhipuai>=2.1.5", ] [project.optional-dependencies] @@ -53,6 +54,9 @@ google = [ cohere = [ "cohere>=5.13.4", ] +zhipu = [ + "zhipuai>=2.1.5", +] [build-system] requires = ["hatchling"] diff --git a/src/mcp_agent/config.py b/src/mcp_agent/config.py index 5a6f51910..a09fc08f6 100644 --- a/src/mcp_agent/config.py +++ b/src/mcp_agent/config.py @@ -282,6 +282,23 @@ class LoggerSettings(BaseModel): """HTTP timeout seconds for event transport""" +class ZhipuSettings(BaseModel): + """ + Settings for using Zhipu AI models. + """ + + api_key: str | None = None + """API key for authentication.""" + + base_url: str | None = None + """Base URL for the Zhipu AI API (optional, defaults to official API endpoint).""" + + default_model: str = "glm-4" + """Default model to use.""" + + model_config = ConfigDict(extra="allow", arbitrary_types_allowed=True) + + class Settings(BaseSettings): """ Settings class for the MCP Agent application. 
@@ -331,6 +348,9 @@ class Settings(BaseSettings): usage_telemetry: UsageTelemetrySettings | None = UsageTelemetrySettings() """Usage tracking settings for the MCP Agent application""" + zhipu: ZhipuSettings | None = None + """Settings for using Zhipu AI models in the MCP Agent application""" + @classmethod def find_config(cls) -> Path | None: """Find the config file in the current directory or parent directories.""" diff --git a/src/mcp_agent/data/artificial_analysis_llm_benchmarks.json b/src/mcp_agent/data/artificial_analysis_llm_benchmarks.json index 14f976fbb..bd2d47e2f 100644 --- a/src/mcp_agent/data/artificial_analysis_llm_benchmarks.json +++ b/src/mcp_agent/data/artificial_analysis_llm_benchmarks.json @@ -6246,5 +6246,137 @@ "bbh_score": null } } + }, + { + "name": "glm-4-flash", + "description": "high-performance general-purpose language model", + "provider": "ZhipuAI", + "metrics": { + "cost": { + "blended_cost_per_1m": 2.8, + "input_cost_per_1m": 1.8, + "output_cost_per_1m": 8.0 + }, + "speed": { + "time_to_first_token_ms": 900.0, + "tokens_per_second": 90.0 + }, + "intelligence": { + "quality_score": 76.0, + "mmlu_score": 82.0, + "gsm8k_score": 80.0, + "bbh_score": 72.0 + } + } + }, + { + "name": "glm-4-plus", + "description": "enhanced version of the large model, with stronger intelligence", + "provider": "ZhipuAI", + "metrics": { + "cost": { + "blended_cost_per_1m": 4.0, + "input_cost_per_1m": 2.5, + "output_cost_per_1m": 12.0 + }, + "speed": { + "time_to_first_token_ms": 1100.0, + "tokens_per_second": 85.0 + }, + "intelligence": { + "quality_score": 82.0, + "mmlu_score": 85.0, + "gsm8k_score": 83.0, + "bbh_score": 75.0 + } + } + }, + { + "name": "glm-4-long", + "description": "supports longer context large model", + "provider": "ZhipuAI", + "metrics": { + "cost": { + "blended_cost_per_1m": 6.0, + "input_cost_per_1m": 4.0, + "output_cost_per_1m": 14.0 + }, + "speed": { + "time_to_first_token_ms": 1200.0, + "tokens_per_second": 75.0 + }, + "intelligence": { + "quality_score": 80.0, + "mmlu_score": 83.0, + "gsm8k_score": 82.0, + "bbh_score": 74.0 + } + } + }, + { + "name": "glm-4-flashx-250414", + "description": "high-performance Flash model, faster response speed", + "provider": "ZhipuAI", + "metrics": { + "cost": { + "blended_cost_per_1m": 3.0, + "input_cost_per_1m": 1.9, + "output_cost_per_1m": 9.0 + }, + "speed": { + "time_to_first_token_ms": 800.0, + "tokens_per_second": 100.0 + }, + "intelligence": { + "quality_score": 78.0, + "mmlu_score": 80.0, + "gsm8k_score": 78.0, + "bbh_score": 72.0 + } + } + }, + { + "name": "glm-4-flash-250414", + "description": "standard Flash model, balanced speed and performance", + "provider": "ZhipuAI", + "metrics": { + "cost": { + "blended_cost_per_1m": 2.5, + "input_cost_per_1m": 1.6, + "output_cost_per_1m": 7.0 + }, + "speed": { + "time_to_first_token_ms": 850.0, + "tokens_per_second": 95.0 + }, + "intelligence": { + "quality_score": 75.0, + "mmlu_score": 78.0, + "gsm8k_score": 76.0, + "bbh_score": 70.0 + } + } + }, + { + "name": "glm-4-air-250414", + "description": "lightweight large model, lower resource consumption", + "provider": "ZhipuAI", + "metrics": { + "cost": { + "blended_cost_per_1m": 2.0, + "input_cost_per_1m": 1.2, + "output_cost_per_1m": 6.0 + }, + "speed": { + "time_to_first_token_ms": 750.0, + "tokens_per_second": 110.0 + }, + "intelligence": { + "quality_score": 70.0, + "mmlu_score": 72.0, + "gsm8k_score": 70.0, + "bbh_score": 65.0 + } + } } -] +] \ No newline at end of file diff --git 
a/src/mcp_agent/workflows/llm/augmented_llm_azure.py b/src/mcp_agent/workflows/llm/augmented_llm_azure.py
index 5bfd9c805..078b67d33 100644
--- a/src/mcp_agent/workflows/llm/augmented_llm_azure.py
+++ b/src/mcp_agent/workflows/llm/augmented_llm_azure.py
@@ -125,9 +125,7 @@ async def generate(self, message, request_params: RequestParams | None = None):
         system_prompt = self.instruction or params.systemPrompt
 
         if system_prompt and len(messages) == 0:
-            messages.append(
-                SystemMessage(content=system_prompt)
-            )
+            messages.append(SystemMessage(content=system_prompt))
 
         if isinstance(message, str):
             messages.append(UserMessage(content=message))
diff --git a/src/mcp_agent/workflows/llm/augmented_llm_openai.py b/src/mcp_agent/workflows/llm/augmented_llm_openai.py
index 6e2562eac..abfc40f69 100644
--- a/src/mcp_agent/workflows/llm/augmented_llm_openai.py
+++ b/src/mcp_agent/workflows/llm/augmented_llm_openai.py
@@ -68,7 +68,7 @@ def __init__(self, *args, **kwargs):
         if hasattr(self.context.config.openai, "reasoning_effort"):
             self._reasoning_effort = self.context.config.openai.reasoning_effort
 
-        self._reasoning = lambda model : model.startswith(("o1","o3","o4"))
+        self._reasoning = lambda model: model.startswith(("o1", "o3", "o4"))
 
         if self._reasoning(chosen_model):
             self.logger.info(
@@ -163,10 +163,8 @@ async def generate(self, message, request_params: RequestParams | None = None):
         if self._reasoning(model):
             arguments = {
                 **arguments,
-                # DEPRECATED: https://platform.openai.com/docs/api-reference/chat/create#chat-create-max_tokens
                 # "max_tokens": params.maxTokens,
-
                 "max_completion_tokens": params.maxTokens,
                 "reasoning_effort": self._reasoning_effort,
             }
 
diff --git a/src/mcp_agent/workflows/llm/augmented_llm_zhipu.py b/src/mcp_agent/workflows/llm/augmented_llm_zhipu.py
new file mode 100644
index 000000000..0252405d1
--- /dev/null
+++ b/src/mcp_agent/workflows/llm/augmented_llm_zhipu.py
@@ -0,0 +1,591 @@
+import json
+from typing import Type, Dict
+import asyncio
+
+from zhipuai import ZhipuAI
+from zhipuai.types.chat.chat_completion import (
+    CompletionMessage,
+)
+from mcp.types import (
+    CallToolResult,
+    ModelPreferences,
+    TextContent,
+)
+
+from mcp_agent.workflows.llm.augmented_llm import (
+    AugmentedLLM,
+    ModelT,
+    MCPMessageParam,
+    MCPMessageResult,
+    ProviderToMCPConverter,
+    RequestParams,
+)
+from mcp_agent.logging.logger import get_logger
+
+
+class ZhipuTypeConverter(ProviderToMCPConverter[Dict, CompletionMessage]):
+    """Converts between Zhipu AI and MCP types"""
+
+    @classmethod
+    def from_mcp_message_result(cls, result: MCPMessageResult) -> CompletionMessage:
+        """Convert an MCP message result to a Zhipu AI message result."""
+        # This is a simplified implementation - for a complete implementation,
+        # we would need to map all MCP message properties to Zhipu AI properties
+        content = ""
+        if result.content.type == "text":
+            content = result.content.text
+
+        return CompletionMessage(
+            role="assistant",
+            content=content,
+        )
+
+    @classmethod
+    def to_mcp_message_result(cls, result: CompletionMessage) -> MCPMessageResult:
+        """Convert a Zhipu AI message result to an MCP message result."""
+        # This is a simplified implementation - for a complete implementation,
+        # we would need to map all Zhipu AI properties to MCP properties
+
+        # Extract text from the result based on its content type
+        text = ""
+        if isinstance(result.content, str):
+            text = result.content
+        elif isinstance(result.content, list):
+            # Handle a list of content parts
+            for part in result.content:
+                if isinstance(part, dict) and part.get("type") == "text":
+                    text += part.get("text", "")
+                # Handle other content types as needed
+
+        # Create a simplified MCPMessageResult (an MCP CreateMessageResult)
+        return MCPMessageResult(
+            role="assistant",
+            content=TextContent(type="text", text=text),
+            model="",  # The source model name is not tracked in this simplified mapping
+        )
+
+    @classmethod
+    def from_mcp_message_param(cls, param: MCPMessageParam) -> Dict:
+        """Convert an MCP message parameter to a Zhipu AI message parameter dict."""
+        role = param.role
+        part = param.content  # An MCP SamplingMessage carries a single content item
+
+        if part.type == "image":
+            # Convert MCP image data (base64 payload + mime type) to the Zhipu AI format
+            return {
+                "role": role,
+                "content": [
+                    {
+                        "type": "image_url",
+                        "image_url": {
+                            "url": f"data:{part.mimeType};base64,{part.data}"
+                        },
+                    }
+                ],
+            }
+
+        # Text-only content
+        return {"role": role, "content": part.text if part.type == "text" else ""}
+
+    @classmethod
+    def to_mcp_message_param(cls, param: Dict) -> MCPMessageParam:
+        """Convert a Zhipu AI message parameter to an MCP message parameter."""
+        # Collect text from either a plain string or a list of content parts
+        text = ""
+        if isinstance(param.get("content"), str):
+            text = param.get("content")
+        elif isinstance(param.get("content"), list):
+            for item in param.get("content"):
+                if isinstance(item, dict) and item.get("type") == "text":
+                    text += item.get("text", "")
+                # Handle other content types as needed
+
+        return MCPMessageParam(
+            role=param.get("role"),
+            content=TextContent(type="text", text=text),
+        )
+
+    @classmethod
+    def from_mcp_tool_result(cls, result: CallToolResult, tool_use_id: str) -> Dict:
+        """Convert an MCP tool result to a Zhipu AI tool message parameter dict."""
+        # Ensure the result can be serialized as a JSON string
+        try:
+            # Try to extract output content
+            if hasattr(result, "output") and result.output is not None:
+                content = json.dumps(result.output, ensure_ascii=False)
+            elif hasattr(result, "content"):
+                # Try to process the content field
+                if isinstance(result.content, list):
+                    # If content is a list, extract all text content
+                    text_parts = []
+                    for part in result.content:
+                        if hasattr(part, "text"):
+                            text_parts.append(part.text)
+                        elif isinstance(part, dict) and "text" in part:
+                            text_parts.append(part["text"])
+                    content = json.dumps(
+                        {"result": " ".join(text_parts)}, ensure_ascii=False
+                    )
+                else:
+                    content = json.dumps(
+                        {"result": str(result.content)}, ensure_ascii=False
+                    )
+            else:
+                # Fallback: convert the entire result object to a string
+                content = json.dumps({"result": str(result)}, ensure_ascii=False)
+        except Exception as e:
+            # If serialization fails, provide a default message
+            content = json.dumps(
+                {"error": f"Unable to serialize tool result: {str(e)}"},
+                ensure_ascii=False,
+            )
+
+        return {
+            "role": "tool",
+            "tool_call_id": tool_use_id,
+            "content": content,
+        }
+
+
+class ZhipuAugmentedLLM(AugmentedLLM[Dict, CompletionMessage]):
+    """
+    An implementation of AugmentedLLM that
uses Zhipu AI's models with support for + function calling and multi-modal inputs. + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, type_converter=ZhipuTypeConverter, **kwargs) + + self.provider = "ZhipuAI" + # Initialize logger with name if available + self.logger = get_logger(f"{__name__}.{self.name}" if self.name else __name__) + + self.model_preferences = self.model_preferences or ModelPreferences( + costPriority=0.3, + speedPriority=0.4, + intelligencePriority=0.3, + ) + + # Initialize tool result history records + self.tool_results_history = [] + + # Get default model from config if available + chosen_model = "glm-4-flashx-250414" # Fallback default + + if ( + self.context + and self.context.config + and hasattr(self.context.config, "zhipu") + ): + if hasattr(self.context.config.zhipu, "default_model"): + chosen_model = self.context.config.zhipu.default_model + + self.default_request_params = self.default_request_params or RequestParams( + model=chosen_model, + modelPreferences=self.model_preferences, + maxTokens=4096, + systemPrompt=self.instruction, + parallel_tool_calls=False, + max_iterations=10, + use_history=True, + ) + + @classmethod + def convert_message_to_message_param( + cls, message: CompletionMessage, **kwargs + ) -> Dict: + """Convert a response object to an input parameter object to allow LLM calls to be chained.""" + assistant_message_params = { + "role": "assistant", + **kwargs, + } + if message.content is not None: + assistant_message_params["content"] = message.content + + # Fix tool call serialization issues + if hasattr(message, "tool_calls") and message.tool_calls is not None: + # Convert tool_calls to serializable dict list + tool_calls_serializable = [] + for tc in message.tool_calls: + tool_call_dict = { + "id": tc.id, + "type": "function", + "function": { + "name": tc.function.name, + "arguments": tc.function.arguments, + }, + } + tool_calls_serializable.append(tool_call_dict) + + assistant_message_params["tool_calls"] = tool_calls_serializable + + return assistant_message_params + + async def generate( + self, + message, + request_params: RequestParams | None = None, + force_tools: bool = False, + ): + """ + Process a query using an LLM and available tools. + This implementation uses Zhipu AI's ChatCompletion as the LLM. 
+
+        Args:
+            message: The input message string, message dict, or list of message dicts
+            request_params: Configuration parameters for the request
+            force_tools: If True, set tool_choice so the model is encouraged to use tools
+        """
+        config = self.context.config
+        # Honor an optional custom base_url from ZhipuSettings
+        client_kwargs = {"api_key": config.zhipu.api_key}
+        if getattr(config.zhipu, "base_url", None):
+            client_kwargs["base_url"] = config.zhipu.base_url
+        zhipu_client = ZhipuAI(**client_kwargs)
+
+        messages = []
+        params = self.get_request_params(request_params)
+
+        if params.use_history:
+            messages.extend(self.history.get())
+
+        system_prompt = self.instruction or params.systemPrompt
+
+        if system_prompt and len(messages) == 0:
+            messages.append({"role": "system", "content": system_prompt})
+
+        if isinstance(message, str):
+            messages.append({"role": "user", "content": message})
+        elif isinstance(message, list):
+            messages.extend(message)
+        else:
+            messages.append(message)
+
+        # Prepare tools for function calling if available
+        response = await self.aggregator.list_tools()
+        available_tools = []
+        if response.tools:
+            available_tools = [
+                {
+                    "type": "function",
+                    "function": {
+                        "name": tool.name,
+                        "description": tool.description,
+                        "parameters": tool.inputSchema,
+                    },
+                }
+                for tool in response.tools
+            ]
+
+        responses = []
+        model = await self.select_model(params)
+
+        for i in range(params.max_iterations):
+            arguments = {
+                "model": model,
+                "messages": messages,
+                "max_tokens": params.maxTokens,
+            }
+
+            # Add tools if available
+            if available_tools:
+                arguments["tools"] = available_tools
+
+                if force_tools:
+                    arguments["tool_choice"] = "auto"
+
+            # Default to a low temperature unless the caller specifies one
+            arguments["temperature"] = (
+                params.temperature if params.temperature is not None else 0.1
+            )
+
+            # Add stop sequences if available
+            if params.stopSequences:
+                arguments["stop"] = params.stopSequences
+
+            # Add extra parameters if available in metadata
+            if params.metadata:
+                arguments["extra_body"] = params.metadata
+
+            self.logger.debug(f"Zhipu AI request arguments: {arguments}")
+            self._log_chat_progress(chat_turn=len(messages) // 2, model=model)
+
+            try:
+                # The zhipuai client is synchronous, so call it in an executor
+                completion_response = await asyncio.get_event_loop().run_in_executor(
+                    None, lambda: zhipu_client.chat.completions.create(**arguments)
+                )
+
+                self.logger.debug(
+                    "Zhipu AI Completion response:",
+                    data=completion_response,
+                )
+
+                if isinstance(completion_response, BaseException):
+                    self.logger.error(f"Error: {completion_response}")
+                    break
+
+                if (
+                    not completion_response.choices
+                    or len(completion_response.choices) == 0
+                ):
+                    # No response from the model, we're done
+                    break
+
+                choice = completion_response.choices[0]
+                message = choice.message
+                responses.append(message)
+
+                # Format the message for the next iteration or tool calls
+                converted_message = self.convert_message_to_message_param(message)
+                messages.append(converted_message)
+
+                # If tools are being used and tool_calls are present
+                if hasattr(message, "tool_calls") and message.tool_calls:
+                    # Execute all tool calls
+                    tool_tasks = [
+                        self.execute_tool_call(tool_call)
+                        for tool_call in message.tool_calls
+                    ]
+                    # Wait for all tool calls to complete
+                    tool_results = await self.executor.execute(*tool_tasks)
+
+                    self.logger.debug(
+                        f"Iteration {i}: Tool call results: {str(tool_results) if tool_results else 'None'}"
+                    )
+
+                    # Add non-None results to messages
+                    for result in tool_results:
+                        if isinstance(result, BaseException):
+                            self.logger.error(
+                                f"Warning: Unexpected error during tool execution: {result}. Continuing..."
+ ) + continue + if result is not None: + messages.append(result) + elif hasattr(message, "function_call") and message.function_call: + function_call = message.function_call + tool_call = type( + "ToolCall", + (), + { + "id": str(i), + "function": type( + "Function", + (), + { + "name": function_call.get("name", ""), + "arguments": function_call.get("arguments", "{}"), + }, + ), + }, + ) + + result = await self.execute_tool_call(tool_call) + if result is not None: + messages.append(result) + except Exception as e: + self.logger.error(f"Error during Zhipu AI request: {e}") + break + + self._log_chat_finished(model=model) + return responses + + async def generate_str( + self, + message, + request_params: RequestParams | None = None, + force_tools: bool = True, # Enable forced tool calls by default + ): + """Request an LLM generation and return the string representation of the result""" + responses = await self.generate( + message, request_params, force_tools=force_tools + ) + if not responses: + return "" + + # Extract text content from the final response + return self.message_str(responses[-1]) + + async def generate_structured( + self, + message, + response_model: Type[ModelT], + request_params: RequestParams | None = None, + force_tools: bool = True, # Enable forced tool calls by default + ) -> ModelT: + """Generate a structured response conforming to the specified model.""" + # First generate a string response + response_str = await self.generate_str( + message, request_params, force_tools=force_tools + ) + + # Then convert to the structured model using pydantic parsing + try: + # Simple parsing approach - in a real implementation, more + # sophisticated parsing might be needed + return response_model.parse_raw(response_str) + except Exception as e: + self.logger.error(f"Error parsing structured response: {e}") + # If parsing fails, try to create an empty instance + return response_model() + + async def execute_tool_call( + self, + tool_call, + ) -> Dict | None: + """Execute a tool call from the LLM and return the result.""" + if not tool_call: + return None + + tool_call_id = tool_call.id + function_name = tool_call.function.name + + # Parse function arguments from the tool call + function_args = {} + try: + function_args = json.loads(tool_call.function.arguments) + except json.JSONDecodeError as e: + self.logger.error(f"Error parsing function arguments: {e}") + return None + + self.logger.info( + f"Executing tool call: {function_name}", extra={"data": function_args} + ) + + # Get available tools + tools_response = await self.aggregator.list_tools() + available_tools = ( + {tool.name: tool for tool in tools_response.tools} + if tools_response.tools + else {} + ) + + # Tool name processing logic - check if server prefix needs to be added + actual_tool_name = function_name + if function_name not in available_tools: + # Check if it's missing a server prefix + server_prefixed_names = [ + name + for name in available_tools.keys() + if name.endswith(f"_{function_name}") + ] + if server_prefixed_names: + # Use the first matching tool found + actual_tool_name = server_prefixed_names[0] + self.logger.info( + f"Tool name '{function_name}' mapped to '{actual_tool_name}'" + ) + + # Call the tool + try: + # Use the processed tool name for calling + self.logger.info( + f"Calling tool '{actual_tool_name}' with arguments: {function_args}" + ) + result = await self.aggregator.call_tool( + name=actual_tool_name, arguments=function_args + ) + + if result: + # Create tool response message + tool_result = 
self.type_converter.from_mcp_tool_result( + result, tool_call_id + ) + + # Save tool call results to history + if tool_result: + self.tool_results_history.append(tool_result) + + return tool_result + else: + self.logger.warning(f"Tool '{actual_tool_name}' returned no result") + except Exception as e: + self.logger.error(f"Error executing tool call '{actual_tool_name}': {e}") + return None + + def message_param_str(self, message: Dict) -> str: + """Convert a message parameter to a string representation.""" + if isinstance(message.get("content"), str): + return message.get("content", "") + elif isinstance(message.get("content"), list): + # Extract text content from multi-modal content + text_parts = [] + for part in message.get("content", []): + if isinstance(part, dict) and part.get("type") == "text": + text_parts.append(part.get("text", "")) + return " ".join(text_parts) + return "" + + def message_str(self, message: CompletionMessage) -> str: + """Convert a message to a string representation.""" + # First check if the message content is valid + if message.content: + if isinstance(message.content, str): + return message.content + elif isinstance(message.content, list): + # Extract text content from multi-modal content + text_parts = [] + for part in message.content: + if isinstance(part, dict) and part.get("type") == "text": + text_parts.append(part.get("text", "")) + return " ".join(text_parts) + + # If message content is empty but has tool call history, try to process and format the most recent tool result + if hasattr(self, "tool_results_history") and self.tool_results_history: + # Get the last tool result + last_tool_result = self.tool_results_history[-1] + + # Process based on tool call result type + if isinstance(last_tool_result, dict) and "content" in last_tool_result: + try: + return self._format_tool_result(last_tool_result["content"]) + except Exception as e: + self.logger.error(f"Error formatting tool result: {e}") + # Return original content in case of formatting failure + return f"Tool result: {last_tool_result['content']}" + + return "" + + def _format_tool_result(self, result_content: str) -> str: + """Format tool result content based on its structure and type.""" + try: + # Try to parse JSON content + parsed = json.loads(result_content) + + # If it's a standard "result" wrapper format + if isinstance(parsed, dict) and "result" in parsed: + inner_content = parsed["result"] + + # Try to further parse nested JSON + try: + inner_json = json.loads(inner_content) + + # Generic structured result handling - simple JSON formatting + return json.dumps(inner_json, indent=2, ensure_ascii=False) + + except (json.JSONDecodeError, TypeError): + # If inner content is not JSON, return directly + return inner_content + + # For other JSON structures, return formatted JSON + if isinstance(parsed, dict) or isinstance(parsed, list): + return json.dumps(parsed, indent=2, ensure_ascii=False) + + return str(parsed) + + except (json.JSONDecodeError, TypeError): + # If not JSON format, return original content + return result_content diff --git a/src/mcp_agent/workflows/llm/llm_selector.py b/src/mcp_agent/workflows/llm/llm_selector.py index 88dfa23d1..d001357c7 100644 --- a/src/mcp_agent/workflows/llm/llm_selector.py +++ b/src/mcp_agent/workflows/llm/llm_selector.py @@ -318,15 +318,30 @@ def _calculate_max_scores(self, models: List[ModelInfo]) -> Dict[str, float]: def load_default_models() -> List[ModelInfo]: """ We use ArtificialAnalysis benchmarks for determining the best model. 
+    Zhipu AI models are also loaded from a separate data file, if present.
     """
+    # Load the default models
     with (
         resources.files("mcp_agent.data")
         .joinpath("artificial_analysis_llm_benchmarks.json")
         .open() as file
     ):
         data = json.load(file)  # Array of ModelInfo objects
-    adapter = TypeAdapter(List[ModelInfo])
-    return adapter.validate_python(data)
+
+    # Merge Zhipu AI models from an optional data file, if available
+    try:
+        with (
+            resources.files("mcp_agent.data")
+            .joinpath("zhipuai_models.json")
+            .open() as file
+        ):
+            zhipu_data = json.load(file)
+        data.extend(zhipu_data)
+    except Exception as e:
+        print(f"Warning: Could not load Zhipu AI models: {e}")
+
+    adapter = TypeAdapter(List[ModelInfo])
+    return adapter.validate_python(data)
 
 
 def _fuzzy_match(str1: str, str2: str, threshold: float = 0.8) -> bool:
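
`load_default_models` merges entries from an optional `zhipuai_models.json` packaged under `mcp_agent.data`; until that file exists, the `except` branch logs the warning and the selector falls back to the benchmark data alone. A minimal sketch of such a file, assuming it follows the same `ModelInfo` schema as the `artificial_analysis_llm_benchmarks.json` entries added above (the metric values here are placeholders, not measurements):

```json
[
  {
    "name": "glm-4v",
    "description": "Zhipu vision large model",
    "provider": "ZhipuAI",
    "metrics": {
      "cost": {
        "blended_cost_per_1m": 5.0,
        "input_cost_per_1m": 3.0,
        "output_cost_per_1m": 12.0
      },
      "speed": {
        "time_to_first_token_ms": 1200.0,
        "tokens_per_second": 70.0
      },
      "intelligence": {
        "quality_score": 72.0,
        "mmlu_score": null,
        "gsm8k_score": null,
        "bbh_score": null
      }
    }
  }
]
```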