lastmile-ai · mcpflow · Apr 24, 2025
diff --git a/examples/mcp_basic_zhipu_agent/README.md b/examples/mcp_basic_zhipu_agent/README.md
@@ -0,0 +1,60 @@
+# MCP Zhipu AI Agent Example - "Finder" Agent
+
+This example demonstrates how to create and run a basic "Finder" agent using Zhipu AI's GLM-4 model with MCP. The agent can access both `fetch` and `filesystem` MCP servers, allowing it to retrieve information from URLs and the local file system.
+
+## Prerequisites
+
+- Valid Zhipu AI API key
+- Python 3.10+ environment
+
+## Setup
+
+Before running the agent, ensure you have:
+
+1. Register and obtain a Zhipu AI API key:
+   - Visit [Zhipu AI website](https://open.bigmodel.cn/) to register an account
+   - Create an API key in the console
+
+2. Configure the API key:
+   - Create a `mcp_agent.secrets.yaml` file and add your API key:
+     ```yaml
+     zhipu:
+       api_key: "your-zhipu-api-key-here"
+     ```
+
+## Running the Example
+
+Install dependencies and run the example:
+
+```bash
+# Install dependencies
+pip install -e ..
+
+# Run the example
+python main.py
+```
+
+## Example Features
+
+This example demonstrates:
+
+1. Using Zhipu AI's GLM-4 model within the MCP architecture
+2. Retrieving web content via the fetch server
+3. Reading local files via the filesystem server
+4. Multi-turn conversation support
+5. Support for prompts and responses in both English and Chinese
+
+## Supported Models
+
+Zhipu AI provides various large language models, including:
+
+- glm-4 - Zhipu base large model
+- GLM-4-Plus - Enhanced large model
+- GLM-4-Long - Large model with longer context support
+- GLM-4-FlashX-250414 - High-performance Flash model
+- GLM-4-Flash-250414 - Standard Flash model
+- GLM-4-Air-250414 - Lightweight large model
+- glm-4v - Zhipu vision large model
+- glm-3-turbo - Zhipu basic conversation model
+
+This example uses the `glm-4` model by default, but you can change to any supported model in the configuration. 
diff --git a/examples/mcp_basic_zhipu_agent/main.py b/examples/mcp_basic_zhipu_agent/main.py
@@ -0,0 +1,89 @@
+import asyncio
+
+from mcp_agent.config import (
+    Settings,
+    MCPSettings,
+    MCPServerSettings,
+    ZhipuSettings,
+    LoggerSettings,
+)
+from mcp_agent.app import MCPApp
+from mcp_agent.agents.agent import Agent
+from mcp_agent.workflows.llm.augmented_llm_zhipu import ZhipuAugmentedLLM
+from mcp_agent.workflows.llm.augmented_llm import RequestParams
+
+
+async def run():
+    """Run the finder agent example."""
+
+    # Create settings
+    settings = Settings(
+        mcp=MCPSettings(
+            servers={
+                "fetch": MCPServerSettings(
+                    command="uvx",
+                    args=["mcp-server-fetch"],
+                ),
+                "hotnews": MCPServerSettings(
+                    command="npx",
+                    args=["-y", "@mcpflow.io/mcp-hotnews-mcp-server"],
+                ),
+                "time": MCPServerSettings(
+                    command="uvx",
+                    args=["mcp-server-time", "--local-timezone=America/New_York"],
+                ),
+            }
+        ),
+        execution_engine="asyncio",
+        logger=LoggerSettings(type="console", level="info"),
+        zhipu=ZhipuSettings(
+            api_key="<your_api_key>",
+            default_model="glm-4-flashx-250414",  # Use the same model as in augmented_llm_zhipu.py
+        ),
+    )
+
+    # Initialize the app with settings
+    app = MCPApp(name="mcp_basic_zhipu_agent", settings=settings)
+
+    # Run the app
+    async with app.run():
+        # Create an agent that can load different LLMs - Use more concise prompts
+        finder_agent = Agent(
+            name="finder",
+            instruction="""You are an assistant that can use tools to answer questions.
+            """,
+            server_names=["time"],
+        )
+
+        # list tools
+        tools = await finder_agent.list_tools()
+        print("Tools available:", tools)
+
+        # Initialize the agent
+        async with finder_agent:
+            # Create the base agent with default model
+            llm = await finder_agent.attach_llm(ZhipuAugmentedLLM)
+
+            # create request parameters - Explicitly specify the model
+            request_params = RequestParams(
+                model="glm-4-flashx-250414",  # Explicitly specify the model to maintain consistency with augmented_llm_zhipu.py
+                temperature=0.1,
+                maxTokens=4096,
+                systemPrompt=None,  # Don't use systemPrompt to avoid duplication with the instruction
+            )
+
+            try:
+                # Use a very explicit query
+                result = await llm.generate_str(
+                    message="What time is it in New York? Use the time_get_current_time tool with timezone parameter set to 'America/New_York'.",
+                    request_params=request_params,
+                    force_tools=True,
+                )
+                print("\n==== Response using tool ====")
+                print(result)
+            except Exception as e:
+                print(f"Error during model generation: {e}")
+
+
+if __name__ == "__main__":
+    asyncio.run(run())
diff --git a/examples/mcp_basic_zhipu_agent/mcp_agent.config.yaml b/examples/mcp_basic_zhipu_agent/mcp_agent.config.yaml
@@ -0,0 +1,43 @@
+$schema: ../../schema/mcp-agent.config.schema.json
+
+execution_engine: asyncio
+logger:
+  type: "console"
+  level: "info"
+  progress_display: true
+  path_settings:
+    path_pattern: "logs/mcp-agent-{unique_id}.jsonl"
+    unique_id: "timestamp" # Options: "timestamp" or "session_id"
+    timestamp_format: "%Y%m%d_%H%M%S"
+
+mcp:
+  servers:
+    filesystem:
+      transport: "stdio"
+      command: "npx"
+      args:
+        - "--yes"
+        - "@mtp-devtools/mcp-fs-server"
+        - "--stdio"
+    fetch:
+      transport: "stdio"
+      command: "npx"
+      args:
+        - "--yes"
+        - "@mtp-devtools/mcp-server-fetch"
+        - "--stdio"
+
+# Zhipu AI configuration
+# Note: The actual API key should be in mcp_agent.secrets.yaml, this is just an example
+zhipu:
+  api_key: "<your-api-key>"
+  # Default model can be set to any of the following:
+  # - glm-4: Base large model
+  # - glm-4-plus: Enhanced large model with stronger capabilities
+  # - glm-4-long: Large model with longer context support
+  # - glm-4-flashx-250414: High-performance Flash model
+  # - glm-4-flash-250414: Standard Flash model
+  # - glm-4-air-250414: Lightweight large model
+  # - glm-4v: Zhipu vision large model
+  # - glm-3-turbo: Zhipu basic conversation model
+  default_model: "glm-4-flashx-250414" 
diff --git a/pyproject.toml b/pyproject.toml
@@ -29,6 +29,7 @@ dependencies = [
     "prompt-toolkit>=3.0.50",
     "aiohttp>=3.11.13",
     "websockets>=12.0",
+    "zhipuai>=2.1.5",
 ]
 
 [project.optional-dependencies]
@@ -53,6 +54,9 @@ google = [
 cohere = [
     "cohere>=5.13.4",
 ]
+zhipu = [
+    "zhipuai>=2.1.5",
+]
 
 [build-system]
 requires = ["hatchling"]

diff --git a/src/mcp_agent/config.py b/src/mcp_agent/config.py
@@ -282,6 +282,23 @@ class LoggerSettings(BaseModel):
     """HTTP timeout seconds for event transport"""
 
 
+class ZhipuSettings(BaseModel):
+    """
+    Settings for using Zhipu AI models.
+    """
+
+    api_key: str | None = None
+    """API key for authentication."""
+
+    base_url: str | None = None
+    """Base URL for the Zhipu AI API (optional, defaults to official API endpoint)."""
+
+    default_model: str = "glm-4"
+    """Default model to use."""
+
+    model_config = ConfigDict(extra="allow", arbitrary_types_allowed=True)
+
+
 class Settings(BaseSettings):
     """
     Settings class for the MCP Agent application.
@@ -331,6 +348,9 @@ class Settings(BaseSettings):
     usage_telemetry: UsageTelemetrySettings | None = UsageTelemetrySettings()
     """Usage tracking settings for the MCP Agent application"""
 
+    zhipu: ZhipuSettings | None = None
+    """Settings for using Zhipu AI models in the MCP Agent application"""
+
     @classmethod
     def find_config(cls) -> Path | None:
         """Find the config file in the current directory or parent directories."""

diff --git a/src/mcp_agent/data/artificial_analysis_llm_benchmarks.json b/src/mcp_agent/data/artificial_analysis_llm_benchmarks.json
@@ -6246,5 +6246,137 @@
         "bbh_score": null
       }
     }
+  },
+  {
+    "name": "glm-4-flash",
+    "description": "high-performance general-purpose language model",
+    "provider": "ZhipuAI",
+    "metrics": {
+      "cost": {
+        "blended_cost_per_1m": 2.8,
+        "input_cost_per_1m": 1.8,
+        "output_cost_per_1m": 8.0
+      },
+      "speed": {
+        "time_to_first_token_ms": 900.0,
+        "tokens_per_second": 90.0
+      },
+      "intelligence": {
+        "quality_score": 76.0,
+        "mmlu_score": 82.0,
+        "gsm8k_score": 80.0,
+        "bbh_score": 72.0
+      }
+    }
+  },
+  {
+    "name": "glm-4-plus",
+    "description": "enhanced version of the large model, with stronger intelligence",
+    "provider": "ZhipuAI",
+    "metrics": {
+      "cost": {
+        "blended_cost_per_1m": 4.0,
+        "input_cost_per_1m": 2.5,
+        "output_cost_per_1m": 12.0
+      },
+      "speed": {
+        "time_to_first_token_ms": 1100.0,
+        "tokens_per_second": 85.0
+      },
+      "intelligence": {
+        "quality_score": 82.0,
+        "mmlu_score": 85.0,
+        "gsm8k_score": 83.0,
+        "bbh_score": 75.0
+      }
+    }
+  },
+  {
+    "name": "glm-4-long",
+    "description": "supports longer context large model",
+    "provider": "ZhipuAI",
+    "metrics": {
+      "cost": {
+        "blended_cost_per_1m": 6.0,
+        "input_cost_per_1m": 4.0,
+        "output_cost_per_1m": 14.0
+      },
+      "speed": {
+        "time_to_first_token_ms": 1200.0,
+        "tokens_per_second": 75.0
+      },
+      "intelligence": {
+        "quality_score": 80.0,
+        "mmlu_score": 83.0,
+        "gsm8k_score": 82.0,
+        "bbh_score": 74.0
+      }
+    }
+  },
+  {
+    "name": "glm-4-flashx-250414",
+    "description": "high-performance Flash model, faster response speed",
+    "provider": "ZhipuAI",
+    "metrics": {
+      "cost": {
+        "blended_cost_per_1m": 3.0,
+        "input_cost_per_1m": 1.9,
+        "output_cost_per_1m": 9.0
+      },
+      "speed": {
+        "time_to_first_token_ms": 800.0,
+        "tokens_per_second": 100.0
+      },
+      "intelligence": {
+        "quality_score": 78.0,
+        "mmlu_score": 80.0,
+        "gsm8k_score": 78.0,
+        "bbh_score": 72.0
+      }
+    }
+  },
+  {
+    "name": "glm-4-flash-250414",
+    "description": "standard Flash model, balanced speed and performance",
+    "provider": "ZhipuAI",
+    "metrics": {
+      "cost": {
+        "blended_cost_per_1m": 2.5,
+        "input_cost_per_1m": 1.6,
+        "output_cost_per_1m": 7.0
+      },
+      "speed": {
+        "time_to_first_token_ms": 850.0,
+        "tokens_per_second": 95.0
+      },
+      "intelligence": {
+        "quality_score": 75.0,
+        "mmlu_score": 78.0,
+        "gsm8k_score": 76.0,
+        "bbh_score": 70.0
+      }
+    }
+  },
+  {
+    "name": "glm-4-air-250414",
+    "description": "lightweight large model, lower resource consumption",
+    "provider": "ZhipuAI",
+    "metrics": {
+      "cost": {
+        "blended_cost_per_1m": 2.0,
+        "input_cost_per_1m": 1.2,
+        "output_cost_per_1m": 6.0
+      },
+      "speed": {
+        "time_to_first_token_ms": 750.0,
+        "tokens_per_second": 110.0
+      },
+      "intelligence": {
+        "quality_score": 70.0,
+        "mmlu_score": 72.0,
+        "gsm8k_score": 70.0,
+        "bbh_score": 65.0
+      }
+    }
   }
-]
+]
diff --git a/src/mcp_agent/workflows/llm/augmented_llm_azure.py b/src/mcp_agent/workflows/llm/augmented_llm_azure.py
@@ -125,9 +125,7 @@ async def generate(self, message, request_params: RequestParams | None = None):
 
         system_prompt = self.instruction or params.systemPrompt
         if system_prompt and len(messages) == 0:
-            messages.append(
-                SystemMessage(content=system_prompt)
-            )
+            messages.append(SystemMessage(content=system_prompt))
 
         if isinstance(message, str):
             messages.append(UserMessage(content=message))