diff --git a/examples/mcp_basic_qwen_agent/README.md b/examples/mcp_basic_qwen_agent/README.md
new file mode 100644
index 000000000..6e87c479c
--- /dev/null
+++ b/examples/mcp_basic_qwen_agent/README.md
@@ -0,0 +1,23 @@
+# MCP Qwen Agent Example - "Finder" Agent
+
+This example demonstrates how to create and run a basic "Finder" Agent using Qwen models via Ollama's OpenAI-compatible API and MCP. The Agent has access to both the `fetch` and `filesystem` MCP servers, enabling it to retrieve information from URLs and the local file system.
+
+## Prerequisites
+
+- [Ollama](https://ollama.ai/) installed and running
+- Qwen model pulled in Ollama (run `ollama pull qwen2.5:32b`)
+
+## Setup
+
+Before running the agent, ensure you have Ollama installed and the Qwen models pulled:
+
+```bash
+# Install Ollama (Mac/Linux)
+curl -fsSL https://ollama.com/install.sh | sh
+
+# Start the Ollama service
+ollama serve
+
+# Pull the Qwen model (in another terminal)
+ollama pull qwen2.5:32b
+```
diff --git a/examples/mcp_basic_qwen_agent/main.py b/examples/mcp_basic_qwen_agent/main.py
new file mode 100644
index 000000000..328a3eac2
--- /dev/null
+++ b/examples/mcp_basic_qwen_agent/main.py
@@ -0,0 +1,95 @@
+import asyncio
+import os
+import time
+
+from mcp_agent.app import MCPApp
+from mcp_agent.config import (
+    Settings,
+    LoggerSettings,
+    MCPSettings,
+    MCPServerSettings,
+    QwenSettings,
+)
+from mcp_agent.agents.agent import Agent
+from mcp_agent.workflows.llm.augmented_llm import RequestParams
+from mcp_agent.workflows.llm.augmented_llm_qwen import QwenAugmentedLLM
+
+settings = Settings(
+    execution_engine="asyncio",
+    logger=LoggerSettings(type="file", level="debug"),
+    mcp=MCPSettings(
+        servers={
+            "fetch": MCPServerSettings(
+                command="uvx",
+                args=["mcp-server-fetch"],
+            ),
+            "filesystem": MCPServerSettings(
+                command="npx",
+                args=["-y", "@modelcontextprotocol/server-filesystem"],
+            ),
+        }
+    ),
+    qwen=QwenSettings(
+        api_key="ollama",  # Default for Ollama
+        base_url="http://localhost:11434/v1",  # Ollama's OpenAI-compatible endpoint
+        default_model="qwen2.5:32b",
+    ),
+)
+
+# Settings can either be specified programmatically,
+# or loaded from mcp_agent.config.yaml/mcp_agent.secrets.yaml
+app = MCPApp(name="mcp_basic_qwen_agent")  # settings=settings)
+
+
+async def example_usage():
+    async with app.run() as agent_app:
+        logger = agent_app.logger
+        context = agent_app.context
+
+        logger.info("Current config:", data=context.config.model_dump())
+
+        # Add the current directory to the filesystem server's args
+        context.config.mcp.servers["filesystem"].args.extend([os.getcwd()])
+
+        finder_agent = Agent(
+            name="finder",
+            instruction="""You are an agent with access to the filesystem,
+            as well as the ability to fetch URLs. Your job is to identify
+            the closest match to a user's request, make the appropriate tool calls,
+            and return the URI and CONTENTS of the closest match.""",
+            server_names=["fetch", "filesystem"],
+        )
+
+        async with finder_agent:
+            logger.info("finder: Connected to server, calling list_tools...")
+            result = await finder_agent.list_tools()
+            logger.info("Tools available:", data=result.model_dump())
+
+            llm = await finder_agent.attach_llm(QwenAugmentedLLM)
+            result = await llm.generate_str(
+                message="Print the contents of mcp_agent.config.yaml verbatim",
+            )
+            logger.info(f"mcp_agent.config.yaml contents: {result}")
+
+            result = await llm.generate_str(
+                message="Print the first 2 paragraphs of https://modelcontextprotocol.io/introduction",
+            )
+            logger.info(f"First 2 paragraphs of Model Context Protocol docs: {result}")
+
+            # Multi-turn conversations with specific model
+            result = await llm.generate_str(
+                message="Summarize those paragraphs in a 128 character tweet",
+                request_params=RequestParams(
+                    model="qwen2.5:32b",  # You can specify different Qwen models
+                ),
+            )
+            logger.info(f"Paragraph as a tweet: {result}")
+
+
+if __name__ == "__main__":
+    start = time.time()
+    asyncio.run(example_usage())
+    end = time.time()
+    t = end - start
+
+    print(f"Total run time: {t:.2f}s")
diff --git a/examples/mcp_basic_qwen_agent/mcp_agent.config.yaml b/examples/mcp_basic_qwen_agent/mcp_agent.config.yaml
new file mode 100644
index 000000000..c21dfa9d6
--- /dev/null
+++ b/examples/mcp_basic_qwen_agent/mcp_agent.config.yaml
@@ -0,0 +1,26 @@
+$schema: ../../schema/mcp-agent.config.schema.json
+
+execution_engine: asyncio
+logger:
+  transports: [console, file]
+  level: debug
+  progress_display: true
+  path_settings:
+    path_pattern: "logs/mcp-agent-{unique_id}.jsonl"
+    unique_id: "timestamp" # Options: "timestamp" or "session_id"
+    timestamp_format: "%Y%m%d_%H%M%S"
+
+mcp:
+  servers:
+    fetch:
+      command: "uvx"
+      args: ["mcp-server-fetch"]
+    filesystem:
+      command: "npx"
+      args: ["-y", "@modelcontextprotocol/server-filesystem"]
+
+qwen:
+  # Ollama OpenAI-compatible API settings
+  api_key: "ollama"
+  base_url: "http://localhost:11434/v1"
+  default_model: "qwen2.5:32b"
diff --git a/schema/mcp-agent.config.schema.json b/schema/mcp-agent.config.schema.json
index 73c6be049..6d6773b8a 100644
--- a/schema/mcp-agent.config.schema.json
+++ b/schema/mcp-agent.config.schema.json
@@ -33,10 +33,7 @@
           "type": "string"
         }
       },
-      "required": [
-        "api_key",
-        "endpoint"
-      ],
+      "required": ["api_key", "endpoint"],
       "title": "AzureSettings",
       "type": "object"
     },
@@ -188,10 +185,7 @@
         },
         "unique_id": {
           "default": "timestamp",
-          "enum": [
-            "timestamp",
-            "session_id"
-          ],
+          "enum": ["timestamp", "session_id"],
           "title": "Unique Id",
           "type": "string"
         },
@@ -209,24 +203,14 @@
       "properties": {
         "type": {
           "default": "console",
-          "enum": [
-            "none",
-            "console",
-            "file",
-            "http"
-          ],
+          "enum": ["none", "console", "file", "http"],
           "title": "Type",
           "type": "string"
         },
         "transports": {
           "default": [],
           "items": {
-            "enum": [
-              "none",
-              "console",
-              "file",
-              "http"
-            ],
+            "enum": ["none", "console", "file", "http"],
             "type": "string"
           },
           "title": "Transports",
@@ -235,12 +219,7 @@
         },
         "level": {
           "default": "info",
-          "enum": [
-            "debug",
-            "info",
-            "warning",
-            "error"
-          ],
+          "enum": ["debug", "info", "warning", "error"],
           "title": "Level",
           "type": "string",
           "description": "Minimum logging level"
         },
@@ -361,9 +340,7 @@
           "description": "Optional URI alias for presentation to the server"
         }
       },
-      "required": [
-        "uri"
-      ],
+      "required": ["uri"],
       "title": "MCPRootSettings",
"MCPRootSettings", "type": "object" }, @@ -418,11 +395,7 @@ }, "transport": { "default": "stdio", - "enum": [ - "stdio", - "sse", - "websocket" - ], + "enum": ["stdio", "sse", "websocket"], "title": "Transport", "type": "string", "description": "The transport mechanism." @@ -580,11 +553,7 @@ }, "reasoning_effort": { "default": "medium", - "enum": [ - "low", - "medium", - "high" - ], + "enum": ["low", "medium", "high"], "title": "Reasoning Effort", "type": "string" }, @@ -699,10 +668,7 @@ "title": "Api Key" } }, - "required": [ - "host", - "task_queue" - ], + "required": ["host", "task_queue"], "title": "TemporalSettings", "type": "object" }, @@ -724,6 +690,38 @@ }, "title": "UsageTelemetrySettings", "type": "object" + }, + "QwenSettings": { + "additionalProperties": true, + "description": "Settings for using Qwen models through Ollama's OpenAI-compatible API.", + "properties": { + "api_key": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Api Key" + }, + "base_url": { + "title": "Base Url", + "type": "string", + "description": "Base URL for the Ollama API (e.g., http://localhost:11434/v1)" + }, + "default_model": { + "default": "qwen2.5-coder-32b-instruct", + "title": "Default Model", + "type": "string", + "description": "Default Qwen model to use" + } + }, + "required": ["base_url"], + "title": "QwenSettings", + "type": "object" } }, "additionalProperties": true, @@ -745,10 +743,7 @@ }, "execution_engine": { "default": "asyncio", - "enum": [ - "asyncio", - "temporal" - ], + "enum": ["asyncio", "temporal"], "title": "Execution Engine", "type": "string", "description": "Execution engine for the MCP Agent application" @@ -896,9 +891,21 @@ "enable_detailed_telemetry": false }, "description": "Usage tracking settings for the MCP Agent application" + }, + "qwen": { + "anyOf": [ + { + "$ref": "#/$defs/QwenSettings" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Settings for using Qwen models through Ollama's OpenAI-compatible API" } }, "title": "MCP Agent Configuration Schema", "type": "object", "$schema": "http://json-schema.org/draft-07/schema#" -} \ No newline at end of file +} diff --git a/src/mcp_agent/config.py b/src/mcp_agent/config.py index 5a6f51910..1f93bea0c 100644 --- a/src/mcp_agent/config.py +++ b/src/mcp_agent/config.py @@ -282,6 +282,23 @@ class LoggerSettings(BaseModel): """HTTP timeout seconds for event transport""" +class QwenSettings(BaseModel): + """ + Settings for using Qwen models through Ollama's OpenAI-compatible API. + """ + + api_key: str | None = None + """API key for authentication.""" + + base_url: str + """Base URL for the Ollama API (e.g., http://localhost:11434/v1).""" + + default_model: str = "qwen2.5-coder-32b-instruct" + """Default Qwen model to use.""" + + model_config = ConfigDict(extra="allow", arbitrary_types_allowed=True) + + class Settings(BaseSettings): """ Settings class for the MCP Agent application. 
@@ -331,6 +348,9 @@ class Settings(BaseSettings):
     usage_telemetry: UsageTelemetrySettings | None = UsageTelemetrySettings()
     """Usage tracking settings for the MCP Agent application"""
 
+    qwen: QwenSettings | None = None
+    """Settings for using Qwen models in the MCP Agent application"""
+
     @classmethod
     def find_config(cls) -> Path | None:
         """Find the config file in the current directory or parent directories."""
diff --git a/src/mcp_agent/workflows/llm/augmented_llm_qwen.py b/src/mcp_agent/workflows/llm/augmented_llm_qwen.py
new file mode 100644
index 000000000..7f6560879
--- /dev/null
+++ b/src/mcp_agent/workflows/llm/augmented_llm_qwen.py
@@ -0,0 +1,374 @@
+import json
+import re
+from typing import Iterable, List, Optional, Type, Union, Dict, Any
+from datetime import datetime
+
+from openai import OpenAI
+from openai.types.chat import (
+    ChatCompletionAssistantMessageParam,
+    ChatCompletionMessage,
+    ChatCompletionMessageParam,
+    ChatCompletionSystemMessageParam,
+    ChatCompletionUserMessageParam,
+)
+from mcp.types import (
+    CallToolRequestParams,
+    CallToolRequest,
+    ModelPreferences,
+)
+
+from mcp_agent.workflows.llm.augmented_llm import (
+    AugmentedLLM,
+    ModelT,
+    RequestParams,
+)
+from mcp_agent.workflows.llm.augmented_llm_openai import (
+    MCPOpenAITypeConverter,
+    mcp_content_to_openai_content,
+    openai_content_to_mcp_content,
+)
+from mcp_agent.logging.logger import get_logger
+
+
+class QwenAugmentedLLM(AugmentedLLM[ChatCompletionMessageParam, ChatCompletionMessage]):
+    """
+    An implementation of AugmentedLLM that uses Qwen2.5 models through Ollama's
+    OpenAI-compatible API interface, with support for Qwen's specific function
+    calling template format.
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, type_converter=MCPOpenAITypeConverter, **kwargs)
+
+        self.provider = "Qwen"
+        # Initialize logger with name if available
+        self.logger = get_logger(f"{__name__}.{self.name}" if self.name else __name__)
+
+        self.model_preferences = self.model_preferences or ModelPreferences(
+            costPriority=0.3,
+            speedPriority=0.4,
+            intelligencePriority=0.3,
+        )
+
+        # Get default model from config if available
+        chosen_model = "qwen2.5-coder-32b-instruct"  # Fallback default
+
+        if self.context and self.context.config and self.context.config.qwen:
+            if hasattr(self.context.config.qwen, "default_model"):
+                chosen_model = self.context.config.qwen.default_model
+
+        self.default_request_params = self.default_request_params or RequestParams(
+            model=chosen_model,
+            modelPreferences=self.model_preferences,
+            maxTokens=4096,
+            systemPrompt=self.instruction,
+            parallel_tool_calls=False,
+            max_iterations=10,
+            use_history=True,
+        )
+
+    @classmethod
+    def convert_message_to_message_param(
+        cls, message: ChatCompletionMessage, **kwargs
+    ) -> ChatCompletionMessageParam:
+        """Convert a response object to an input parameter object to allow LLM calls to be chained."""
+        assistant_message_params = {
+            "role": "assistant",
+            "audio": message.audio,
+            "refusal": message.refusal,
+            **kwargs,
+        }
+        if message.content is not None:
+            assistant_message_params["content"] = message.content
+        if message.tool_calls is not None:
+            assistant_message_params["tool_calls"] = message.tool_calls
+
+        return ChatCompletionAssistantMessageParam(**assistant_message_params)
+
+    def _format_qwen_system_message(
+        self, instruction: str, tools: List[Dict[str, Any]]
+    ) -> str:
+        """Format a system message with tools in Qwen2.5's expected format."""
+        current_date = datetime.now().strftime("%Y-%m-%d")
+
system_message = f"""You are Qwen, created by Alibaba Cloud. You are a helpful assistant. + +Current Date: {current_date} + +# Tools + +You may call one or more functions to assist with the user query. + +You are provided with function signatures within XML tags: + +""" + + # Add tool definitions + for tool in tools: + system_message += json.dumps(tool) + "\n" + + system_message += """ + +For each function call, return a json object with function name and arguments within XML tags: + +{"name": , "arguments": } + + +""" + + # Add any additional instructions + if instruction: + system_message += f"\n{instruction}" + + return system_message + + def _process_tool_response(self, tool_call_id: str, content: str) -> str: + """Format a tool response in Qwen2.5's expected format.""" + return f"\n{content}\n" + + async def generate(self, message, request_params: RequestParams | None = None): + """ + Process a query using an LLM and available tools. + This implementation uses Qwen2.5 with Ollama's OpenAI-compatible API. + """ + config = self.context.config + openai_client = OpenAI( + api_key=config.qwen.api_key, base_url=config.qwen.base_url + ) + messages: List[ChatCompletionMessageParam] = [] + params = self.get_request_params(request_params) + + # Get available tools + response = await self.aggregator.list_tools() + available_tools: List[Dict[str, Any]] = [ + { + "type": "function", + "function": { + "name": tool.name, + "description": tool.description, + "parameters": tool.inputSchema, + }, + } + for tool in response.tools + ] + + # Format system message with Qwen-specific template + system_prompt = self._format_qwen_system_message( + self.instruction or params.systemPrompt, available_tools + ) + + if len(messages) == 0: + messages.append( + ChatCompletionSystemMessageParam(role="system", content=system_prompt) + ) + + # Add history if requested + if params.use_history: + history = self.history.get() + # Skip the initial system message from history if we just added one + if ( + history + and isinstance(history[0], dict) + and history[0].get("role") == "system" + and len(messages) > 0 + and isinstance(messages[0], dict) + and messages[0].get("role") == "system" + ): + history = history[1:] + messages.extend(history) + + # Add the current message + if isinstance(message, str): + messages.append( + ChatCompletionUserMessageParam(role="user", content=message) + ) + elif isinstance(message, list): + messages.extend(message) + else: + messages.append(message) + + responses: List[ChatCompletionMessage] = [] + model = await self.select_model(params) + + for i in range(params.max_iterations): + arguments = { + "model": model, + "messages": messages, + "max_tokens": params.maxTokens, + "stop": params.stopSequences, + } + + if params.metadata: + arguments = {**arguments, **params.metadata} + + self.logger.debug(f"{arguments}") + self._log_chat_progress(chat_turn=len(messages) // 2, model=model) + + executor_result = await self.executor.execute( + openai_client.chat.completions.create, **arguments + ) + + response = executor_result[0] + + self.logger.debug( + "Qwen ChatCompletion response:", + data=response, + ) + + if isinstance(response, BaseException): + self.logger.error(f"Error: {response}") + break + + if not response.choices or len(response.choices) == 0: + # No response from the model, we're done + break + + choice = response.choices[0] + message = choice.message + responses.append(message) + + # Extract tool calls from the content using regex + content = message.content or "" + tool_calls_matches = 
+                r"<tool_call>(.*?)</tool_call>", content, re.DOTALL
+            )
+
+            tool_tasks = []
+
+            for tool_call_match in tool_calls_matches:
+                try:
+                    tool_call_data = json.loads(tool_call_match.strip())
+                    tool_name = tool_call_data.get("name")
+                    tool_args = tool_call_data.get("arguments", {})
+
+                    if not tool_name:
+                        continue
+
+                    tool_call_id = f"call_{len(tool_tasks)}"
+
+                    tool_call_request = CallToolRequest(
+                        method="tools/call",
+                        params=CallToolRequestParams(
+                            name=tool_name, arguments=tool_args
+                        ),
+                    )
+
+                    # Add the task
+                    tool_tasks.append(
+                        self.call_tool(
+                            request=tool_call_request, tool_call_id=tool_call_id
+                        )
+                    )
+
+                except json.JSONDecodeError:
+                    self.logger.error(f"Failed to parse tool call: {tool_call_match}")
+                    continue
+
+            if tool_tasks:
+                # Execute all tools in parallel
+                tool_results = await self.executor.execute(*tool_tasks)
+                self.logger.debug(
+                    f"Iteration {i}: Tool call results: {str(tool_results) if tool_results else 'None'}"
+                )
+
+                # Create user message with tool responses
+                tool_responses = []
+                for idx, result in enumerate(tool_results):
+                    if isinstance(result, BaseException):
+                        self.logger.error(
+                            f"Warning: Unexpected error during tool execution: {result}. Continuing..."
+                        )
+                        continue
+
+                    if result.content:
+                        tool_id = f"call_{idx}"
+                        content_str = "\n".join(
+                            json.dumps(content.__dict__, default=str)
+                            for content in result.content
+                        )
+                        tool_responses.append(
+                            self._process_tool_response(tool_id, content_str)
+                        )
+
+                if tool_responses:
+                    tool_response_message = ChatCompletionUserMessageParam(
+                        role="user", content="\n".join(tool_responses)
+                    )
+                    messages.append(tool_response_message)
+            else:
+                # No tool calls, we're done with this iteration
+                break
+
+        if params.use_history:
+            self.history.set(messages)
+
+        self._log_chat_finished(model=model)
+
+        return responses
+
+    async def generate_str(
+        self,
+        message,
+        request_params: RequestParams | None = None,
+    ):
+        """
+        Process a query using Qwen2.5 and return the result as a string.
+        """
+        responses = await self.generate(
+            message=message,
+            request_params=request_params,
+        )
+
+        final_text: List[str] = []
+
+        for response in responses:
+            content = response.content
+            if not content:
+                continue
+
+            # Remove any tool_call XML tags from the response
+            cleaned_content = re.sub(
+                r"<tool_call>.*?</tool_call>", "", content, flags=re.DOTALL
+            )
+
+            if cleaned_content.strip():
+                final_text.append(cleaned_content.strip())
+
+        return "\n".join(final_text)
+
+    async def generate_structured(
+        self,
+        message,
+        response_model: Type[ModelT],
+        request_params: RequestParams | None = None,
+    ) -> ModelT:
+        """
+        Generate a structured response using Instructor with Qwen2.5.
+        """
+        import instructor
+
+        response = await self.generate_str(
+            message=message,
+            request_params=request_params,
+        )
+
+        client = instructor.from_openai(
+            OpenAI(
+                api_key=self.context.config.qwen.api_key,
+                base_url=self.context.config.qwen.base_url,
+            ),
+            mode=instructor.Mode.TOOLS,
+        )
+
+        params = self.get_request_params(request_params)
+        model = await self.select_model(params)
+
+        structured_response = client.chat.completions.create(
+            model=model,
+            response_model=response_model,
+            messages=[
+                {"role": "user", "content": response},
+            ],
+        )
+
+        return structured_response
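
Reviewer note (not part of the patch): a minimal, self-contained sketch of how the `<tool_call>` extraction and stripping added in `augmented_llm_qwen.py` is expected to behave on a typical Qwen2.5 reply. The reply text and the `read_file` tool name below are illustrative assumptions, not output from the example.

```python
# Standalone sketch of the <tool_call> handling used in QwenAugmentedLLM.
# The assistant reply and tool name are made up for illustration only.
import json
import re

reply = (
    "I'll read that file for you.\n"
    "<tool_call>\n"
    '{"name": "read_file", "arguments": {"path": "mcp_agent.config.yaml"}}\n'
    "</tool_call>"
)

# Same pattern generate() uses to pull tool calls out of the assistant message.
for match in re.findall(r"<tool_call>(.*?)</tool_call>", reply, re.DOTALL):
    call = json.loads(match.strip())
    print(call["name"], call["arguments"])  # read_file {'path': 'mcp_agent.config.yaml'}

# Same pattern generate_str() uses to strip tool calls from the final text.
print(re.sub(r"<tool_call>.*?</tool_call>", "", reply, flags=re.DOTALL).strip())
```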