Remove tapeagents dependency from backends core; add a standalone MCP client and small fixes

ollmer · ollmer · commit 4117e0a2049e · 2025-11-07T17:40:00.000Z
diff --git a/src/agentlab/backends/browser/base.py b/src/agentlab/backends/browser/base.py
@@ -1,14 +1,75 @@
 import logging
+from typing import Any, Callable, Literal
 
-from mcp.types import ImageContent, TextContent
+from langchain_core.utils.function_calling import convert_to_openai_tool
 from PIL import Image
 from pydantic import BaseModel
-from tapeagents.mcp import MCPEnvironment
-from tapeagents.tool_calling import FunctionCall, ToolCallAction, ToolSpec
 
 logger = logging.getLogger(__name__)
 
 
+class FunctionCall(BaseModel):
+    """
+    The name and arguments of a single function call.
+
+    Attributes:
+        name (str): The name of the function being called.
+        arguments (Any): The arguments to pass; declared `Any`, so their shape is not checked.
+    """
+
+    name: str
+    arguments: Any
+
+
+class FunctionSpec(BaseModel):
+    """
+    The specification of a function: its name, description and parameters.
+
+    Attributes:
+        name (str): The name of the function.
+        description (str): A brief description of the function.
+        parameters (dict): A dictionary describing the function's parameters (e.g. a JSON schema).
+    """
+
+    name: str
+    description: str
+    parameters: dict
+
+
+class ToolCallAction(BaseModel):
+    id: str = ""  # identifier of this call; defaults to the empty string
+    function: FunctionCall  # name and arguments of the function being called
+
+
+class ToolSpec(BaseModel):
+    """
+    A tool specification: a fixed "function" type tag plus the specification of the function.
+
+    Attributes:
+        type (Literal["function"]): The type of the tool, which is always "function".
+        function (FunctionSpec): The specification of the function.
+    """
+
+    type: Literal["function"] = "function"
+    function: FunctionSpec
+
+    def description(self) -> str:
+        return f"{self.function.name} - {self.function.description}"
+
+    @classmethod
+    def from_function(cls, function: Callable):
+        """
+        Builds a ToolSpec by validating the `convert_to_openai_tool` output for a function.
+
+        Args:
+            function (Callable): The function to be converted and validated.
+
+        Returns:
+            (ToolSpec): An instance of the class with the validated model.
+        """
+        return cls.model_validate(convert_to_openai_tool(function))
+
+
 class BrowserBackend(BaseModel):
     def initialize(self) -> None:
         raise NotImplementedError
@@ -33,32 +94,3 @@ def actions(self) -> tuple[ToolSpec]:
 
     def close(self) -> None:
         raise NotImplementedError
-
-
-class MCPBrowserBackend(BrowserBackend):
-    config_path: str
-    _mcp = None
-
-    def initialize(self) -> None:
-        self._mcp = MCPEnvironment(config_path=self.config_path)
-        self._mcp.initialize()
-
-    def step(self, action: ToolCallAction) -> dict:
-        contents = self._call_mcp(action)
-        text = "\n".join([c.text for c in contents if c.type == "text"])
-        return {"pruned_html": text, "axtree_txt": text}
-
-    def call_tool(self, tool_name: str, arguments: dict) -> list[TextContent | ImageContent]:
-        return self._call_mcp(
-            ToolCallAction(function=FunctionCall(name=tool_name, arguments=arguments))
-        )
-
-    def _call_mcp(self, action: ToolCallAction) -> list[TextContent | ImageContent]:
-        tool_result = self._mcp.step(action)
-        return tool_result.content.content
-
-    def actions(self) -> tuple[ToolSpec]:
-        return self._mcp.actions()
-
-    def close(self) -> None:
-        self._mcp.close()
diff --git a/src/agentlab/backends/browser/mcp.py b/src/agentlab/backends/browser/mcp.py
@@ -0,0 +1,169 @@
+import asyncio
+import json
+import logging
+import os
+from contextlib import AsyncExitStack
+from datetime import timedelta
+from typing import Any
+
+from mcp import ClientSession, StdioServerParameters, stdio_client
+from mcp import Tool as MCPTool
+from mcp.types import CallToolResult, ImageContent, TextContent
+
+from agentlab.backends.browser.base import BrowserBackend, FunctionSpec, ToolCallAction, ToolSpec
+
+logger = logging.getLogger(__name__)
+
+
+class MCPClient:
+    """Synchronous and async client for the stdio MCP servers listed in a JSON config file."""
+
+    def __init__(self, config_path: str, read_timeout_seconds: int = 10) -> None:
+        self.servers = self.load_config(config_path)
+        self.sessions: dict[str, ClientSession] = {}
+        self.tools: dict[str, MCPTool] = {}
+        self.tool_to_server: dict[str, str] = {}
+        self.read_timeout_seconds = read_timeout_seconds
+        self.exit_stack = AsyncExitStack()
+        self.loop = None
+
+    def initialize(self):
+        """Start all servers synchronously, creating an event loop if none is current."""
+        try:
+            self.loop = asyncio.get_event_loop()
+        except RuntimeError:
+            self.loop = asyncio.new_event_loop()
+            asyncio.set_event_loop(self.loop)
+        self.loop.run_until_complete(self.start_servers())
+
+    async def ainitialize(self) -> None:
+        """Async counterpart of `initialize`; note that it leaves `self.loop` unset."""
+        await self.start_servers()
+
+    async def start_servers(self):
+        """Open a session to every configured server and index its tools by name."""
+        timeout = timedelta(seconds=self.read_timeout_seconds)
+        for server_name, server_params in self.servers.items():
+            stdio, write = await self.exit_stack.enter_async_context(stdio_client(server_params))
+            session = await self.exit_stack.enter_async_context(
+                ClientSession(stdio, write, read_timeout_seconds=timeout)
+            )
+            await session.initialize()
+            self.sessions[server_name] = session
+            response = await session.list_tools()
+            for tool in response.tools:
+                if tool.name in self.tools:
+                    raise Exception(
+                        f"Tools conflict! Tool {tool.name} already provided by server '{self.tool_to_server[tool.name]}'"
+                    )
+                self.tools[tool.name] = tool
+                self.tool_to_server[tool.name] = server_name
+            logger.info(
+                f"Connected to MCP server '{server_name}' with tools: {[tool.name for tool in response.tools]}"
+            )
+        logger.info(f"Started {len(self.servers)} MCP servers")
+
+    def load_config(self, config_path) -> dict[str, StdioServerParameters]:
+        """Load stdio launch parameters for each server under `mcpServers` in a JSON config."""
+        assert os.path.exists(config_path), f"Config path {config_path} does not exist"
+        self.config_path = config_path
+        # Parsing and validation failures below are re-raised as ValueError, chained to the cause.
+        try:
+            with open(config_path, "r") as f:
+                self.config = json.load(f)
+        except json.JSONDecodeError as e:
+            raise ValueError(f"Failed to parse {config_path}, invalid json: {e}") from e
+        try:
+            server_configs: dict[str, dict] = self.config["mcpServers"]
+            if not isinstance(server_configs, dict) or not server_configs:
+                raise ValueError("mcpServers must be a non-empty dict")
+        except Exception as e:
+            raise ValueError(f"Failed to get MCP server configs from {config_path}: {e}") from e
+        servers: dict[str, StdioServerParameters] = {}
+        for server_name, server_config_dict in server_configs.items():
+            try:
+                server_config_dict = self.prepare_env_vars(server_config_dict)
+                servers[server_name] = StdioServerParameters.model_validate(server_config_dict)
+            except Exception as e:
+                raise ValueError(f"Failed to parse server config {server_config_dict}: {e}") from e
+        logger.info(f"Loaded {len(servers)} MCP server configs from {config_path}")
+        return servers
+
+    def prepare_env_vars(self, server_config_dict: dict) -> dict:
+        """Fill empty `env` entries of a server config (in place) from the process environment."""
+        if server_env := server_config_dict.get("env"):
+            for env_var, env_value in server_env.items():
+                if env_var in os.environ and not env_value:
+                    logger.info(f"Set mcp server env var {env_var} from current environment")
+                    server_config_dict["env"][env_var] = os.environ[env_var]
+        return server_config_dict
+
+    def call_tool(self, tool_name: str, tool_args: dict[str, Any]) -> CallToolResult:
+        """Blocking wrapper around `acall_tool`; `initialize()` must have run first."""
+        return self.loop.run_until_complete(self.acall_tool(tool_name, tool_args))
+
+    async def acall_tool(self, tool_name: str, tool_args: dict[str, Any]) -> CallToolResult:
+        """Call `tool_name` on the server that provides it; raises if no server has the tool."""
+        return await self._call_tool(self.check_tool_exists(tool_name), tool_name, tool_args)
+
+    async def _call_tool(
+        self, server_name: str, tool_name: str, tool_args: dict[str, Any]
+    ) -> CallToolResult:
+        """Invoke the tool through the named server's session, logging any failure."""
+        try:
+            return await self.sessions[server_name].call_tool(tool_name, tool_args)
+        except Exception as e:
+            logger.exception(f"Error calling tool {tool_name}: {e}")
+            raise  # re-raise unchanged so callers see the original exception
+
+    def check_tool_exists(self, tool_name):
+        """Return the name of the server providing `tool_name`; raise if the tool is unknown."""
+        if tool_name not in self.tool_to_server:
+            raise Exception(f"Tool {tool_name} not found in any of the MCP servers")
+        return self.tool_to_server[tool_name]
+
+    def actions(self) -> tuple[ToolSpec]:
+        """Return a ToolSpec for every discovered tool, as a tuple (not a lazy generator)."""
+        return tuple(
+            ToolSpec(
+                function=FunctionSpec(
+                    name=tool.name, description=tool.description or "", parameters=tool.inputSchema
+                )
+            )
+            for tool in self.tools.values()
+        )
+
+    async def close(self) -> None:
+        """Close every session and stdio transport opened by `start_servers`."""
+        await self.exit_stack.aclose()
+
+
+class MCPBrowserBackend(BrowserBackend):
+    config_path: str  # path to the JSON config listing the MCP servers to start
+    _mcp = None  # MCPClient instance; set by initialize()
+
+    def initialize(self) -> None:
+        self._mcp = MCPClient(config_path=self.config_path)
+        self._mcp.initialize()
+
+    def step(self, action: ToolCallAction) -> dict:
+        """Run the tool call; return its text under both text keys plus the last image, if any."""
+        contents = self.call_tool(action.function.name, action.function.arguments)
+        text = "\n".join(c.text for c in contents if c.type == "text")
+        screenshot = next((c for c in reversed(contents) if c.type == "image"), None)
+        return {"pruned_html": text, "axtree_txt": text, "screenshot": screenshot}
+
+    def call_tool(self, tool_name: str, arguments: dict) -> list[TextContent | ImageContent]:
+        """Call an MCP tool; on failure return one error TextContent built from the result's text."""
+        tool_result = self._mcp.call_tool(tool_name, arguments)
+        if tool_result.isError:
+            details = "\n".join(c.text for c in tool_result.content if c.type == "text")
+            return [TextContent(type="text", text=f"Error calling tool {tool_name}: {details}")]
+        return tool_result.content
+
+    def actions(self) -> tuple[ToolSpec]:
+        return self._mcp.actions()
+
+    def close(self) -> None:
+        """Shut down the MCP servers by running the client's async `close()` to completion."""
+        self._mcp.loop.run_until_complete(self._mcp.close())
diff --git a/src/agentlab/benchmarks/miniwob/benchmark.py b/src/agentlab/benchmarks/miniwob/benchmark.py
@@ -23,6 +23,7 @@ class MiniWobBenchmark(AbstractBenchmark):
     high_level_action_set_args: ToolsActionSet = None
 
     def model_post_init(self, __context: Any) -> None:
+        self.name = f"miniwob_{self.backend.__class__.__name__.lower()}"
         self.env_args_list = []
         if self.dataset is None:
             self.dataset = get_miniwob_tasks()
diff --git a/src/agentlab/benchmarks/miniwob/task.py b/src/agentlab/benchmarks/miniwob/task.py
@@ -31,6 +31,8 @@ class MiniWobTask(AbstractWebTask):
     ]
 
     def model_post_init(self, __context: Any):
+        if self.base_url.endswith("/"):
+            self.base_url = self.base_url[:-1]
         self.url = f"{self.base_url}/{self.subdomain}.html"
 
     def get_setup_js(self) -> str:

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -31,6 +31,8 @@ class MiniWobTask(AbstractWebTask):` |
| 31 | 31 | `    ]` |
| 32 | 32 |  |
| 33 | 33 | `    def model_post_init(self, __context: Any):` |
|  | 34 | `+        if self.base_url.endswith("/"):` |
|  | 35 | `+            self.base_url = self.base_url[:-1]` |
| 34 | 36 | `        self.url = f"{self.base_url}/{self.subdomain}.html"` |
| 35 | 37 |  |
| 36 | 38 | `    def get_setup_js(self) -> str:` |