MCP Server for the gym (#337)

ollmer · TLSDC · web-flow · commit 544bf9f06bda · 2025-04-22T12:18:59.000+02:00
* basic MCP server that exposes all tools from the action set
* separate pre and post-processing of a gym step from the step execution
* MCP server wraps every function into an async wrapper that calls the gym and sets up required global vars
---------

Co-authored-by: ThibaultLSDC <thibault.de.chezelles@gmail.com>
diff --git a/.gitignore b/.gitignore
@@ -150,3 +150,5 @@ tests/assistantbench/assistantbench-predictions-test.jsonl
 
 # weblinx
 bg_wl_data/
+
+uv.lock
diff --git a/browsergym/core/requirements.txt b/browsergym/core/requirements.txt
@@ -5,3 +5,4 @@ pyparsing>=3
 Pillow>=10.1
 beautifulsoup4>=4.12
 lxml>=4.9
+mcp[cli]>=1.6.0
diff --git a/browsergym/core/src/browsergym/core/env.py b/browsergym/core/src/browsergym/core/env.py
@@ -4,7 +4,7 @@
 import time
 from abc import ABC
 from pathlib import Path
-from typing import Literal, Optional
+from typing import Any, Callable, Literal, Optional
 
 import gymnasium as gym
 import numpy as np
@@ -371,10 +371,7 @@ def override_property(task, env, property):
 
         return obs, info
 
-    def step(self, action: str) -> tuple:
-
-        self.last_action = action
-
+    def pre_step(self) -> tuple[dict[str, Any], Callable, Callable]:
         info = {}
         info["action_exec_start"] = time.time()
         info["action_exec_timeout"] = 0
@@ -391,7 +388,25 @@ def report_infeasible_instructions(reason: str):
             self.infeasible_message_received = True
 
         # try to execute the action
-        logger.debug(f"Executing action")
+        logger.debug("Executing action")
+        return info, send_message_to_user, report_infeasible_instructions
+
+    def step(self, action: str) -> tuple[dict[str, Any], float, bool, bool, dict[str, Any]]:
+        """
+        Execute the action in the environment.
+
+        Args:
+            action: the action to execute. This should be a string with code or a function call
+
+        Returns:
+            obs: the observation after executing the action
+            reward: the reward received after executing the action
+            terminated: whether the episode is terminated or not
+            truncated: whether the episode is truncated or not
+            info: additional information about the step
+        """
+        self.last_action = action
+        info, send_message_to_user, report_infeasible_instructions = self.pre_step()
         try:
             if self.action_mapping:
                 code = self.action_mapping(action)
@@ -409,7 +424,25 @@ def report_infeasible_instructions(reason: str):
             match = re.match("TimeoutError: Timeout ([0-9]+)ms exceeded.", self.last_action_error)
             if match:
                 info["action_exec_timeout"] = float(match.groups()[0]) / 1000  # ms to sec
-        logger.debug(f"Action executed")
+        return self.post_step(info)
+
+    def post_step(
+        self, info: dict[str, Any], validate: bool = True
+    ) -> tuple[dict[str, Any], float, bool, bool, dict[str, Any]]:
+        """
+        Post step method, called after executing the action.
+        This method is responsible for extracting the observation after the action.
+        It also prepares reward, task status, user message and other step info.
+        Args:
+            info: dictionary containing information about the step
+        Returns:
+            obs: the observation after executing the action
+            reward: the reward received after executing the action
+            terminated: whether the episode is terminated or not
+            truncated: whether the episode is truncated or not
+            info: additional information about the step
+        """
+        logger.debug("Action executed")
         info["action_exec_stop"] = time.time()
 
         # wait a bit (for the JavaScript callback to set the active page)
@@ -419,35 +452,41 @@ def report_infeasible_instructions(reason: str):
         # wait for the network to idle before extracting the observation, reward etc.
         self._wait_dom_loaded()
 
-        # after the action is executed, the active page might have changed
-        # perform a safety check
-        self._active_page_check()
-        logger.debug(f"Active page checked")
-
-        # if asked, wait for user message
-        self._wait_for_user_message()
-        logger.debug(f"User message done")
-
-        logger.debug(f"Initiating task validation")
-        # extract reward, done, user_message, info (task-specific)
-        reward, done, user_message, task_info = self._task_validate()
-        info["task_info"] = task_info
-        logger.debug(f"Task validation done")
+        if validate:
+            # after the action is executed, the active page might have changed
+            # perform a safety check
+            self._active_page_check()
+            logger.debug("Active page checked")
+
+            # if asked, wait for user message
+            self._wait_for_user_message()
+            logger.debug("User message done")
+
+            logger.debug("Initiating task validation")
+            # extract reward, done, user_message, info (task-specific)
+            reward, done, user_message, task_info = self._task_validate()
+            info["task_info"] = task_info
+            logger.debug("Task validation done")
+        else:
+            reward = 0
+            done = False
+            user_message = None
+            info["task_info"] = {}
+            logger.debug("Task validation skipped")
 
         # add any user message sent by the task to the chat
         if user_message:
             self.chat.add_message(role="user", msg=user_message)
 
         # extract observation (generic)
         obs = self._get_obs()
-        logger.debug(f"Observation extracted")
+        logger.debug("Observation extracted")
 
         # new step API wants a 5-tuple (gymnasium)
         terminated = done or (
             self.terminate_on_infeasible and self.infeasible_message_received
         )  # task or agent can terminate the episode
-        truncated = False
-
+        truncated: bool = False
         return obs, reward, terminated, truncated, info
 
     def _task_validate(self):
@@ -506,7 +545,7 @@ def _active_page_check(self):
         # make sure there is always a page open
         # if all pages have been closed, create a new page
         if len(self.context.pages) == 0:
-            logger.warning(f"All pages are closed, opening a new page.")
+            logger.warning("All pages are closed, opening a new page.")
             self.page = self.context.new_page()
 
         # if the active page got closed, get the last active page from the history
diff --git a/browsergym/core/src/browsergym/utils/mcp_server.py b/browsergym/core/src/browsergym/utils/mcp_server.py
@@ -0,0 +1,192 @@
+# MCP server for BrowserGym
+import argparse
+import asyncio
+import re
+from collections.abc import AsyncIterator
+from contextlib import asynccontextmanager
+from dataclasses import dataclass, field
+from typing import Callable
+
+import gymnasium as gym
+from mcp.server.fastmcp import FastMCP
+
+from browsergym.core.action.highlevel import ACTION_SUBSETS, HighLevelActionSet
+from browsergym.core.env import BrowserEnv
+
+
+@dataclass
+class BgymConfig:
+    """Settings for the BrowserGym environment served by this MCP server."""
+
+    # forwarded to gym.make(headless=...)
+    headless: bool = True
+    # forwarded to gym.make(timeout=...)
+    timeout_ms: int = 10000
+    # forwarded to gym.make(record_video_dir=...)
+    record_video_dir: str | None = None
+    # passed to HighLevelActionSet and copied into the action functions module before each call
+    demo_mode: HighLevelActionSet.DemoMode = "default"
+    # names of the actions whose tool wrapper is registered with validate=True
+    validate_actions: list[str] = field(default_factory=list)
+
+
+@dataclass
+class AppContext:
+    """Objects yielded by app_lifespan and read by the tool wrappers through the lifespan context."""
+
+    # environment returned by gym.make; fn_wrapper unwraps it down to the BrowserEnv
+    gym: BrowserEnv
+    # module-level configuration built from the CLI arguments
+    config: BgymConfig
+    # id of the task passed to gym.make
+    task_id: str
+    # action set whose to_python_code is used as the env's action_mapping
+    actions: HighLevelActionSet
+
+
+def get_cli_args():
+    """Build the BrowserGym MCP server argument parser and parse the command line.
+
+    Arguments the parser does not recognise are ignored (parse_known_args) rather
+    than rejected.
+
+    Returns:
+        argparse.Namespace with task_id, headless, record_video_dir, demo_mode,
+        timeout_ms, subset and validate_actions.
+    """
+    cli = argparse.ArgumentParser(
+        description="BrowserGym MCP server",
+        usage="python browsergym/core/src/browsergym/utils/%(prog)s [options]",
+        epilog="To run Dev UI: mcp dev browsergym/core/src/browsergym/utils/mcp_server.py -e browsergym/core/",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    # (flags, add_argument keyword arguments); options are registered in this order
+    option_table = [
+        (
+            ("-t", "--task_id"),
+            {"type": str, "default": "browsergym/openended", "help": "Task ID to run"},
+        ),
+        (
+            ("-l", "--headless"),
+            {"action": "store_true", "help": "Run in headless mode"},
+        ),
+        (
+            ("-r", "--record_video_dir"),
+            {"type": str, "default": None, "help": "Directory to save recorded videos"},
+        ),
+        (
+            ("--demo_mode",),
+            {
+                "type": str,
+                "default": "off",
+                "choices": ["off", "default", "all_blue", "only_visible_elements"],
+                "help": "Demo mode for action set",
+            },
+        ),
+        (
+            ("--timeout_ms",),
+            {"type": int, "default": 10000, "help": "Timeout in milliseconds for each step"},
+        ),
+        (
+            ("--subset",),
+            {
+                "type": str,
+                "default": "workarena++",
+                "choices": ACTION_SUBSETS.keys(),
+                "help": "Subset of actions to use",
+            },
+        ),
+        (
+            ("--validate_actions",),
+            {
+                "type": str,
+                "nargs": "+",
+                "default": ["click", "goto"],
+                "help": "Names of actions for which validation should be performed",
+            },
+        ),
+    ]
+    for flags, options in option_table:
+        cli.add_argument(*flags, **options)
+
+    known_args, _ = cli.parse_known_args()
+    return known_args
+
+
+# The command line is parsed once at import time; `args`, `task_id` and `config`
+# are module-level globals read by app_lifespan, fn_wrapper and the tool registration loop.
+args = get_cli_args()
+task_id = args.task_id
+config = BgymConfig(
+    headless=args.headless,
+    timeout_ms=args.timeout_ms,
+    record_video_dir=args.record_video_dir,
+    demo_mode=args.demo_mode,
+    validate_actions=args.validate_actions,
+)
+
+
+@asynccontextmanager
+async def app_lifespan(server: FastMCP) -> AsyncIterator[AppContext]:
+    """Create the BrowserGym environment on startup and close it on shutdown.
+
+    Args:
+        server: the FastMCP server this lifespan is attached to (not used here).
+
+    Yields:
+        AppContext holding the environment, the module-level config, the task id
+        and the action set.
+    """
+    # Initialize on startup
+    actions = HighLevelActionSet(demo_mode=config.demo_mode, subsets=args.subset)
+    # the synchronous gym calls are run in a worker thread, off the event loop
+    _gym: BrowserEnv = await asyncio.to_thread(
+        gym.make,
+        task_id,
+        headless=config.headless,
+        record_video_dir=config.record_video_dir,
+        action_mapping=actions.to_python_code,
+        timeout=config.timeout_ms,
+        task_kwargs={"start_url": "about:blank"},
+    )  # type: ignore
+
+    try:
+        # reset inside the try block so the env is still closed if reset raises
+        await asyncio.to_thread(_gym.reset)
+        yield AppContext(gym=_gym, config=config, task_id=task_id, actions=actions)
+    finally:
+        # Cleanup on shutdown (and on a failed startup)
+        await asyncio.to_thread(_gym.close)
+
+
+# FastMCP server instance; app_lifespan builds the gym before serving and closes it afterwards
+mcp = FastMCP("BrowserGym", lifespan=app_lifespan)
+
+
+def format_func_call(func: Callable, args, kwargs) -> str:
+    """Render a call to ``func`` as source-like text, e.g. ``click('a1', button='left')``.
+
+    Positional values come first, then ``name=value`` pairs; every value is shown
+    with its ``repr``.
+    """
+    positional = ", ".join(map(repr, args))
+    keyword = ", ".join(f"{name}={value!r}" for name, value in kwargs.items())
+    # drop whichever part is empty so no stray separator is emitted
+    joined = ", ".join(part for part in (positional, keyword) if part)
+    return f"{func.__name__}({joined})"
+
+
+def fn_wrapper(func: Callable, validate: bool = True):
+    """
+    Wrap an action function into an async tool that executes it in the context of the gym.
+
+    Each call of the returned coroutine function:
+    1. Unwraps the BrowserEnv stored in the lifespan context
+    2. Loads the parent module of the function to use as function context
+    3. Executes the pre_step method of the gym
+    4. Sets up the module vars from the current state of the gym
+    5. Executes the function from this module and records any exception in last_action_error
+    6. Executes the post_step method of the gym and returns its result
+
+    Args:
+        func: action function to expose; its name and docstring are copied onto the wrapper
+        validate: forwarded to post_step to enable or skip task validation
+
+    Returns:
+        async function carrying func's __name__ and __doc__, with __wrapped__ set to func
+    """
+
+    async def decorator(*args, **kwargs):
+        # named `env` so the imported `gym` module is not shadowed
+        env: BrowserEnv = mcp.get_context().request_context.lifespan_context.gym  # type: ignore
+        # gym library wraps the BrowserEnv in a few layers (usually 2) of wrappers, this loop unwraps them
+        while not isinstance(env, BrowserEnv):
+            env = env.env
+
+        # Load the parent module of the function to use as function context
+        import browsergym.core.action.functions as fn_context
+
+        fn = getattr(fn_context, func.__name__)
+
+        env.last_action = format_func_call(fn, args, kwargs)
+        info, send_message_to_user, report_infeasible_instructions = await asyncio.to_thread(
+            env.pre_step
+        )
+
+        # Set up the module vars from the current state of the gym
+        fn_context.send_message_to_user = send_message_to_user
+        fn_context.report_infeasible_instructions = report_infeasible_instructions
+        fn_context.page = env.page
+        fn_context.demo_mode = config.demo_mode
+
+        try:
+            # run the synchronous action in a worker thread, like pre_step and post_step,
+            # instead of calling it directly on the event loop thread
+            await asyncio.to_thread(fn, *args, **kwargs)
+            env.last_action_error = ""
+        except Exception as e:
+            # the error is reported through the env state rather than raised to the caller
+            env.last_action_error = f"{type(e).__name__}: {e}"
+            match = re.match("TimeoutError: Timeout ([0-9]+)ms exceeded.", env.last_action_error)
+            if match:
+                info["action_exec_timeout"] = float(match.groups()[0]) / 1000  # ms to sec
+
+        results = await asyncio.to_thread(env.post_step, info, validate)
+        return results
+
+    # expose the wrapped action's identity (name, docstring, original callable) on the wrapper
+    decorator.__wrapped__ = func  # type: ignore
+    decorator.__name__ = func.__name__
+    decorator.__doc__ = func.__doc__
+    return decorator
+
+
+# Register every action of the selected subset as a tool; the wrapper is built with
+# validate=True only for actions whose name is listed in config.validate_actions.
+for fn in ACTION_SUBSETS[args.subset]:
+    validate = fn.__name__ in config.validate_actions
+    mcp.add_tool(fn_wrapper(fn, validate))
+
+if __name__ == "__main__":
+    # run the server with the stdio transport when executed as a script
+    mcp.run(transport="stdio")
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,3 +1,9 @@
+[project]
+name = "browsergym-meta"
+description = "BrowserGym: a gym environment for web task automation in the Chromium browser"
+dynamic = ["version"]
+[tool.setuptools]
+packages = []  # meta distribution, packages are included as dependencies
 [tool.black]
 line-length = 100
 include = '\.pyi?$'