Skip to content

Commit cfc85c6

Browse files
committed
fixes
1 parent 9acd97d commit cfc85c6

File tree

5 files changed

+12
-6
lines changed

5 files changed

+12
-6
lines changed

experiments/run_miniwob.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,8 @@
11
import logging
22
import os
33

4+
from dotenv import load_dotenv
5+
46
from agentlab.agents.tapeagent.agent import TapeAgentArgs, load_config
57
from agentlab.backends.browser.mcp_playwright import MCPPlaywright
68
from agentlab.benchmarks.miniwob import MiniWobBenchmark
@@ -9,6 +11,7 @@
911
fmt = "%(asctime)s - %(levelname)s - %(name)s:%(lineno)d - %(funcName)s() - %(message)s"
1012
logging.basicConfig(level=logging.INFO, force=True, format=fmt, handlers=[logging.StreamHandler()])
1113
logger = logging.getLogger(__name__)
14+
load_dotenv()
1215

1316
if __name__ == "__main__":
1417
config = load_config("miniwob")
@@ -20,7 +23,7 @@
2023
logging_level_stdout=logging.INFO,
2124
)
2225
if os.environ.get("AGENTLAB_DEBUG"):
23-
study.exp_args_list = study.exp_args_list[:3]
26+
study.exp_args_list = study.exp_args_list[:1]
2427
study.run(n_jobs=1, n_relaunch=1, parallel_backend="sequential")
2528
else:
2629
study.run(n_jobs=config.n_jobs, n_relaunch=1, parallel_backend=config.parallel_backend)

experiments/test_mcp.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,3 @@
1-
from tapeagents.environment import FunctionCall
2-
from tapeagents.mcp import ToolCallAction
3-
41
from agentlab.backends.browser.mcp_playwright import MCPPlaywright
52
from agentlab.benchmarks.miniwob.task import get_miniwob_tasks
63

@@ -11,6 +8,9 @@ def main():
118
setup_js = task.get_setup_js()
129

1310
backend = MCPPlaywright()
11+
backend.initialize()
12+
print(backend.actions())
13+
1414
print("="*100)
1515
# 1. goto task url
1616
print("URL: ", task.url)

src/agentlab/backends/browser/env.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,7 @@ def __init__(self, task_name: str, task: AbstractWebTask, backend: BrowserBacken
2929
self.task = task
3030
self.seed = seed
3131
self._turns = 0
32+
self.max_turns = task.max_turns
3233
self.backend = backend
3334
self.backend.initialize()
3435

@@ -45,7 +46,7 @@ def reset(self, seed: int):
4546
def step(self, action: ToolCallAction | str) -> tuple[Observation, float, bool, bool, dict]:
4647
if isinstance(action, str):
4748
action = ToolsActionSet.parse_action(action)
48-
logger.info(f"BrowserEnv.step() called with action {action.function.name}")
49+
logger.info(f"BrowserEnv.step() called with action {action}")
4950

5051
action_exec_start = time.time()
5152
finished = isinstance(action, StopStep)
@@ -61,7 +62,7 @@ def step(self, action: ToolCallAction | str) -> tuple[Observation, float, bool,
6162
if self.task.validate_per_step or finished or truncated:
6263
reward = self.calculate_reward(action, observation)
6364
else:
64-
reward = None
65+
reward = 0.0
6566

6667
env_info = {
6768
"step_metadata": observation.metadata,

src/agentlab/benchmarks/miniwob/task.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@ class MiniWobTask(AbstractWebTask):
1616
url: str = None
1717
remove_human_display: bool = True
1818
episode_max_time: int = 1000000
19+
max_turns: int = 10
1920
actions_whitelist: ClassVar[list[str]] = [
2021
"browser_press_key",
2122
"browser_type",

src/agentlab/benchmarks/web_task.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@ class AbstractWebTask(BaseModel):
99
url: str
1010
validate_per_step: bool = False
1111
actions_whitelist: ClassVar[list[str]] = []
12+
max_turns: int = 100
1213

1314
@classmethod
1415
def filter_actions(cls, actions: list[ToolSpec]) -> list[str]:

0 commit comments

Comments
 (0)