Skip to content

Commit 8be928a

Browse files
committed
fixes
1 parent f2c480a commit 8be928a

File tree

5 files changed

+18
-11
lines changed

5 files changed

+18
-11
lines changed

experiments/run_miniwob.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,11 @@
44
from bgym import DEFAULT_BENCHMARKS
55
from dotenv import load_dotenv
66

7-
from agentlab.agents.generic_agent.agent_configs import FLAGS_GPT_4o
7+
from agentlab.agents.generic_agent.agent_configs import GPT5_MINI_FLAGS
88
from agentlab.agents.generic_agent.generic_agent import GenericAgentArgs
99
from agentlab.agents.tapeagent.agent import TapeAgentArgs, load_config
1010
from agentlab.backends.browser.mcp_playwright import MCPPlaywright
11-
from agentlab.backends.browser.playwright import PlaywrightSyncBackend
11+
from agentlab.backends.browser.playwright import AsyncPlaywright
1212
from agentlab.benchmarks.miniwob import MiniWobBenchmark
1313
from agentlab.experiments.study import make_study
1414
from agentlab.llm.llm_configs import CHAT_MODEL_ARGS_DICT
@@ -22,11 +22,12 @@
2222
config = load_config("miniwob")
2323

2424
# benchmark = DEFAULT_BENCHMARKS["miniwob"](n_repeats=1)
25-
benchmark = MiniWobBenchmark(backend=MCPPlaywright())
25+
# benchmark = MiniWobBenchmark(backend=MCPPlaywright())
26+
benchmark = MiniWobBenchmark(backend=AsyncPlaywright())
2627

2728
# agent_args = GenericAgentArgs(
28-
# chat_model_args=CHAT_MODEL_ARGS_DICT["openrouter/openai/gpt-5-mini"],
29-
# flags=FLAGS_GPT_4o,
29+
# chat_model_args=CHAT_MODEL_ARGS_DICT["azure/gpt-5-mini-2025-08-07"],
30+
# flags=GPT5_MINI_FLAGS,
3031
# )
3132
# agent_args.flags.obs.use_ax_tree = False
3233
# agent_args.flags.obs.use_html = True

src/agentlab/agents/tapeagent/agent.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from tapeagents.agent import Agent
1313
from tapeagents.core import (
1414
Action,
15+
ControlFlow,
1516
LLMOutputParsingFailureAction,
1617
Observation,
1718
SetNextNode,
@@ -251,7 +252,7 @@ def get_action(
251252
if not event.step:
252253
continue
253254
self.tape = self.tape.append(event.step)
254-
if isinstance(event.step, Thought):
255+
if isinstance(event.step, Thought) and not isinstance(event.step, ControlFlow):
255256
thoughts.append(event.step)
256257
logger.info(f"Thought: {event.step.llm_view()}")
257258
elif isinstance(event.step, Action) and not action: # we use first action only

src/agentlab/agents/tapeagent/conf/agent/react.yaml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,15 +10,15 @@ nodes:
1010
system_prompt: |
1111
You are an expert AI Agent trained to assist users with complex web tasks.
1212
Your role is to understand the goal, perform actions until the goal is accomplished and respond in a helpful and accurate manner.
13-
Keep your replies concise and direct. Prioritize clarity and avoid over-elaboration.
13+
Keep your replies brief, concise, direct and on topic. Prioritize clarity and avoid over-elaboration.
1414
Do not express emotions or opinions.
1515
guidance: |
1616
Think along the following lines:
1717
1. Summarize the last observation and describe the visible changes in the state.
18-
2. Evaluate action success, explain impact on task/plan.
19-
3. If there are any errors, describe them and propose alternative.
20-
4. List next steps to move towards the goaland propose next immediate action.
21-
The produce the function call that performs the proposed step. If the task is complete, produce the final step.
18+
2. Evaluate action success, explain impact on task and next steps.
19+
3. If you see any errors in the last observation, think about them. If there are no errors, just move on.
20+
4. List next steps to move towards the goal and propose next immediate action.
21+
Then produce the function call that performs the proposed action. If the task is complete, produce the final step.
2222
steps:
2323
- tapeagents.core.FinalStep
2424
next_node: react

src/agentlab/backends/browser/base.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,9 @@ class ToolCallAction(BaseModel):
4040
id: str = ""
4141
function: FunctionCall
4242

43+
def llm_view(self, **kwargs) -> str:
44+
return self.model_dump_json(indent=2)
45+
4346

4447
class ToolSpec(BaseModel):
4548
"""

src/agentlab/experiments/loop.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
from browsergym.experiments.utils import count_tokens
2424
from dataclasses_json import DataClassJsonMixin
2525
from PIL import Image
26+
from pydantic import BaseModel
2627
from tqdm import tqdm
2728

2829
from agentlab.backends.browser.env import BrowserEnvArgs
@@ -411,6 +412,7 @@ def run(self):
411412
logger.debug(f"Starting step {step_info.step}.")
412413
step_info.profiling.agent_start = time.time()
413414
action, step_info.agent_info = agent.get_action(step_info.obs.copy())
415+
step_info.action = action.model_dump_json(indent=2) if isinstance(action, BaseModel) else str(action)
414416
step_info.profiling.agent_stop = time.time()
415417
if step_info.agent_info.get("think", None):
416418
logger.info(f"Agent thought: {step_info.agent_info['think']}")

0 commit comments

Comments
 (0)