|
| 1 | +import tempfile |
| 2 | +from dataclasses import dataclass |
| 3 | +from typing import Any |
| 4 | + |
| 5 | +import pytest |
| 6 | + |
| 7 | +from agentlab.agents.visualwebarena.agent import VWAAgent, VWAAgentArgs |
| 8 | +from agentlab.analyze import inspect_results |
| 9 | +from agentlab.experiments.study import Study |
| 10 | +from agentlab.llm.llm_configs import CHAT_MODEL_ARGS_DICT |
| 11 | + |
# URL placed in the extra "image_url" goal entry that MockGoalImageAgent appends.
# NOTE(review): the value is an empty string here; presumably a real image or
# data URL was intended -- confirm against the original file.
mock_image = ""
| 13 | + |
| 14 | + |
class MockGoalImageAgent(VWAAgent):
    """VWAAgent variant that appends a mock image entry to the goal.

    The parent preprocessor runs first; its ``goal_object`` is checked to be
    a one-element tuple holding a text entry, and is then replaced by a
    two-element tuple: the original text entry followed by an ``image_url``
    entry pointing at ``mock_image``.
    """

    def obs_preprocessor(self, obs: dict) -> Any:
        """Preprocess ``obs`` via the parent and add an image to the goal.

        Args:
            obs: Raw observation, forwarded unchanged to the parent class.

        Returns:
            The parent's preprocessed result with ``goal_object`` extended
            by one ``image_url`` entry.

        Raises:
            AssertionError: If the parent's ``goal_object`` is not a
                one-element tuple containing a ``{"type": "text", "text": ...}``
                dict.
        """
        processed = super().obs_preprocessor(obs)

        # Sanity-check the shape of the goal before extending it.
        goal = processed["goal_object"]
        assert isinstance(goal, tuple)
        assert len(goal) == 1

        text_entry = goal[0]
        assert isinstance(text_entry, dict)
        assert "type" in text_entry
        assert text_entry["type"] == "text"
        assert "text" in text_entry

        image_entry = {"type": "image_url", "image_url": {"url": mock_image}}
        processed["goal_object"] = (text_entry, image_entry)
        return processed
| 29 | + |
| 30 | + |
@dataclass
class MockGoalImageAgentArgs(VWAAgentArgs):
    """Agent arguments that build a ``MockGoalImageAgent``.

    Attributes are declared as dataclass fields so they can be passed to the
    generated ``__init__`` as keyword arguments.
    """

    agent_name: str = "debug_vwa"
    temperature: float = 0.1
    # Annotated on purpose: @dataclass only turns *annotated* class attributes
    # into fields. Without the annotation this was a plain class attribute and
    # not a field declared by this class.
    chat_model_args: Any = None

    def make_agent(self) -> MockGoalImageAgent:
        """Create the mock agent from these arguments.

        Returns:
            A ``MockGoalImageAgent`` built with this instance's
            ``chat_model_args`` and a fixed ``n_retry`` of 3.
        """
        # Only chat_model_args is forwarded; temperature is not passed here.
        return MockGoalImageAgent(
            chat_model_args=self.chat_model_args,
            n_retry=3,
        )
| 42 | + |
| 43 | + |
@pytest.mark.pricy
def test_mock_goal_image_agent():
    """Run the mock goal-image agent on ``miniwob_tiny_test`` and check results.

    Builds a single-agent study, runs it sequentially, prints any recorded
    errors, and asserts that every experiment produced a result row and that
    the single summary row has an average reward of exactly 1.0.
    """
    agent_args = MockGoalImageAgentArgs(
        chat_model_args=CHAT_MODEL_ARGS_DICT["openai/gpt-4o-mini-2024-07-18"]
    )

    # NOTE(review): tmp_dir is created but never handed to the study, so
    # results are written wherever Study decides -- confirm whether the
    # temporary directory was meant to be used as the output location.
    with tempfile.TemporaryDirectory() as tmp_dir:
        study = Study([agent_args], benchmark="miniwob_tiny_test")
        study.run(n_jobs=1, parallel_backend="sequential")

        results_df = inspect_results.load_result_df(study.dir, progress_fn=None)

        # Surface failures in the test output before asserting.
        for _, record in results_df.iterrows():
            if record.err_msg:
                print(record.err_msg)
                print(record.stack_trace)

        # One result row per experiment in the study.
        assert len(results_df) == len(study.exp_args_list)

        summary = inspect_results.summarize_study(results_df)
        print(summary)
        assert len(summary) == 1

        avg_reward = summary.avg_reward.iloc[0]
        assert avg_reward == 1.0
| 71 | + |
| 72 | + |
# Allow running this test directly as a script, outside of pytest.
if __name__ == "__main__":
    test_mock_goal_image_agent()
0 commit comments