1-
1+ import logging
22import os
33from typing import Any , ClassVar
44
55from browsergym .miniwob import ALL_MINIWOB_TASKS
66
77from agentlab .benchmarks .web_task import AbstractWebTask
88
9+ logger = logging .getLogger (__name__ )
10+
911
1012class MiniWobTask (AbstractWebTask ):
1113 dataset : str = "miniwob"
@@ -17,10 +19,10 @@ class MiniWobTask(AbstractWebTask):
1719 remove_human_display : bool = True
1820 episode_max_time : int = 1000000
1921 max_turns : int = 10
22+ validate_per_step : bool = True
2023 actions_whitelist : ClassVar [list [str ]] = [
2124 "browser_press_key" ,
2225 "browser_type" ,
23- "browser_navigate" ,
2426 "browser_click" ,
2527 "browser_drag" ,
2628 "browser_hover" ,
@@ -29,9 +31,10 @@ class MiniWobTask(AbstractWebTask):
2931
def model_post_init(self, __context: Any):
    """Derive the task page URL from ``base_url`` and ``subdomain``.

    Sets ``self.url`` to ``<base_url>/<subdomain>.html``. Trailing slashes on
    ``base_url`` are removed before joining, so a base such as
    ``http://host/miniwob/`` does not produce ``http://host/miniwob//x.html``.

    Args:
        __context: Not used here; accepted so the hook signature is honoured.
            NOTE(review): presumably invoked by the model base class after
            field initialisation -- confirm against AbstractWebTask.
    """
    # Normalise the base first: a base URL with a trailing slash would
    # otherwise yield a double slash in the final URL.
    self.url = f"{self.base_url.rstrip('/')}/{self.subdomain}.html"
32-
34+
3335 def get_setup_js (self ) -> str :
3436 if self .remove_human_display :
37+ logger .info ("Remove human display" )
3538 js = r"""
3639let __display_ids = ['reward-display', 'click-canvas', 'sync-task-cover'];
3740let __display_divs = {};
@@ -93,10 +96,12 @@ def get_setup_js(self) -> str:
9396Math.seedrandom(42);
9497core.EPISODE_MAX_TIME = { self .episode_max_time } ;
9598core.startEpisodeReal();
99+ start_time = Date.now();
96100while (!WOB_TASK_READY) {{
97101 await new Promise(resolve => setTimeout(resolve, 100));
98102}}
99- return core.getUtterance();
103+ ready_time = Date.now();
104+ return {{'goal': core.getUtterance(), 'done': WOB_DONE_GLOBAL, 'task_start_time': ready_time - start_time}};
100105 """
101106 return f"async () => {{{ js } }}"
102107
@@ -113,29 +118,34 @@ def get_task_validate_js(self) -> str:
113118return [WOB_REWARD_GLOBAL, WOB_RAW_REWARD_GLOBAL, WOB_REWARD_REASON, WOB_DONE_GLOBAL, WOB_EPISODE_ID, WOB_TASK_READY];
114119}"""
115120
116-
def parse_validation_result(self, validation_result: str) -> tuple[float, dict]:
    """Convert the comma-separated validation string into a reward and info dict.

    The fields are read positionally: index 1 is the raw reward, index 2 the
    reward reason and index 3 the done flag. This matches the six-element
    array returned by the validation script (reward, raw reward, reward
    reason, done, episode id, task ready).
    NOTE(review): how that array is serialised into ``validation_result`` is
    not visible here -- confirm it is a plain comma join.

    Args:
        validation_result: Comma-separated validation values.

    Returns:
        A ``(reward, info)`` tuple. ``reward`` is ``1.0`` when the raw reward
        is strictly positive and ``0.0`` otherwise. ``info`` carries
        ``raw_reward`` (float), ``reward_reason`` (str) and ``done`` (bool).

    Raises:
        ValueError: If the raw reward field is not a valid float.
        IndexError: If the string has too few fields.
    """
    logger.info("Validation result: %s", validation_result)

    expected_fields = 6
    fields = validation_result.split(",")
    # The reward reason is the only free-text field. Any fields beyond the
    # expected six are therefore commas inside the reason, and must not be
    # allowed to shift the position of the done flag.
    extra = max(len(fields) - expected_fields, 0)

    raw_reward = float(fields[1])
    reward_reason = ",".join(fields[2 : 3 + extra]).strip()
    # Compare the text explicitly: bool("false") would be True.
    done = fields[3 + extra].strip().lower() == "true"

    reward = float(raw_reward > 0)
    return reward, {
        "raw_reward": raw_reward,
        "reward_reason": reward_reason,
        "done": done,
    }
127132
def get_miniwob_tasks(
    base_url: str | None = None,
    remove_human_display: bool = True,
    episode_max_time: int = 1000000,
) -> list[MiniWobTask]:
    """Build one ``MiniWobTask`` for every entry in ``ALL_MINIWOB_TASKS``.

    Args:
        base_url: Base URL of the MiniWoB pages. When ``None``, the value of
            the ``MINIWOB_URL`` environment variable is used instead.
        remove_human_display: Forwarded unchanged to each task.
        episode_max_time: Forwarded unchanged to each task.

    Returns:
        A list of tasks; each uses the source task's ``subdomain`` as both
        ``task_id`` and ``subdomain``, and its ``desc`` as the description.

    Raises:
        ValueError: If ``base_url`` is ``None`` and ``MINIWOB_URL`` is unset,
            empty or whitespace-only.
    """
    if base_url is None:
        # An empty or blank variable is treated like a missing one; it would
        # otherwise produce tasks whose URLs have no host part.
        base_url = os.environ.get("MINIWOB_URL", "").strip()
        if not base_url:
            raise ValueError("MINIWOB_URL environment variable is not set or is empty")
    return [
        MiniWobTask(
            task_id=task.subdomain,
            desc=task.desc,
            subdomain=task.subdomain,
            base_url=base_url,
            remove_human_display=remove_human_display,
            episode_max_time=episode_max_time,
        )
        for task in ALL_MINIWOB_TASKS
    ]
0 commit comments