Skip to content

Commit 29ce81b

Browse files
committed
do not hardcode revision number
1 parent cf11699 commit 29ce81b

File tree

1 file changed

+33
-10
lines changed
  • browsergym/experiments/src/browsergym/experiments/benchmark

1 file changed

+33
-10
lines changed

browsergym/experiments/src/browsergym/experiments/benchmark/utils.py

Lines changed: 33 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,5 @@
1+
import importlib.resources
2+
import json
13
import logging
24
import multiprocessing as mp
35
import os
@@ -104,6 +106,27 @@ def make_env_args_list_from_fixed_seeds(
104106
return env_args_list
105107

106108

109+
# Parsed contents of webarena-verified.json; populated lazily on the first lookup
# so that resolving several task names does not re-read and re-parse the file.
_WEBARENA_VERIFIED_TASKS = None


def _load_webarena_verified_tasks() -> list:
    """
    Returns the task entries parsed from the dataset bundled with webarena_verified.

    The JSON asset is read and parsed at most once per process; later calls
    reuse the parsed result.
    """
    global _WEBARENA_VERIFIED_TASKS
    if _WEBARENA_VERIFIED_TASKS is None:
        dataset = importlib.resources.files("webarena_verified").joinpath(
            "assets/dataset/webarena-verified.json"
        )
        # JSON is UTF-8 encoded; do not depend on the locale's default encoding.
        _WEBARENA_VERIFIED_TASKS = json.loads(dataset.read_text(encoding="utf-8"))
    return _WEBARENA_VERIFIED_TASKS


def get_webarena_verified_task_name(intent_template_id: int, task_id: int) -> str:
    """
    Returns the task name (with revision) for a given intent template id and task id.

    The revision is looked up in the webarena-verified.json dataset shipped with
    the webarena_verified package instead of being hardcoded by the caller.

    Args:
        intent_template_id: value matched against each entry's "intent_template_id".
        task_id: value matched against each entry's "task_id".

    Returns:
        A string of the form
        "webarena_verified.<intent_template_id>.<task_id>.<revision>".

    Raises:
        ValueError: if no dataset entry matches both ids.
    """
    for task in _load_webarena_verified_tasks():
        if task["intent_template_id"] == intent_template_id and task["task_id"] == task_id:
            # The first matching entry wins.
            revision = task["revision"]
            return f"webarena_verified.{intent_template_id}.{task_id}.{revision}"
    raise ValueError(
        f"No task found for intent template id {intent_template_id} and task id {task_id} in webarena-verified.json"
    )
128+
129+
107130
def prepare_backend(backend: str):
108131
match backend:
109132
case "miniwob":
@@ -157,16 +180,16 @@ def prepare_backend(backend: str):
157180
)
158181
massage_tasks(
159182
[
160-
f"webarena_verified.{intent_template_id}.{task_id}.{revision}"
161-
for intent_template_id, task_id, revision in [
162-
(23, 410, 2), # reddit
163-
(330, 533, 2), # gitlab
164-
(87, 561, 3), # gitlab wiki
165-
(88, 562, 2), # gitlab reddit
166-
(165, 574, 2), # shopping
167-
(16, 640, 2), # reddit
168-
(253, 680, 2), # shopping_admin
169-
(94, 740, 2), # wiki map
183+
get_webarena_verified_task_name(intent_template_id, task_id)
184+
for intent_template_id, task_id in [
185+
(23, 410), # reddit
186+
# (330, 533), # gitlab
187+
# (87, 561), # gitlab wiki
188+
# (88, 562), # gitlab reddit
189+
(165, 574), # shopping
190+
(16, 640), # reddit
191+
(253, 680), # shopping_admin
192+
# (94, 740), # wiki map
170193
]
171194
]
172195
)

0 commit comments

Comments
 (0)