|
| 1 | +import importlib.resources |
| 2 | +import json |
1 | 3 | import logging |
2 | 4 | import multiprocessing as mp |
3 | 5 | import os |
@@ -104,6 +106,27 @@ def make_env_args_list_from_fixed_seeds( |
104 | 106 | return env_args_list |
105 | 107 |
|
106 | 108 |
|
def get_webarena_verified_task_name(intent_template_id: int, task_id: int) -> str:
    """
    Return the task name (with revision) for a given intent template id and task id.

    The revision is looked up in the ``webarena-verified.json`` dataset that ships
    inside the ``webarena_verified`` package. The first entry whose
    ``"intent_template_id"`` and ``"task_id"`` both match is used.

    Args:
        intent_template_id: Value compared against each entry's ``"intent_template_id"``.
        task_id: Value compared against each entry's ``"task_id"``.

    Returns:
        A name of the form
        ``webarena_verified.<intent_template_id>.<task_id>.<revision>``.

    Raises:
        ValueError: If no entry in the dataset matches both ids.
    """
    dataset_file = importlib.resources.files("webarena_verified").joinpath(
        "assets/dataset/webarena-verified.json"
    )
    # Decode explicitly as UTF-8: without an encoding argument the text is decoded
    # with the locale's default encoding, which can fail or mis-decode non-ASCII
    # content on platforms whose default is not UTF-8.
    tasks = json.loads(dataset_file.read_text(encoding="utf-8"))

    for task in tasks:
        if task["intent_template_id"] == intent_template_id and task["task_id"] == task_id:
            return f"webarena_verified.{intent_template_id}.{task_id}.{task['revision']}"

    raise ValueError(
        f"No task found for intent template id {intent_template_id} "
        f"and task id {task_id} in webarena-verified.json"
    )
| 128 | + |
| 129 | + |
107 | 130 | def prepare_backend(backend: str): |
108 | 131 | match backend: |
109 | 132 | case "miniwob": |
@@ -157,16 +180,16 @@ def prepare_backend(backend: str): |
157 | 180 | ) |
158 | 181 | massage_tasks( |
159 | 182 | [ |
160 | | - f"webarena_verified.{intent_template_id}.{task_id}.{revision}" |
161 | | - for intent_template_id, task_id, revision in [ |
162 | | - (23, 410, 2), # reddit |
163 | | - (330, 533, 2), # gitlab |
164 | | - (87, 561, 3), # gitlab wiki |
165 | | - (88, 562, 2), # gitlab reddit |
166 | | - (165, 574, 2), # shopping |
167 | | - (16, 640, 2), # reddit |
168 | | - (253, 680, 2), # shopping_admin |
169 | | - (94, 740, 2), # wiki map |
| 183 | + get_webarena_verified_task_name(intent_template_id, task_id) |
| 184 | + for intent_template_id, task_id in [ |
| 185 | + (23, 410), # reddit |
| 186 | + # (330, 533), # gitlab |
| 187 | + # (87, 561), # gitlab wiki |
| 188 | + # (88, 562), # gitlab reddit |
| 189 | + (165, 574), # shopping |
| 190 | + (16, 640), # reddit |
| 191 | + (253, 680), # shopping_admin |
| 192 | + # (94, 740), # wiki map |
170 | 193 | ] |
171 | 194 | ] |
172 | 195 | ) |
|
0 commit comments