Skip to content

Commit 9f08d92

Browse files
authored
Add support for weblinx (#60)
* Update _get_benchmark_version to include weblinx
* Update agentxray port to use env var
* Update get_benchmark_env_args to include weblinx support
1 parent fc96fd8 commit 9f08d92

File tree

3 files changed

+15
-3
lines changed

3 files changed

+15
-3
lines changed

src/agentlab/analyze/agent_xray.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import base64
2+
import os
23
import traceback
34
from copy import deepcopy
45
from io import BytesIO
@@ -481,7 +482,7 @@ def run_gradio(results_dir: Path):
481482
tabs.select(tab_select)
482483

483484
demo.queue()
484-
demo.launch(server_port=7899, share=True)
485+
demo.launch(server_port=int(os.getenv("AGENTXRAY_APP_PORT", 7899)), share=True)
485486

486487

487488
def tab_select(evt: gr.SelectData):

src/agentlab/experiments/reproducibility_util.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,11 @@ def _get_benchmark_version(benchmark_name):
3030
return metadata.distribution("browsergym.webarena").version
3131
elif benchmark_name.startswith("visualwebarena"):
3232
return metadata.distribution("browsergym.visualwebarena").version
33+
elif benchmark_name.startswith("weblinx"):
34+
try:
35+
return metadata.distribution("weblinx_browsergym").version
36+
except metadata.PackageNotFoundError:
37+
return "0.0.1rc1"
3338
else:
3439
raise ValueError(f"Unknown benchmark {benchmark_name}")
3540

src/agentlab/experiments/task_collections.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,7 @@ def get_benchmark_env_args(
124124
"webarena": 15,
125125
"miniwob": 10,
126126
"miniwob_tiny_test": 5,
127+
"weblinx": None,
127128
}
128129

129130
n_repeat_default = {
@@ -133,12 +134,13 @@ def get_benchmark_env_args(
133134
"webarena": 1,
134135
"miniwob": 5,
135136
"miniwob_tiny_test": 2,
137+
"weblinx": 1,
136138
}
137139

138140
if max_steps is None:
139-
max_steps = max_steps_default[benchmark_id]
141+
max_steps = max_steps_default.get(benchmark_id, None)
140142
if n_repeat is None:
141-
n_repeat = n_repeat_default[benchmark_id]
143+
n_repeat = n_repeat_default.get(benchmark_id, 1)
142144
else:
143145
if benchmark_id == "webarena" and n_repeat != 1:
144146
logger.warning(
@@ -184,6 +186,10 @@ def get_benchmark_env_args(
184186
env_args_list = _make_env_args(
185187
miniwob_benchmarks_map[benchmark_name], max_steps, n_repeat, rng
186188
)
189+
elif benchmark_name.startswith("weblinx"):
190+
from weblinx_browsergym import ALL_WEBLINX_TASK_IDS
191+
192+
env_args_list = _make_env_args(ALL_WEBLINX_TASK_IDS, max_steps, n_repeat, rng)
187193
else:
188194
raise ValueError(f"Unknown benchmark name: {benchmark_name}")
189195

0 commit comments

Comments
 (0)