Skip to content

Commit 75738c4

Browse files
committed
Merge branch 'wa_verified' of github.com:ServiceNow/BrowserGym into wa_verified
2 parents c2d1536 + e5c75ca commit 75738c4

File tree

3 files changed

+11
-4
lines changed
  • .github/workflows
  • browsergym
    • experiments/src/browsergym/experiments/benchmark
    • webarena_verified/src/browsergym/webarena_verified

3 files changed

+11
-4
lines changed

.github/workflows/pypi.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@ jobs:
3030
run: python3 -m build browsergym/webarena/ --outdir dist/
3131
- name: Build a binary wheel and a source tarball (browsergym-webarenalite)
3232
run: python3 -m build browsergym/webarenalite/ --outdir dist/
33+
34+
- name: Build a binary wheel and a source tarball (browsergym-webarena-verified)
35+
run: python3 -m build browsergym/webarena_verified/ --outdir dist
3336
- name: Build a binary wheel and a source tarball (browsergym-visualwebarena)
3437
run: python3 -m build browsergym/visualwebarena/ --outdir dist/
3538

browsergym/experiments/src/browsergym/experiments/benchmark/configs.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -140,13 +140,16 @@
140140
supports_parallel_seeds=False,
141141
backends=["webarena_verified"],
142142
env_args_list=make_env_args_list_from_repeat_tasks(
143-
task_list=task_list_from_metadata(metadata=task_metadata("webarena_verified")),
143+
task_list=task_list_from_metadata(
144+
metadata=task_metadata("webarena_verified")
145+
),
144146
max_steps=30,
145147
n_repeats=n_repeats,
146148
seeds_rng=np.random.RandomState(42),
147149
),
148150
task_metadata=task_metadata("webarena_verified"),
149-
),
151+
),  # TODO: Add the webarena-verified "hard" subset by filtering the task list down to the tasks listed in
152+
# https://github.com/ServiceNow/webarena-verified/blob/main/assets/dataset/subsets/webarena-verified-hard.json
150153
"webarena_lite": lambda n_repeats=1: Benchmark(
151154
name="webarena_lite",
152155
high_level_action_set_args=DEFAULT_HIGHLEVEL_ACTION_SET_ARGS["webarena"],
@@ -267,7 +270,8 @@
267270
backends=["assistantbench"],
268271
env_args_list=make_env_args_list_from_repeat_tasks(
269272
task_list=task_list_from_metadata(
270-
metadata=task_metadata("assistantbench"), filter={"browsergym_split": "valid|test"}
273+
metadata=task_metadata("assistantbench"),
274+
filter={"browsergym_split": "valid|test"},
271275
),
272276
max_steps=30,
273277
n_repeats=n_repeats,

browsergym/webarena_verified/src/browsergym/webarena_verified/config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
# Check whether the JSON file is identical to the one in the webarena-verified repository
1313
library_json_string = (
1414
importlib.resources.files("webarena_verified")
15-
.joinpath("../../assets/dataset/webarena-verified.json")
15+
.joinpath("assets/dataset/webarena-verified.json")
1616
.read_text()
1717
)
1818
library_json = json.loads(library_json_string)

0 commit comments

Comments
 (0)