File tree Expand file tree Collapse file tree 3 files changed +11
-4
lines changed
experiments/src/browsergym/experiments/benchmark
webarena_verified/src/browsergym/webarena_verified Expand file tree Collapse file tree 3 files changed +11
-4
lines changed Original file line number Diff line number Diff line change 3030 run : python3 -m build browsergym/webarena/ --outdir dist/
3131 - name : Build a binary wheel and a source tarball (browsergym-webarenalite)
3232 run : python3 -m build browsergym/webarenalite/ --outdir dist/
33+
34+ - name : Build a binary wheel and a source tarball (browsergym-webarena-verified)
35+ run : python3 -m build browsergym/webarena_verified/ --outdir dist
3336 - name : Build a binary wheel and a source tarball (browsergym-visualwebarena)
3437 run : python3 -m build browsergym/visualwebarena/ --outdir dist/
3538
Original file line number Diff line number Diff line change 140140 supports_parallel_seeds = False ,
141141 backends = ["webarena_verified" ],
142142 env_args_list = make_env_args_list_from_repeat_tasks (
143- task_list = task_list_from_metadata (metadata = task_metadata ("webarena_verified" )),
143+ task_list = task_list_from_metadata (
144+ metadata = task_metadata ("webarena_verified" )
145+ ),
144146 max_steps = 30 ,
145147 n_repeats = n_repeats ,
146148 seeds_rng = np .random .RandomState (42 ),
147149 ),
148150 task_metadata = task_metadata ("webarena_verified" ),
149- ),
151+ ), # TODO: Add the webarena-verified "hard" subset by filtering tasks against the list in
152+ # https://github.com/ServiceNow/webarena-verified/blob/main/assets/dataset/subsets/webarena-verified-hard.json
150153 "webarena_lite" : lambda n_repeats = 1 : Benchmark (
151154 name = "webarena_lite" ,
152155 high_level_action_set_args = DEFAULT_HIGHLEVEL_ACTION_SET_ARGS ["webarena" ],
267270 backends = ["assistantbench" ],
268271 env_args_list = make_env_args_list_from_repeat_tasks (
269272 task_list = task_list_from_metadata (
270- metadata = task_metadata ("assistantbench" ), filter = {"browsergym_split" : "valid|test" }
273+ metadata = task_metadata ("assistantbench" ),
274+ filter = {"browsergym_split" : "valid|test" },
271275 ),
272276 max_steps = 30 ,
273277 n_repeats = n_repeats ,
Original file line number Diff line number Diff line change 1212# Check if the json file is the same as the one in the webarena-verified repository
1313library_json_string = (
1414 importlib .resources .files ("webarena_verified" )
15- .joinpath ("../../ assets/dataset/webarena-verified.json" )
15+ .joinpath ("assets/dataset/webarena-verified.json" )
1616 .read_text ()
1717)
1818library_json = json .loads (library_json_string )
You can’t perform that action at this time.
0 commit comments