Skip to content

Commit 79ac68e

Browse files
authored
Fix_bbox_scale (#357)
* fix bbox scale in extra_properties and prevent extract_screenshot from clearing the CDP device metrics override
* add the n_repeats argument to benchmark creation
1 parent d76f8d9 commit 79ac68e

File tree

3 files changed

+45
-26
lines changed

3 files changed

+45
-26
lines changed

browsergym/core/src/browsergym/core/env.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -627,7 +627,8 @@ def _get_obs(self):
627627
dom = extract_dom_snapshot(self.page)
628628
axtree = extract_merged_axtree(self.page)
629629
focused_element_bid = extract_focused_element_bid(self.page)
630-
extra_properties = extract_dom_extra_properties(dom)
630+
scale_factor = getattr(self.page, "_bgym_scale_factor", 1.0)
631+
extra_properties = extract_dom_extra_properties(dom, scale_factor=scale_factor)
631632
except (playwright.sync_api.Error, MarkingError) as e:
632633
err_msg = str(e)
633634
# try to add robustness to async events (detached / deleted frames)

browsergym/core/src/browsergym/core/observation.py

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,14 @@ def extract_screenshot(page: playwright.sync_api.Page):
144144
"devicePixelRatio": 1.0, # Override system DPR
145145
}
146146

147+
# CAPTURE ORIGINAL METRICS BEFORE CHANGING
148+
original_metrics = {
149+
"width": dimensions["width"],
150+
"height": dimensions["height"],
151+
"deviceScaleFactor": 1.0, # The original scale factor
152+
"mobile": False,
153+
}
154+
147155
# Apply scale factor to device metrics for higher resolution capture
148156
cdp.send(
149157
"Emulation.setDeviceMetricsOverride",
@@ -162,8 +170,9 @@ def extract_screenshot(page: playwright.sync_api.Page):
162170
},
163171
)
164172

165-
# Reset device metrics
166-
cdp.send("Emulation.clearDeviceMetricsOverride")
173+
# RESTORE ORIGINAL METRICS (don't just clear)
174+
cdp.send("Emulation.setDeviceMetricsOverride", original_metrics)
175+
167176
cdp.detach()
168177

169178
# bytes of a png file
@@ -281,7 +290,7 @@ def pop_bids_from_attribute(dom_snapshot, attr: str):
281290
break
282291

283292

284-
def extract_dom_extra_properties(dom_snapshot):
293+
def extract_dom_extra_properties(dom_snapshot, scale_factor):
285294
def to_string(idx):
286295
if idx == -1:
287296
return None
@@ -436,9 +445,16 @@ def to_string(idx):
436445
if bid:
437446
if bid in extra_properties:
438447
logger.warning(f"duplicate {BID_ATTR}={repr(bid)} attribute detected")
448+
449+
scaled_bbox = None
450+
if node["bbox"]:
451+
scaled_bbox = [coord * scale_factor for coord in node["bbox"]]
452+
439453
extra_properties[bid] = {
440-
extra_prop: node[extra_prop]
441-
for extra_prop in ("visibility", "bbox", "clickable", "set_of_marks")
454+
"visibility": node["visibility"],
455+
"bbox": scaled_bbox, # Use scaled coordinates
456+
"clickable": node["clickable"],
457+
"set_of_marks": node["set_of_marks"],
442458
}
443459

444460
return extra_properties

browsergym/experiments/src/browsergym/experiments/benchmark/configs.py

Lines changed: 22 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import numpy as np
2-
3-
from browsergym.experiments.benchmark.metadata.utils import task_list_from_metadata, task_metadata
2+
from browsergym.experiments.benchmark.metadata.utils import (
3+
task_list_from_metadata,
4+
task_metadata,
5+
)
46
from browsergym.experiments.benchmark.utils import (
57
make_env_args_list_from_fixed_seeds,
68
make_env_args_list_from_repeat_tasks,
@@ -88,7 +90,7 @@
8890

8991
# all benchmarks are callables designed for lazy loading, e.g. `bench = DEFAULT_BENCHMARKS["miniwob"]()`
9092
DEFAULT_BENCHMARKS = {
91-
"miniwob": lambda: Benchmark(
93+
"miniwob": lambda n_repeats=5: Benchmark(
9294
name="miniwob",
9395
high_level_action_set_args=DEFAULT_HIGHLEVEL_ACTION_SET_ARGS["miniwob_all"],
9496
is_multi_tab=False,
@@ -97,12 +99,12 @@
9799
env_args_list=make_env_args_list_from_repeat_tasks(
98100
task_list=task_list_from_metadata(metadata=task_metadata("miniwob")),
99101
max_steps=10,
100-
n_repeats=5,
102+
n_repeats=n_repeats,
101103
seeds_rng=np.random.RandomState(42),
102104
),
103105
task_metadata=task_metadata("miniwob"),
104106
),
105-
"miniwob_tiny_test": lambda: Benchmark(
107+
"miniwob_tiny_test": lambda n_repeats=2: Benchmark(
106108
name="miniwob_tiny_test",
107109
high_level_action_set_args=DEFAULT_HIGHLEVEL_ACTION_SET_ARGS["miniwob_all"],
108110
is_multi_tab=False,
@@ -111,12 +113,12 @@
111113
env_args_list=make_env_args_list_from_repeat_tasks(
112114
task_list=["miniwob.click-dialog", "miniwob.click-checkboxes"],
113115
max_steps=5,
114-
n_repeats=2,
116+
n_repeats=n_repeats,
115117
seeds_rng=np.random.RandomState(42),
116118
),
117119
task_metadata=task_metadata("miniwob"),
118120
),
119-
"webarena": lambda: Benchmark(
121+
"webarena": lambda n_repeats=1: Benchmark(
120122
name="webarena",
121123
high_level_action_set_args=DEFAULT_HIGHLEVEL_ACTION_SET_ARGS["webarena"],
122124
is_multi_tab=True,
@@ -125,12 +127,12 @@
125127
env_args_list=make_env_args_list_from_repeat_tasks(
126128
task_list=task_list_from_metadata(metadata=task_metadata("webarena")),
127129
max_steps=30,
128-
n_repeats=1,
130+
n_repeats=n_repeats,
129131
seeds_rng=np.random.RandomState(42),
130132
),
131133
task_metadata=task_metadata("webarena"),
132134
),
133-
"webarena_tiny": lambda: Benchmark(
135+
"webarena_tiny": lambda n_repeats=1: Benchmark(
134136
name="webarena_tiny",
135137
high_level_action_set_args=DEFAULT_HIGHLEVEL_ACTION_SET_ARGS["webarena"],
136138
is_multi_tab=True,
@@ -150,7 +152,7 @@
150152
),
151153
task_metadata=task_metadata("webarena"),
152154
),
153-
"visualwebarena_tiny": lambda: Benchmark(
155+
"visualwebarena_tiny": lambda n_repeats=10: Benchmark(
154156
name="visualwebarena_tiny",
155157
high_level_action_set_args=DEFAULT_HIGHLEVEL_ACTION_SET_ARGS["visualwebarena"],
156158
is_multi_tab=True,
@@ -168,7 +170,7 @@
168170
),
169171
task_metadata=task_metadata("visualwebarena"),
170172
),
171-
"visualwebarena": lambda: Benchmark(
173+
"visualwebarena": lambda n_repeats=1: Benchmark(
172174
name="visualwebarena",
173175
high_level_action_set_args=DEFAULT_HIGHLEVEL_ACTION_SET_ARGS["visualwebarena"],
174176
is_multi_tab=True,
@@ -177,12 +179,12 @@
177179
env_args_list=make_env_args_list_from_repeat_tasks(
178180
task_list=task_list_from_metadata(metadata=task_metadata("visualwebarena")),
179181
max_steps=30,
180-
n_repeats=1,
182+
n_repeats=n_repeats,
181183
seeds_rng=np.random.RandomState(42),
182184
),
183185
task_metadata=task_metadata("visualwebarena"),
184186
),
185-
"workarena_l1": lambda: Benchmark(
187+
"workarena_l1": lambda n_repeats=10: Benchmark(
186188
name="workarena_l1",
187189
high_level_action_set_args=DEFAULT_HIGHLEVEL_ACTION_SET_ARGS["workarena"],
188190
is_multi_tab=False,
@@ -194,11 +196,11 @@
194196
meta_seed=42, # meta seed for evaluation curriculum
195197
max_steps=15,
196198
curriculum_type="agent",
197-
seeds_l1=10,
199+
seeds_l1=n_repeats,
198200
),
199201
task_metadata=task_metadata("workarena"),
200202
),
201-
"workarena_l2_agent_curriculum_eval": lambda: Benchmark(
203+
"workarena_l2_agent_curriculum_eval": lambda n_repeats=1: Benchmark(
202204
name="workarena_l2_agent_curriculum_eval",
203205
high_level_action_set_args=DEFAULT_HIGHLEVEL_ACTION_SET_ARGS["workarena++"],
204206
is_multi_tab=True,
@@ -213,7 +215,7 @@
213215
),
214216
task_metadata=task_metadata("workarena"),
215217
),
216-
"workarena_l3_agent_curriculum_eval": lambda: Benchmark(
218+
"workarena_l3_agent_curriculum_eval": lambda n_repeats=1: Benchmark(
217219
name="workarena_l3_agent_curriculum_eval",
218220
high_level_action_set_args=DEFAULT_HIGHLEVEL_ACTION_SET_ARGS["workarena++"],
219221
is_multi_tab=True,
@@ -228,7 +230,7 @@
228230
),
229231
task_metadata=task_metadata("workarena"),
230232
),
231-
"assistantbench": lambda: Benchmark(
233+
"assistantbench": lambda n_repeats=1: Benchmark(
232234
name="assistantbench",
233235
high_level_action_set_args=DEFAULT_HIGHLEVEL_ACTION_SET_ARGS["assistantbench"],
234236
is_multi_tab=True,
@@ -239,12 +241,12 @@
239241
metadata=task_metadata("assistantbench"), filter={"browsergym_split": "valid|test"}
240242
),
241243
max_steps=30,
242-
n_repeats=1,
244+
n_repeats=n_repeats,
243245
seeds_rng=np.random.RandomState(42),
244246
),
245247
task_metadata=task_metadata("assistantbench"),
246248
),
247-
"weblinx": lambda: Benchmark(
249+
"weblinx": lambda n_repeats=1: Benchmark(
248250
name="weblinx",
249251
high_level_action_set_args=DEFAULT_HIGHLEVEL_ACTION_SET_ARGS["weblinx"],
250252
is_multi_tab=True,
@@ -253,7 +255,7 @@
253255
env_args_list=make_env_args_list_from_repeat_tasks(
254256
task_list=task_list_from_metadata(metadata=task_metadata("weblinx")),
255257
max_steps=1,
256-
n_repeats=1,
258+
n_repeats=n_repeats,
257259
seeds_rng=np.random.RandomState(42),
258260
),
259261
task_metadata=task_metadata("weblinx"),

0 commit comments

Comments
 (0)