Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion browsergym/core/src/browsergym/core/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -627,7 +627,8 @@ def _get_obs(self):
dom = extract_dom_snapshot(self.page)
axtree = extract_merged_axtree(self.page)
focused_element_bid = extract_focused_element_bid(self.page)
extra_properties = extract_dom_extra_properties(dom)
scale_factor = getattr(self.page, "_bgym_scale_factor", 1.0)
extra_properties = extract_dom_extra_properties(dom, scale_factor=scale_factor)
Comment on lines +630 to +631
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing scale_factor documentation (category: Documentation)

Tell me more
What is the issue?

The scale_factor parameter usage is not documented, making it unclear why and when this scaling is applied.

Why this matters

Future maintainers won't understand the purpose of _bgym_scale_factor or know when to adjust this value.

Suggested change ∙ Feature Preview

Add a comment above the scale_factor line:

# Apply viewport scaling factor to handle high DPI displays
scale_factor = getattr(self.page, "_bgym_scale_factor", 1.0)

Provide feedback to improve future suggestions

Nice Catch Incorrect Not in Scope Not in coding standard Other

💬 Looking for more details? Reply to this comment to chat with Korbit.

except (playwright.sync_api.Error, MarkingError) as e:
err_msg = str(e)
# try to add robustness to async events (detached / deleted frames)
Expand Down
26 changes: 21 additions & 5 deletions browsergym/core/src/browsergym/core/observation.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,14 @@ def extract_screenshot(page: playwright.sync_api.Page):
"devicePixelRatio": 1.0, # Override system DPR
}

# CAPTURE ORIGINAL METRICS BEFORE CHANGING
original_metrics = {
"width": dimensions["width"],
"height": dimensions["height"],
"deviceScaleFactor": 1.0, # The original scale factor
"mobile": False,
}

# Apply scale factor to device metrics for higher resolution capture
cdp.send(
"Emulation.setDeviceMetricsOverride",
Expand All @@ -162,8 +170,9 @@ def extract_screenshot(page: playwright.sync_api.Page):
},
)

# Reset device metrics
cdp.send("Emulation.clearDeviceMetricsOverride")
# RESTORE ORIGINAL METRICS (don't just clear)
cdp.send("Emulation.setDeviceMetricsOverride", original_metrics)

cdp.detach()

# bytes of a png file
Expand Down Expand Up @@ -281,7 +290,7 @@ def pop_bids_from_attribute(dom_snapshot, attr: str):
break


def extract_dom_extra_properties(dom_snapshot):
def extract_dom_extra_properties(dom_snapshot, scale_factor):
def to_string(idx):
if idx == -1:
return None
Expand Down Expand Up @@ -436,9 +445,16 @@ def to_string(idx):
if bid:
if bid in extra_properties:
logger.warning(f"duplicate {BID_ATTR}={repr(bid)} attribute detected")

scaled_bbox = None
if node["bbox"]:
scaled_bbox = [coord * scale_factor for coord in node["bbox"]]

extra_properties[bid] = {
extra_prop: node[extra_prop]
for extra_prop in ("visibility", "bbox", "clickable", "set_of_marks")
"visibility": node["visibility"],
"bbox": scaled_bbox, # Use scaled coordinates
"clickable": node["clickable"],
"set_of_marks": node["set_of_marks"],
}

return extra_properties
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import numpy as np

from browsergym.experiments.benchmark.metadata.utils import task_list_from_metadata, task_metadata
from browsergym.experiments.benchmark.metadata.utils import (
task_list_from_metadata,
task_metadata,
)
from browsergym.experiments.benchmark.utils import (
make_env_args_list_from_fixed_seeds,
make_env_args_list_from_repeat_tasks,
Expand Down Expand Up @@ -88,7 +90,7 @@

# all benchmarks are callables designed for lazy loading, i.e. `bench = DEFAULT_BENCHMARKS["miniwob_all"]()`
DEFAULT_BENCHMARKS = {
"miniwob": lambda: Benchmark(
"miniwob": lambda n_repeats=5: Benchmark(
name="miniwob",
high_level_action_set_args=DEFAULT_HIGHLEVEL_ACTION_SET_ARGS["miniwob_all"],
is_multi_tab=False,
Expand All @@ -97,12 +99,12 @@
env_args_list=make_env_args_list_from_repeat_tasks(
task_list=task_list_from_metadata(metadata=task_metadata("miniwob")),
max_steps=10,
n_repeats=5,
n_repeats=n_repeats,
seeds_rng=np.random.RandomState(42),
),
task_metadata=task_metadata("miniwob"),
),
"miniwob_tiny_test": lambda: Benchmark(
"miniwob_tiny_test": lambda n_repeats=2: Benchmark(
name="miniwob_tiny_test",
high_level_action_set_args=DEFAULT_HIGHLEVEL_ACTION_SET_ARGS["miniwob_all"],
is_multi_tab=False,
Expand All @@ -111,12 +113,12 @@
env_args_list=make_env_args_list_from_repeat_tasks(
task_list=["miniwob.click-dialog", "miniwob.click-checkboxes"],
max_steps=5,
n_repeats=2,
n_repeats=n_repeats,
seeds_rng=np.random.RandomState(42),
),
task_metadata=task_metadata("miniwob"),
),
"webarena": lambda: Benchmark(
"webarena": lambda n_repeats=1: Benchmark(
name="webarena",
high_level_action_set_args=DEFAULT_HIGHLEVEL_ACTION_SET_ARGS["webarena"],
is_multi_tab=True,
Expand All @@ -125,12 +127,12 @@
env_args_list=make_env_args_list_from_repeat_tasks(
task_list=task_list_from_metadata(metadata=task_metadata("webarena")),
max_steps=30,
n_repeats=1,
n_repeats=n_repeats,
seeds_rng=np.random.RandomState(42),
),
task_metadata=task_metadata("webarena"),
),
"webarena_tiny": lambda: Benchmark(
"webarena_tiny": lambda n_repeats=1: Benchmark(
name="webarena_tiny",
high_level_action_set_args=DEFAULT_HIGHLEVEL_ACTION_SET_ARGS["webarena"],
is_multi_tab=True,
Expand All @@ -150,7 +152,7 @@
),
task_metadata=task_metadata("webarena"),
),
"visualwebarena_tiny": lambda: Benchmark(
"visualwebarena_tiny": lambda n_repeats=10: Benchmark(
name="visualwebarena_tiny",
high_level_action_set_args=DEFAULT_HIGHLEVEL_ACTION_SET_ARGS["visualwebarena"],
is_multi_tab=True,
Expand All @@ -168,7 +170,7 @@
),
task_metadata=task_metadata("visualwebarena"),
),
"visualwebarena": lambda: Benchmark(
"visualwebarena": lambda n_repeats=1: Benchmark(
name="visualwebarena",
high_level_action_set_args=DEFAULT_HIGHLEVEL_ACTION_SET_ARGS["visualwebarena"],
is_multi_tab=True,
Expand All @@ -177,12 +179,12 @@
env_args_list=make_env_args_list_from_repeat_tasks(
task_list=task_list_from_metadata(metadata=task_metadata("visualwebarena")),
max_steps=30,
n_repeats=1,
n_repeats=n_repeats,
seeds_rng=np.random.RandomState(42),
),
task_metadata=task_metadata("visualwebarena"),
),
"workarena_l1": lambda: Benchmark(
"workarena_l1": lambda n_repeats=10: Benchmark(
name="workarena_l1",
high_level_action_set_args=DEFAULT_HIGHLEVEL_ACTION_SET_ARGS["workarena"],
is_multi_tab=False,
Expand All @@ -194,11 +196,11 @@
meta_seed=42, # meta seed for evaluation curriculum
max_steps=15,
curriculum_type="agent",
seeds_l1=10,
seeds_l1=n_repeats,
),
task_metadata=task_metadata("workarena"),
),
"workarena_l2_agent_curriculum_eval": lambda: Benchmark(
"workarena_l2_agent_curriculum_eval": lambda n_repeats=1: Benchmark(
name="workarena_l2_agent_curriculum_eval",
high_level_action_set_args=DEFAULT_HIGHLEVEL_ACTION_SET_ARGS["workarena++"],
is_multi_tab=True,
Expand All @@ -213,7 +215,7 @@
),
task_metadata=task_metadata("workarena"),
),
"workarena_l3_agent_curriculum_eval": lambda: Benchmark(
"workarena_l3_agent_curriculum_eval": lambda n_repeats=1: Benchmark(
name="workarena_l3_agent_curriculum_eval",
high_level_action_set_args=DEFAULT_HIGHLEVEL_ACTION_SET_ARGS["workarena++"],
is_multi_tab=True,
Expand All @@ -228,7 +230,7 @@
),
task_metadata=task_metadata("workarena"),
),
"assistantbench": lambda: Benchmark(
"assistantbench": lambda n_repeats=1: Benchmark(
name="assistantbench",
high_level_action_set_args=DEFAULT_HIGHLEVEL_ACTION_SET_ARGS["assistantbench"],
is_multi_tab=True,
Expand All @@ -239,12 +241,12 @@
metadata=task_metadata("assistantbench"), filter={"browsergym_split": "valid|test"}
),
max_steps=30,
n_repeats=1,
n_repeats=n_repeats,
seeds_rng=np.random.RandomState(42),
),
task_metadata=task_metadata("assistantbench"),
),
"weblinx": lambda: Benchmark(
"weblinx": lambda n_repeats=1: Benchmark(
name="weblinx",
high_level_action_set_args=DEFAULT_HIGHLEVEL_ACTION_SET_ARGS["weblinx"],
is_multi_tab=True,
Expand All @@ -253,7 +255,7 @@
env_args_list=make_env_args_list_from_repeat_tasks(
task_list=task_list_from_metadata(metadata=task_metadata("weblinx")),
max_steps=1,
n_repeats=1,
n_repeats=n_repeats,
seeds_rng=np.random.RandomState(42),
),
task_metadata=task_metadata("weblinx"),
Expand Down