Skip to content

Commit d9d2dd4

Browse files
authored
Fill->type to trigger drop down (#358)
* fix bbox scale in extra_properties and prevent extract_screenshot from clearing CDP
* add the n_repeats argument to benchmark creation
* in preparation for modifying dom_wait
* fill function "types" the last character to trigger autocomplete in the UI
* reverting to old dom_wait temporarily
* revert to old behaviour as default
* sneaking in this little change
1 parent 8d338b6 commit d9d2dd4

File tree

4 files changed

+29
-7
lines changed

4 files changed

+29
-7
lines changed

browsergym/core/src/browsergym/core/action/functions.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,24 +57,32 @@ def noop(wait_ms: float = 1000):
5757

5858

5959
# https://playwright.dev/docs/input#text-input
60-
def fill(bid: str, value: str):
60+
def fill(bid: str, value: str, enable_autocomplete_menu: bool = False):
6161
"""
6262
Fill out a form field. It focuses the element and triggers an input event with the entered text.
6363
It works for <input>, <textarea> and [contenteditable] elements.
6464
6565
Examples:
66-
fill('237', 'example value')
6766
fill('45', "multi-line\\nexample")
6867
fill('a12', "example with \\"quotes\\"")
68+
fill('b534', "Montre", True) # This will trigger the autocomplete menu if available
6969
"""
7070
elem = get_elem_by_bid(page, bid, demo_mode != "off")
7171
add_demo_mode_effects(page, elem, bid, demo_mode=demo_mode, move_cursor=False)
7272

7373
def do(force: bool):
7474
if demo_mode != "off":
75+
# Demo mode: full typing for visual effect
7576
delay = max(2000 / len(value), 10)
7677
elem.clear(force=force, timeout=500)
77-
elem.type(value, delay=delay, timeout=0) # no timeout
78+
elem.type(value, delay=delay, timeout=0)
79+
elif enable_autocomplete_menu:
80+
# Hybrid: fill n-1 chars, type the last one to trigger autocomplete
81+
elem.clear(force=force, timeout=500)
82+
if len(value) > 1:
83+
elem.fill(value[:-1], force=force, timeout=500)
84+
if len(value) > 0:
85+
elem.type(value[-1], delay=0, timeout=500)
7886
else:
7987
elem.fill(value, force=force, timeout=500)
8088

browsergym/core/src/browsergym/core/action/highlevel.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -552,6 +552,8 @@ def to_tool_description(self, api="openai", add_examples=True) -> list[dict]:
552552
}
553553
if param.default == inspect.Parameter.empty:
554554
parameters["required"].append(param_name)
555+
else:
556+
parameters["properties"][param_name]["default"] = param.default
555557

556558
# Construct the tool descriptor
557559
description = action.description

browsergym/core/src/browsergym/core/env.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ def __init__(
7777
# agent-related arguments
7878
action_mapping: Optional[callable] = HighLevelActionSet().to_python_code,
7979
use_raw_page_output: bool = False,
80+
pre_observation_delay: float = 0.5, # seconds
8081
):
8182
"""
8283
Instantiate a ready to use BrowserEnv gym environment.
@@ -98,7 +99,7 @@ def __init__(
9899
pw_context_kwargs: extra parameters for the playwright BrowserContext. Should only be used for debugging/testing.
99100
action_mapping: if set, the environment will use this function to map every received action to executable Python code.
100101
use_raw_page_output: if set, the environment will use the raw page output instead of the default processing.
101-
102+
pre_observation_delay: number of seconds to wait before starting to extract the observation (default 0.5). This can be important if an auto-complete menu may appear after filling a field.
102103
"""
103104
super().__init__()
104105
self.task_entrypoint = task_entrypoint
@@ -118,6 +119,7 @@ def __init__(
118119
self.pw_context_kwargs = pw_context_kwargs
119120
self.action_mapping = action_mapping
120121
self.use_raw_page_output = use_raw_page_output
122+
self.pre_observation_delay = pre_observation_delay
121123

122124
# check argument values
123125
assert tags_to_mark in ("all", "standard_html")
@@ -481,13 +483,19 @@ def post_step(
481483
logger.debug("Action executed")
482484
info["action_exec_stop"] = time.time()
483485

486+
info["wait_for_page_loading_start"] = time.time()
484487
# wait a bit (for the JavaScript callback to set the active page)
485-
time.sleep(0.5) # wait for JS events to be fired (half a second)
488+
logger.debug(f"Waiting {self.pre_observation_delay} seconds before extracting observation")
489+
time.sleep(self.pre_observation_delay) # wait for JS events to be fired
486490
self.context.cookies() # trigger all waiting Playwright callbacks on the stack (hack, see https://playwright.dev/java/docs/multithreading)
487491

488492
# wait for the network to idle before extracting the observation, reward etc.
489493
self._wait_dom_loaded()
490494

495+
info["wait_for_page_loading_stop"] = time.time()
496+
497+
info["validation_start"] = time.time()
498+
491499
if validate:
492500
# after the action is executed, the active page might have changed
493501
# perform a safety check
@@ -510,12 +518,16 @@ def post_step(
510518
info["task_info"] = {}
511519
logger.debug("Task validation skipped")
512520

521+
info["validation_stop"] = time.time()
522+
523+
info["get_observation_start"] = time.time()
513524
# add any user message sent by the task to the chat
514525
if user_message:
515526
self.chat.add_message(role="user", msg=user_message)
516527

517528
# extract observation (generic)
518529
obs = self._get_obs()
530+
info["get_observation_stop"] = time.time()
519531
logger.debug("Observation extracted")
520532

521533
# new step API wants a 5-tuple (gymnasium)

tests/core/test_gym_envs.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -286,9 +286,9 @@ def test_demo_mode(global_demo_mode: bool, demo_mode: str):
286286
# typing should be slow (only in demo mode)
287287
action_time = info["action_exec_stop"] - info["action_exec_start"]
288288
if demo_mode_active:
289-
assert action_time > 2
289+
assert action_time > 3.5
290290
else:
291-
assert action_time <= 1.5
291+
assert action_time <= 3.5
292292

293293
env.close()
294294

0 commit comments

Comments
 (0)