Skip to content
Merged
14 changes: 11 additions & 3 deletions browsergym/core/src/browsergym/core/action/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,24 +57,32 @@ def noop(wait_ms: float = 1000):


# https://playwright.dev/docs/input#text-input
def fill(bid: str, value: str):
def fill(bid: str, value: str, enable_autocomplete_menu: bool = False):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe trigger_autocomplete: bool would be more explicit?

"""
Fill out a form field. It focuses the element and triggers an input event with the entered text.
It works for <input>, <textarea> and [contenteditable] elements.

Examples:
fill('237', 'example value')
fill('45', "multi-line\\nexample")
fill('a12', "example with \\"quotes\\"")
fill('b534', "Montre", True) # This will trigger the autocomplete menu if available
"""
elem = get_elem_by_bid(page, bid, demo_mode != "off")
add_demo_mode_effects(page, elem, bid, demo_mode=demo_mode, move_cursor=False)

def do(force: bool):
if demo_mode != "off":
# Demo mode: full typing for visual effect
delay = max(2000 / len(value), 10)
elem.clear(force=force, timeout=500)
elem.type(value, delay=delay, timeout=0) # no timeout
elem.type(value, delay=delay, timeout=0)
elif enable_autocomplete_menu:
# Hybrid: fill n-1 chars, type the last one to trigger autocomplete
elem.clear(force=force, timeout=500)
if len(value) > 1:
elem.fill(value[:-1], force=force, timeout=500)
if len(value) > 0:
elem.type(value[-1], delay=0, timeout=500)
else:
elem.fill(value, force=force, timeout=500)

Expand Down
2 changes: 2 additions & 0 deletions browsergym/core/src/browsergym/core/action/highlevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -552,6 +552,8 @@ def to_tool_description(self, api="openai", add_examples=True) -> list[dict]:
}
if param.default == inspect.Parameter.empty:
parameters["required"].append(param_name)
else:
parameters["properties"][param_name]["default"] = param.default

# Construct the tool descriptor
description = action.description
Expand Down
16 changes: 14 additions & 2 deletions browsergym/core/src/browsergym/core/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ def __init__(
# agent-related arguments
action_mapping: Optional[callable] = HighLevelActionSet().to_python_code,
use_raw_page_output: bool = False,
pre_observation_delay: float = 0.5, # seconds
):
"""
Instantiate a ready to use BrowserEnv gym environment.
Expand All @@ -98,7 +99,7 @@ def __init__(
pw_context_kwargs: extra parameters for the playwright BrowserContext. Should only be used for debugging/testing.
action_mapping: if set, the environment will use this function to map every received action to executable Python code.
use_raw_page_output: if set, the environment will use the raw page output instead of the default processing.

pre_observation_delay: number of seconds to wait before starting to extract the observation (default: 0.5). This can be important when an auto-complete menu may appear after filling a field.
"""
super().__init__()
self.task_entrypoint = task_entrypoint
Expand All @@ -118,6 +119,7 @@ def __init__(
self.pw_context_kwargs = pw_context_kwargs
self.action_mapping = action_mapping
self.use_raw_page_output = use_raw_page_output
self.pre_observation_delay = pre_observation_delay

# check argument values
assert tags_to_mark in ("all", "standard_html")
Expand Down Expand Up @@ -481,13 +483,19 @@ def post_step(
logger.debug("Action executed")
info["action_exec_stop"] = time.time()

info["wait_for_page_loading_start"] = time.time()
# wait a bit (for the JavaScript callback to set the active page)
time.sleep(0.5) # wait for JS events to be fired (half a second)
logger.debug(f"Waiting {self.pre_observation_delay} seconds before extracting observation")
time.sleep(self.pre_observation_delay) # wait for JS events to be fired
self.context.cookies() # trigger all waiting Playwright callbacks on the stack (hack, see https://playwright.dev/java/docs/multithreading)
Comment on lines +489 to 490

This comment was marked as resolved.


# wait for the network to idle before extracting the observation, reward etc.
self._wait_dom_loaded()

info["wait_for_page_loading_stop"] = time.time()

info["validation_start"] = time.time()

if validate:
# after the action is executed, the active page might have changed
# perform a safety check
Expand All @@ -510,12 +518,16 @@ def post_step(
info["task_info"] = {}
logger.debug("Task validation skipped")

info["validation_stop"] = time.time()

info["get_observation_start"] = time.time()
# add any user message sent by the task to the chat
if user_message:
self.chat.add_message(role="user", msg=user_message)

# extract observation (generic)
obs = self._get_obs()
info["get_observation_stop"] = time.time()
logger.debug("Observation extracted")

# new step API wants a 5-tuple (gymnasium)
Expand Down
4 changes: 2 additions & 2 deletions tests/core/test_gym_envs.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,9 +286,9 @@ def test_demo_mode(global_demo_mode: bool, demo_mode: str):
# typing should be slow (only in demo mode)
action_time = info["action_exec_stop"] - info["action_exec_start"]
if demo_mode_active:
assert action_time > 2
assert action_time > 3.5
else:
assert action_time <= 1.5
assert action_time <= 3.5

env.close()

Expand Down