-
Notifications
You must be signed in to change notification settings - Fork 144
Some changes in preparation for agentlab's new ToolUseAgent and the new APIs #340
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
79d4141
80e8843
31c3224
3d19bea
6de7a02
5911c1e
be39f6f
2703be9
ecb4632
26ac1e9
be0e45d
da73bf2
709332a
fca18ff
3dca1b7
808ce72
02978dd
67e7302
b2583f9
616f6b9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,7 +5,7 @@ | |
|
|
||
| from . import utils | ||
| from .base import AbstractActionSet | ||
| from .functions import ( # check,; uncheck, | ||
| from .functions import ( | ||
| clear, | ||
| click, | ||
| dblclick, | ||
|
|
@@ -514,6 +514,56 @@ def to_python_code(self, action): | |
| # return the constructed python code | ||
| return python_code | ||
|
|
||
| def to_tool_description(self, api="openai") -> list[dict]: | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Missing API Validation in Tool Description Generation
Tell me moreWhat is the issue?The to_tool_description method defaults to 'openai' API but lacks validation for unsupported API values. Why this mattersInvalid API values could lead to unexpected behavior as the method will use 'parameters' as the default schema key for any unrecognized API value. Suggested change ∙ Feature PreviewAdd validation for supported API values: def to_tool_description(self, api="openai") -> list[dict]:
supported_apis = {"openai", "anthropic"}
if api not in supported_apis:
raise ValueError(f"Unsupported API: {api}. Must be one of {supported_apis}")
schema_keys = {
"openai": "parameters",
"anthropic": "input_schema",
}
schema = schema_keys[api] # Will now always be validProvide feedback to improve future suggestions💬 Looking for more details? Reply to this comment to chat with Korbit. |
||
| """ | ||
| Translates actions to tool descriptions following the OpenAI API format. | ||
|
|
||
| Returns: | ||
| A list of tool descriptors. | ||
| """ | ||
| schema_keys = { | ||
| "openai": "parameters", | ||
| "anthropic": "input_schema", | ||
| } | ||
| schema = schema_keys.get(api, "parameters") | ||
| tools = [] | ||
| for tool_name, action in self.action_set.items(): | ||
| # Parse the signature to extract parameter names and types | ||
| parameters = {"type": "object", "properties": {}, "required": []} | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Isolate from here into a function get_tool(name, action) -> tool and unit test it. |
||
| signature = inspect.signature(action) | ||
| for param_name, param in signature.parameters.items(): | ||
| param_type = "string" # Default to string if type is not specified | ||
| if param.annotation != inspect.Parameter.empty: | ||
| if param.annotation == str: | ||
|
||
| param_type = "string" | ||
| elif param.annotation == float or param.annotation == int: | ||
| param_type = "number" | ||
| elif param.annotation == bool: | ||
| param_type = "boolean" | ||
| elif param.annotation == dict: | ||
| param_type = "object" | ||
| elif param.annotation == list: | ||
| param_type = "array" | ||
|
|
||
| parameters["properties"][param_name] = { | ||
| "type": param_type, | ||
| # "description": f"Parameter {param_name} of type {param_type}", | ||
| } | ||
| if param.default == inspect.Parameter.empty: | ||
| parameters["required"].append(param_name) | ||
|
|
||
| # Construct the tool descriptor | ||
| tool = { | ||
| "name": tool_name, | ||
| "description": action.description, | ||
| schema: parameters, | ||
| } | ||
| if api == "openai": | ||
| tool["type"] = "function" | ||
| tools.append(tool) | ||
|
|
||
| return tools | ||
|
|
||
|
|
||
| # consistency checks | ||
| assert "custom" not in ACTION_SUBSETS | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -25,7 +25,7 @@ | |
| extract_merged_axtree, | ||
| extract_screenshot, | ||
| ) | ||
| from .spaces import AnyBox, AnyDict, Float, Unicode | ||
| from .spaces import AnyBox, AnyDict, Anything, Float, Unicode | ||
| from .task import AbstractBrowserTask | ||
|
|
||
| logger = logging.getLogger(__name__) | ||
|
|
@@ -76,6 +76,7 @@ def __init__( | |
| pw_context_kwargs: dict = {}, | ||
| # agent-related arguments | ||
| action_mapping: Optional[callable] = HighLevelActionSet().to_python_code, | ||
| use_raw_page_output: bool = False, | ||
| ): | ||
| """ | ||
| Instantiate a ready to use BrowserEnv gym environment. | ||
|
|
@@ -96,6 +97,7 @@ def __init__( | |
| pw_chromium_kwargs: extra parameters for the playwright Browser. Should only be used for debugging/testing. | ||
| pw_context_kwargs: extra parameters for the playwright BrowserContext. Should only be used for debugging/testing. | ||
| action_mapping: if set, the environment will use this function to map every received action to executable Python code. | ||
| use_raw_page_output: if set, the environment will use the raw page output instead of the default processing. | ||
|
|
||
| """ | ||
| super().__init__() | ||
|
|
@@ -115,6 +117,7 @@ def __init__( | |
| self.pw_chromium_kwargs = pw_chromium_kwargs | ||
| self.pw_context_kwargs = pw_context_kwargs | ||
| self.action_mapping = action_mapping | ||
| self.use_raw_page_output = use_raw_page_output | ||
|
|
||
| # check argument values | ||
| assert tags_to_mark in ("all", "standard_html") | ||
|
|
@@ -132,42 +135,67 @@ def __init__( | |
| self.chat: Chat = None | ||
|
|
||
| # observation space | ||
| self.observation_space = gym.spaces.Dict( | ||
| { | ||
| "chat_messages": gym.spaces.Sequence( | ||
| gym.spaces.Dict( | ||
| { | ||
| "role": Unicode(), | ||
| "timestamp": Float(), | ||
| "message": Unicode(), | ||
| } | ||
| ) | ||
| ), | ||
| "goal": Unicode(), | ||
| "goal_object": gym.spaces.Sequence(AnyDict()), | ||
| "open_pages_urls": gym.spaces.Sequence(Unicode()), | ||
| "open_pages_titles": gym.spaces.Sequence(Unicode()), | ||
| "active_page_index": gym.spaces.Box( | ||
| low=0, high=255, dtype=int | ||
| ), # TODO: change to an Integer (breaking change for users) | ||
| "url": Unicode(), | ||
| "screenshot": AnyBox( | ||
| low=0, | ||
| high=255, | ||
| shape=(-1, -1, 3), | ||
| dtype=np.uint8, | ||
| ), # swapped axes (height, width, RGB) | ||
| "dom_object": AnyDict(), | ||
| "axtree_object": AnyDict(), | ||
| "extra_element_properties": AnyDict(), | ||
| "focused_element_bid": Unicode(), | ||
| "last_action": Unicode(), | ||
| "last_action_error": Unicode(), | ||
| "elapsed_time": gym.spaces.Box( | ||
| low=0, high=np.inf, dtype=float | ||
| ), # TODO: change to a Float (breaking change for users) | ||
| } | ||
| ) | ||
| if use_raw_page_output: | ||
| self.observation_space = gym.spaces.Dict( | ||
| { | ||
| "page": Anything(), | ||
| "chat_messages": gym.spaces.Sequence( | ||
| gym.spaces.Dict( | ||
| { | ||
| "role": Unicode(), | ||
| "timestamp": Float(), | ||
| "message": Unicode(), | ||
| } | ||
| ) | ||
| ), | ||
| "goal": Unicode(), | ||
| "goal_object": gym.spaces.Sequence(AnyDict()), | ||
| "open_pages_urls": gym.spaces.Sequence(Unicode()), | ||
| "open_pages_titles": gym.spaces.Sequence(Unicode()), | ||
| "active_page_index": gym.spaces.Box(low=0, high=255, dtype=int), | ||
| "url": Unicode(), | ||
| "last_action": Unicode(), | ||
| "last_action_error": Unicode(), | ||
| "elapsed_time": gym.spaces.Box(low=0, high=np.inf, dtype=float), | ||
| } | ||
| ) | ||
| else: | ||
| self.observation_space = gym.spaces.Dict( | ||
| { | ||
| "chat_messages": gym.spaces.Sequence( | ||
| gym.spaces.Dict( | ||
| { | ||
| "role": Unicode(), | ||
| "timestamp": Float(), | ||
| "message": Unicode(), | ||
| } | ||
| ) | ||
| ), | ||
| "goal": Unicode(), | ||
| "goal_object": gym.spaces.Sequence(AnyDict()), | ||
| "open_pages_urls": gym.spaces.Sequence(Unicode()), | ||
| "open_pages_titles": gym.spaces.Sequence(Unicode()), | ||
| "active_page_index": gym.spaces.Box( | ||
| low=0, high=255, dtype=int | ||
| ), # TODO: change to an Integer (breaking change for users) | ||
| "url": Unicode(), | ||
| "screenshot": AnyBox( | ||
| low=0, | ||
| high=255, | ||
| shape=(-1, -1, 3), | ||
| dtype=np.uint8, | ||
| ), # swapped axes (height, width, RGB) | ||
| "dom_object": AnyDict(), | ||
| "axtree_object": AnyDict(), | ||
| "extra_element_properties": AnyDict(), | ||
| "focused_element_bid": Unicode(), | ||
| "last_action": Unicode(), | ||
| "last_action_error": Unicode(), | ||
| "elapsed_time": gym.spaces.Box( | ||
| low=0, high=np.inf, dtype=float | ||
| ), # TODO: change to a Float (breaking change for users) | ||
| } | ||
| ) | ||
|
|
||
| # action space | ||
| self.action_space = Unicode() | ||
|
|
@@ -527,7 +555,23 @@ def _active_page_check(self): | |
| raise RuntimeError(f"Unexpected: active page has been closed ({self.page}).") | ||
|
|
||
| def _get_obs(self): | ||
|
|
||
| if self.use_raw_page_output: | ||
| obs = { | ||
| "page": self.page, | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Unsafe Raw Page Object Exposure
Tell me moreWhat is the issue?Direct exposure of the raw Playwright page object in the observation space when use_raw_page_output=True. Why this mattersExposing the raw browser page object gives unrestricted access to potentially sensitive page data and browser control methods that could be exploited maliciously. Suggested change ∙ Feature PreviewInstead of exposing the raw page object, create a restricted interface that only exposes necessary page properties and methods: "page": {
"url": self.page.url,
"title": self.page.title(),
# Add other needed properties with proper sanitization
}Provide feedback to improve future suggestions💬 Looking for more details? Reply to this comment to chat with Korbit. |
||
| "chat_messages": tuple(copy.deepcopy(self.chat.messages)), | ||
| "goal": _try_to_extract_legacy_goal(self.goal_object), # legacy goal, deprecated | ||
| "goal_object": tuple( | ||
| copy.deepcopy(self.goal_object) | ||
| ), # new goal format, list of messages openai style | ||
| "open_pages_urls": tuple(page.url for page in self.context.pages), | ||
| "open_pages_titles": tuple(page.title() for page in self.context.pages), | ||
| "active_page_index": np.asarray([self.context.pages.index(self.page)]), | ||
| "url": self.page.url, # redundant with "open_pages_urls" and "active_page_index" | ||
| "last_action": self.last_action, | ||
| "last_action_error": self.last_action_error, | ||
| "elapsed_time": np.asarray([time.time() - self.start_time]), | ||
| } | ||
| return obs | ||
| for retries_left in reversed(range(EXTRACT_OBS_MAX_TRIES)): | ||
| try: | ||
|
Comment on lines
604
to
623
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Observation extraction logic lacks clear structure
Tell me moreWhat is the issue?The _get_obs method has a complex structure with two very different code paths and duplicated field extraction logic. Why this mattersThe method's structure makes it hard to understand what fields are common between the two observation types and obscures the relationship between the observation_space definition and actual observation creation. Suggested change ∙ Feature PreviewSplit the method into smaller, focused functions: def _get_obs(self):
base_obs = self._get_base_observation()
if self.use_raw_page_output:
return base_obs
return {**base_obs, **self._get_extended_observation()}
def _get_base_observation(self):
return {
"chat_messages": tuple(copy.deepcopy(self.chat.messages)),
# ... other common fields
}
def _get_extended_observation(self):
return self._extract_with_retries()Provide feedback to improve future suggestions💬 Looking for more details? Reply to this comment to chat with Korbit. |
||
| # pre-extraction, mark dom elements (set bid, set dynamic attributes like value and checked) | ||
|
|
||
This file was deleted.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Seems like a good candidate for an easy unit test. But perhaps we could isolate it into a separate function first. Perhaps
tool = get_tool(tool_name, action).