diff --git a/src/core/prompts/__tests__/__snapshots__/system.test.ts.snap b/src/core/prompts/__tests__/__snapshots__/system.test.ts.snap index 616d14700f..531ed8f3e1 100644 --- a/src/core/prompts/__tests__/__snapshots__/system.test.ts.snap +++ b/src/core/prompts/__tests__/__snapshots__/system.test.ts.snap @@ -272,6 +272,59 @@ Examples: true +## browser_action +Description: Request to interact with a Puppeteer-controlled browser. Every action, except \`close\`, will be responded to with a screenshot of the browser's current state, along with any new console logs. You may only perform one browser action per message, and wait for the user's response including a screenshot and logs to determine the next action. +- The sequence of actions **must always start with** launching the browser at a URL, and **must always end with** closing the browser. If you need to visit a new URL that is not possible to navigate to from the current webpage, you must first close the browser, then launch again at the new URL. +- While the browser is active, only the \`browser_action\` tool can be used. No other tools should be called during this time. You may proceed to use other tools only after closing the browser. For example if you run into an error and need to fix a file, you must close the browser, then use other tools to make the necessary changes, then re-launch the browser to verify the result. +- The browser window has a resolution of **undefined** pixels. When performing any click actions, ensure the coordinates are within this resolution range. +- Before clicking on any elements such as icons, links, or buttons, you must consult the provided screenshot of the page to determine the coordinates of the element. The click should be targeted at the **center of the element**, not on its edges. +Parameters: +- action: (required) The action to perform. The available actions are: + * launch: Launch a new Puppeteer-controlled browser instance at the specified URL. This **must always be the first action**. + - Use with the \`url\` parameter to provide the URL. + - Ensure the URL is valid and includes the appropriate protocol (e.g. http://localhost:3000/page, file:///path/to/file.html, etc.) + * hover: Move the cursor to a specific x,y coordinate. + - Use with the \`coordinate\` parameter to specify the location. + - Always move to the center of an element (icon, button, link, etc.) based on coordinates derived from a screenshot. + * click: Click at a specific x,y coordinate. + - Use with the \`coordinate\` parameter to specify the location. + - Always click in the center of an element (icon, button, link, etc.) based on coordinates derived from a screenshot. + * type: Type a string of text on the keyboard. You might use this after clicking on a text field to input text. + - Use with the \`text\` parameter to provide the string to type. + * resize: Resize the viewport to a specific w,h size. + - Use with the \`size\` parameter to specify the new size. + * scroll_down: Scroll down the page by one page height. + * scroll_up: Scroll up the page by one page height. + * close: Close the Puppeteer-controlled browser instance. This **must always be the final browser action**. + - Example: \`close\` +- url: (optional) Use this for providing the URL for the \`launch\` action. + * Example: https://example.com +- coordinate: (optional) The X and Y coordinates for the \`click\` and \`hover\` actions. Coordinates should be within the **undefined** resolution. + * Example: 450,300 +- size: (optional) The width and height for the \`resize\` action. + * Example: 1280,720 +- text: (optional) Use this for providing the text for the \`type\` action. + * Example: Hello, world! +Usage: + +Action to perform (e.g., launch, click, type, scroll_down, scroll_up, close) +URL to launch the browser at (optional) +x,y coordinates (optional) +Text to type (optional) + + +Example: Requesting to launch a browser at https://example.com + +launch +https://example.com + + +Example: Requesting to click on the element at coordinates 450,300 + +click +450,300 + + ## execute_command Description: Request to execute a CLI command on the system. Use this when you need to perform system operations or run specific commands to accomplish any step in the user's task. You must tailor your command to the user's system and provide a clear explanation of what the command does. For command chaining, use the appropriate chaining syntax for the user's shell. Prefer to execute complex CLI commands over creating executable scripts, as they are more flexible and easier to run. Prefer relative commands and paths that avoid location sensitivity for terminal consistency, e.g: \`touch ./testdata/example.file\`, \`dir ./examples/model1/data/yaml\`, or \`go test ./cmd/front --config ./cmd/front/config.yml\`. If directed by the user, you may open a terminal in a different directory by using the \`cwd\` parameter. Parameters: @@ -766,6 +819,59 @@ Examples: true +## browser_action +Description: Request to interact with a Puppeteer-controlled browser. Every action, except \`close\`, will be responded to with a screenshot of the browser's current state, along with any new console logs. You may only perform one browser action per message, and wait for the user's response including a screenshot and logs to determine the next action. +- The sequence of actions **must always start with** launching the browser at a URL, and **must always end with** closing the browser. If you need to visit a new URL that is not possible to navigate to from the current webpage, you must first close the browser, then launch again at the new URL. +- While the browser is active, only the \`browser_action\` tool can be used. No other tools should be called during this time. You may proceed to use other tools only after closing the browser. For example if you run into an error and need to fix a file, you must close the browser, then use other tools to make the necessary changes, then re-launch the browser to verify the result. +- The browser window has a resolution of **undefined** pixels. When performing any click actions, ensure the coordinates are within this resolution range. +- Before clicking on any elements such as icons, links, or buttons, you must consult the provided screenshot of the page to determine the coordinates of the element. The click should be targeted at the **center of the element**, not on its edges. +Parameters: +- action: (required) The action to perform. The available actions are: + * launch: Launch a new Puppeteer-controlled browser instance at the specified URL. This **must always be the first action**. + - Use with the \`url\` parameter to provide the URL. + - Ensure the URL is valid and includes the appropriate protocol (e.g. http://localhost:3000/page, file:///path/to/file.html, etc.) + * hover: Move the cursor to a specific x,y coordinate. + - Use with the \`coordinate\` parameter to specify the location. + - Always move to the center of an element (icon, button, link, etc.) based on coordinates derived from a screenshot. + * click: Click at a specific x,y coordinate. + - Use with the \`coordinate\` parameter to specify the location. + - Always click in the center of an element (icon, button, link, etc.) based on coordinates derived from a screenshot. + * type: Type a string of text on the keyboard. You might use this after clicking on a text field to input text. + - Use with the \`text\` parameter to provide the string to type. + * resize: Resize the viewport to a specific w,h size. + - Use with the \`size\` parameter to specify the new size. + * scroll_down: Scroll down the page by one page height. + * scroll_up: Scroll up the page by one page height. + * close: Close the Puppeteer-controlled browser instance. This **must always be the final browser action**. + - Example: \`close\` +- url: (optional) Use this for providing the URL for the \`launch\` action. + * Example: https://example.com +- coordinate: (optional) The X and Y coordinates for the \`click\` and \`hover\` actions. Coordinates should be within the **undefined** resolution. + * Example: 450,300 +- size: (optional) The width and height for the \`resize\` action. + * Example: 1280,720 +- text: (optional) Use this for providing the text for the \`type\` action. + * Example: Hello, world! +Usage: + +Action to perform (e.g., launch, click, type, scroll_down, scroll_up, close) +URL to launch the browser at (optional) +x,y coordinates (optional) +Text to type (optional) + + +Example: Requesting to launch a browser at https://example.com + +launch +https://example.com + + +Example: Requesting to click on the element at coordinates 450,300 + +click +450,300 + + ## execute_command Description: Request to execute a CLI command on the system. Use this when you need to perform system operations or run specific commands to accomplish any step in the user's task. You must tailor your command to the user's system and provide a clear explanation of what the command does. For command chaining, use the appropriate chaining syntax for the user's shell. Prefer to execute complex CLI commands over creating executable scripts, as they are more flexible and easier to run. Prefer relative commands and paths that avoid location sensitivity for terminal consistency, e.g: \`touch ./testdata/example.file\`, \`dir ./examples/model1/data/yaml\`, or \`go test ./cmd/front --config ./cmd/front/config.yml\`. If directed by the user, you may open a terminal in a different directory by using the \`cwd\` parameter. Parameters: @@ -1260,6 +1366,59 @@ Examples: true +## browser_action +Description: Request to interact with a Puppeteer-controlled browser. Every action, except \`close\`, will be responded to with a screenshot of the browser's current state, along with any new console logs. You may only perform one browser action per message, and wait for the user's response including a screenshot and logs to determine the next action. +- The sequence of actions **must always start with** launching the browser at a URL, and **must always end with** closing the browser. If you need to visit a new URL that is not possible to navigate to from the current webpage, you must first close the browser, then launch again at the new URL. +- While the browser is active, only the \`browser_action\` tool can be used. No other tools should be called during this time. You may proceed to use other tools only after closing the browser. For example if you run into an error and need to fix a file, you must close the browser, then use other tools to make the necessary changes, then re-launch the browser to verify the result. +- The browser window has a resolution of **undefined** pixels. When performing any click actions, ensure the coordinates are within this resolution range. +- Before clicking on any elements such as icons, links, or buttons, you must consult the provided screenshot of the page to determine the coordinates of the element. The click should be targeted at the **center of the element**, not on its edges. +Parameters: +- action: (required) The action to perform. The available actions are: + * launch: Launch a new Puppeteer-controlled browser instance at the specified URL. This **must always be the first action**. + - Use with the \`url\` parameter to provide the URL. + - Ensure the URL is valid and includes the appropriate protocol (e.g. http://localhost:3000/page, file:///path/to/file.html, etc.) + * hover: Move the cursor to a specific x,y coordinate. + - Use with the \`coordinate\` parameter to specify the location. + - Always move to the center of an element (icon, button, link, etc.) based on coordinates derived from a screenshot. + * click: Click at a specific x,y coordinate. + - Use with the \`coordinate\` parameter to specify the location. + - Always click in the center of an element (icon, button, link, etc.) based on coordinates derived from a screenshot. + * type: Type a string of text on the keyboard. You might use this after clicking on a text field to input text. + - Use with the \`text\` parameter to provide the string to type. + * resize: Resize the viewport to a specific w,h size. + - Use with the \`size\` parameter to specify the new size. + * scroll_down: Scroll down the page by one page height. + * scroll_up: Scroll up the page by one page height. + * close: Close the Puppeteer-controlled browser instance. This **must always be the final browser action**. + - Example: \`close\` +- url: (optional) Use this for providing the URL for the \`launch\` action. + * Example: https://example.com +- coordinate: (optional) The X and Y coordinates for the \`click\` and \`hover\` actions. Coordinates should be within the **undefined** resolution. + * Example: 450,300 +- size: (optional) The width and height for the \`resize\` action. + * Example: 1280,720 +- text: (optional) Use this for providing the text for the \`type\` action. + * Example: Hello, world! +Usage: + +Action to perform (e.g., launch, click, type, scroll_down, scroll_up, close) +URL to launch the browser at (optional) +x,y coordinates (optional) +Text to type (optional) + + +Example: Requesting to launch a browser at https://example.com + +launch +https://example.com + + +Example: Requesting to click on the element at coordinates 450,300 + +click +450,300 + + ## execute_command Description: Request to execute a CLI command on the system. Use this when you need to perform system operations or run specific commands to accomplish any step in the user's task. You must tailor your command to the user's system and provide a clear explanation of what the command does. For command chaining, use the appropriate chaining syntax for the user's shell. Prefer to execute complex CLI commands over creating executable scripts, as they are more flexible and easier to run. Prefer relative commands and paths that avoid location sensitivity for terminal consistency, e.g: \`touch ./testdata/example.file\`, \`dir ./examples/model1/data/yaml\`, or \`go test ./cmd/front --config ./cmd/front/config.yml\`. If directed by the user, you may open a terminal in a different directory by using the \`cwd\` parameter. Parameters: @@ -2304,6 +2463,59 @@ Examples: true +## browser_action +Description: Request to interact with a Puppeteer-controlled browser. Every action, except \`close\`, will be responded to with a screenshot of the browser's current state, along with any new console logs. You may only perform one browser action per message, and wait for the user's response including a screenshot and logs to determine the next action. +- The sequence of actions **must always start with** launching the browser at a URL, and **must always end with** closing the browser. If you need to visit a new URL that is not possible to navigate to from the current webpage, you must first close the browser, then launch again at the new URL. +- While the browser is active, only the \`browser_action\` tool can be used. No other tools should be called during this time. You may proceed to use other tools only after closing the browser. For example if you run into an error and need to fix a file, you must close the browser, then use other tools to make the necessary changes, then re-launch the browser to verify the result. +- The browser window has a resolution of **undefined** pixels. When performing any click actions, ensure the coordinates are within this resolution range. +- Before clicking on any elements such as icons, links, or buttons, you must consult the provided screenshot of the page to determine the coordinates of the element. The click should be targeted at the **center of the element**, not on its edges. +Parameters: +- action: (required) The action to perform. The available actions are: + * launch: Launch a new Puppeteer-controlled browser instance at the specified URL. This **must always be the first action**. + - Use with the \`url\` parameter to provide the URL. + - Ensure the URL is valid and includes the appropriate protocol (e.g. http://localhost:3000/page, file:///path/to/file.html, etc.) + * hover: Move the cursor to a specific x,y coordinate. + - Use with the \`coordinate\` parameter to specify the location. + - Always move to the center of an element (icon, button, link, etc.) based on coordinates derived from a screenshot. + * click: Click at a specific x,y coordinate. + - Use with the \`coordinate\` parameter to specify the location. + - Always click in the center of an element (icon, button, link, etc.) based on coordinates derived from a screenshot. + * type: Type a string of text on the keyboard. You might use this after clicking on a text field to input text. + - Use with the \`text\` parameter to provide the string to type. + * resize: Resize the viewport to a specific w,h size. + - Use with the \`size\` parameter to specify the new size. + * scroll_down: Scroll down the page by one page height. + * scroll_up: Scroll up the page by one page height. + * close: Close the Puppeteer-controlled browser instance. This **must always be the final browser action**. + - Example: \`close\` +- url: (optional) Use this for providing the URL for the \`launch\` action. + * Example: https://example.com +- coordinate: (optional) The X and Y coordinates for the \`click\` and \`hover\` actions. Coordinates should be within the **undefined** resolution. + * Example: 450,300 +- size: (optional) The width and height for the \`resize\` action. + * Example: 1280,720 +- text: (optional) Use this for providing the text for the \`type\` action. + * Example: Hello, world! +Usage: + +Action to perform (e.g., launch, click, type, scroll_down, scroll_up, close) +URL to launch the browser at (optional) +x,y coordinates (optional) +Text to type (optional) + + +Example: Requesting to launch a browser at https://example.com + +launch +https://example.com + + +Example: Requesting to click on the element at coordinates 450,300 + +click +450,300 + + ## execute_command Description: Request to execute a CLI command on the system. Use this when you need to perform system operations or run specific commands to accomplish any step in the user's task. You must tailor your command to the user's system and provide a clear explanation of what the command does. For command chaining, use the appropriate chaining syntax for the user's shell. Prefer to execute complex CLI commands over creating executable scripts, as they are more flexible and easier to run. Prefer relative commands and paths that avoid location sensitivity for terminal consistency, e.g: \`touch ./testdata/example.file\`, \`dir ./examples/model1/data/yaml\`, or \`go test ./cmd/front --config ./cmd/front/config.yml\`. If directed by the user, you may open a terminal in a different directory by using the \`cwd\` parameter. Parameters: @@ -3504,6 +3716,59 @@ Examples: true +## browser_action +Description: Request to interact with a Puppeteer-controlled browser. Every action, except \`close\`, will be responded to with a screenshot of the browser's current state, along with any new console logs. You may only perform one browser action per message, and wait for the user's response including a screenshot and logs to determine the next action. +- The sequence of actions **must always start with** launching the browser at a URL, and **must always end with** closing the browser. If you need to visit a new URL that is not possible to navigate to from the current webpage, you must first close the browser, then launch again at the new URL. +- While the browser is active, only the \`browser_action\` tool can be used. No other tools should be called during this time. You may proceed to use other tools only after closing the browser. For example if you run into an error and need to fix a file, you must close the browser, then use other tools to make the necessary changes, then re-launch the browser to verify the result. +- The browser window has a resolution of **undefined** pixels. When performing any click actions, ensure the coordinates are within this resolution range. +- Before clicking on any elements such as icons, links, or buttons, you must consult the provided screenshot of the page to determine the coordinates of the element. The click should be targeted at the **center of the element**, not on its edges. +Parameters: +- action: (required) The action to perform. The available actions are: + * launch: Launch a new Puppeteer-controlled browser instance at the specified URL. This **must always be the first action**. + - Use with the \`url\` parameter to provide the URL. + - Ensure the URL is valid and includes the appropriate protocol (e.g. http://localhost:3000/page, file:///path/to/file.html, etc.) + * hover: Move the cursor to a specific x,y coordinate. + - Use with the \`coordinate\` parameter to specify the location. + - Always move to the center of an element (icon, button, link, etc.) based on coordinates derived from a screenshot. + * click: Click at a specific x,y coordinate. + - Use with the \`coordinate\` parameter to specify the location. + - Always click in the center of an element (icon, button, link, etc.) based on coordinates derived from a screenshot. + * type: Type a string of text on the keyboard. You might use this after clicking on a text field to input text. + - Use with the \`text\` parameter to provide the string to type. + * resize: Resize the viewport to a specific w,h size. + - Use with the \`size\` parameter to specify the new size. + * scroll_down: Scroll down the page by one page height. + * scroll_up: Scroll up the page by one page height. + * close: Close the Puppeteer-controlled browser instance. This **must always be the final browser action**. + - Example: \`close\` +- url: (optional) Use this for providing the URL for the \`launch\` action. + * Example: https://example.com +- coordinate: (optional) The X and Y coordinates for the \`click\` and \`hover\` actions. Coordinates should be within the **undefined** resolution. + * Example: 450,300 +- size: (optional) The width and height for the \`resize\` action. + * Example: 1280,720 +- text: (optional) Use this for providing the text for the \`type\` action. + * Example: Hello, world! +Usage: + +Action to perform (e.g., launch, click, type, scroll_down, scroll_up, close) +URL to launch the browser at (optional) +x,y coordinates (optional) +Text to type (optional) + + +Example: Requesting to launch a browser at https://example.com + +launch +https://example.com + + +Example: Requesting to click on the element at coordinates 450,300 + +click +450,300 + + ## execute_command Description: Request to execute a CLI command on the system. Use this when you need to perform system operations or run specific commands to accomplish any step in the user's task. You must tailor your command to the user's system and provide a clear explanation of what the command does. For command chaining, use the appropriate chaining syntax for the user's shell. Prefer to execute complex CLI commands over creating executable scripts, as they are more flexible and easier to run. Prefer relative commands and paths that avoid location sensitivity for terminal consistency, e.g: \`touch ./testdata/example.file\`, \`dir ./examples/model1/data/yaml\`, or \`go test ./cmd/front --config ./cmd/front/config.yml\`. If directed by the user, you may open a terminal in a different directory by using the \`cwd\` parameter. Parameters: @@ -3998,7 +4263,60 @@ Examples: true -## execute_command +## browser_action +Description: Request to interact with a Puppeteer-controlled browser. Every action, except \`close\`, will be responded to with a screenshot of the browser's current state, along with any new console logs. You may only perform one browser action per message, and wait for the user's response including a screenshot and logs to determine the next action. +- The sequence of actions **must always start with** launching the browser at a URL, and **must always end with** closing the browser. If you need to visit a new URL that is not possible to navigate to from the current webpage, you must first close the browser, then launch again at the new URL. +- While the browser is active, only the \`browser_action\` tool can be used. No other tools should be called during this time. You may proceed to use other tools only after closing the browser. For example if you run into an error and need to fix a file, you must close the browser, then use other tools to make the necessary changes, then re-launch the browser to verify the result. +- The browser window has a resolution of **undefined** pixels. When performing any click actions, ensure the coordinates are within this resolution range. +- Before clicking on any elements such as icons, links, or buttons, you must consult the provided screenshot of the page to determine the coordinates of the element. The click should be targeted at the **center of the element**, not on its edges. +Parameters: +- action: (required) The action to perform. The available actions are: + * launch: Launch a new Puppeteer-controlled browser instance at the specified URL. This **must always be the first action**. + - Use with the \`url\` parameter to provide the URL. + - Ensure the URL is valid and includes the appropriate protocol (e.g. http://localhost:3000/page, file:///path/to/file.html, etc.) + * hover: Move the cursor to a specific x,y coordinate. + - Use with the \`coordinate\` parameter to specify the location. + - Always move to the center of an element (icon, button, link, etc.) based on coordinates derived from a screenshot. + * click: Click at a specific x,y coordinate. + - Use with the \`coordinate\` parameter to specify the location. + - Always click in the center of an element (icon, button, link, etc.) based on coordinates derived from a screenshot. + * type: Type a string of text on the keyboard. You might use this after clicking on a text field to input text. + - Use with the \`text\` parameter to provide the string to type. + * resize: Resize the viewport to a specific w,h size. + - Use with the \`size\` parameter to specify the new size. + * scroll_down: Scroll down the page by one page height. + * scroll_up: Scroll up the page by one page height. + * close: Close the Puppeteer-controlled browser instance. This **must always be the final browser action**. + - Example: \`close\` +- url: (optional) Use this for providing the URL for the \`launch\` action. + * Example: https://example.com +- coordinate: (optional) The X and Y coordinates for the \`click\` and \`hover\` actions. Coordinates should be within the **undefined** resolution. + * Example: 450,300 +- size: (optional) The width and height for the \`resize\` action. + * Example: 1280,720 +- text: (optional) Use this for providing the text for the \`type\` action. + * Example: Hello, world! +Usage: + +Action to perform (e.g., launch, click, type, scroll_down, scroll_up, close) +URL to launch the browser at (optional) +x,y coordinates (optional) +Text to type (optional) + + +Example: Requesting to launch a browser at https://example.com + +launch +https://example.com + + +Example: Requesting to click on the element at coordinates 450,300 + +click +450,300 + + +## execute_command Description: Request to execute a CLI command on the system. Use this when you need to perform system operations or run specific commands to accomplish any step in the user's task. You must tailor your command to the user's system and provide a clear explanation of what the command does. For command chaining, use the appropriate chaining syntax for the user's shell. Prefer to execute complex CLI commands over creating executable scripts, as they are more flexible and easier to run. Prefer relative commands and paths that avoid location sensitivity for terminal consistency, e.g: \`touch ./testdata/example.file\`, \`dir ./examples/model1/data/yaml\`, or \`go test ./cmd/front --config ./cmd/front/config.yml\`. If directed by the user, you may open a terminal in a different directory by using the \`cwd\` parameter. Parameters: - command: (required) The CLI command to execute. This should be valid for the current operating system. Ensure the command is properly formatted and does not contain any harmful instructions. @@ -4534,6 +4852,59 @@ Examples: true +## browser_action +Description: Request to interact with a Puppeteer-controlled browser. Every action, except \`close\`, will be responded to with a screenshot of the browser's current state, along with any new console logs. You may only perform one browser action per message, and wait for the user's response including a screenshot and logs to determine the next action. +- The sequence of actions **must always start with** launching the browser at a URL, and **must always end with** closing the browser. If you need to visit a new URL that is not possible to navigate to from the current webpage, you must first close the browser, then launch again at the new URL. +- While the browser is active, only the \`browser_action\` tool can be used. No other tools should be called during this time. You may proceed to use other tools only after closing the browser. For example if you run into an error and need to fix a file, you must close the browser, then use other tools to make the necessary changes, then re-launch the browser to verify the result. +- The browser window has a resolution of **undefined** pixels. When performing any click actions, ensure the coordinates are within this resolution range. +- Before clicking on any elements such as icons, links, or buttons, you must consult the provided screenshot of the page to determine the coordinates of the element. The click should be targeted at the **center of the element**, not on its edges. +Parameters: +- action: (required) The action to perform. The available actions are: + * launch: Launch a new Puppeteer-controlled browser instance at the specified URL. This **must always be the first action**. + - Use with the \`url\` parameter to provide the URL. + - Ensure the URL is valid and includes the appropriate protocol (e.g. http://localhost:3000/page, file:///path/to/file.html, etc.) + * hover: Move the cursor to a specific x,y coordinate. + - Use with the \`coordinate\` parameter to specify the location. + - Always move to the center of an element (icon, button, link, etc.) based on coordinates derived from a screenshot. + * click: Click at a specific x,y coordinate. + - Use with the \`coordinate\` parameter to specify the location. + - Always click in the center of an element (icon, button, link, etc.) based on coordinates derived from a screenshot. + * type: Type a string of text on the keyboard. You might use this after clicking on a text field to input text. + - Use with the \`text\` parameter to provide the string to type. + * resize: Resize the viewport to a specific w,h size. + - Use with the \`size\` parameter to specify the new size. + * scroll_down: Scroll down the page by one page height. + * scroll_up: Scroll up the page by one page height. + * close: Close the Puppeteer-controlled browser instance. This **must always be the final browser action**. + - Example: \`close\` +- url: (optional) Use this for providing the URL for the \`launch\` action. + * Example: https://example.com +- coordinate: (optional) The X and Y coordinates for the \`click\` and \`hover\` actions. Coordinates should be within the **undefined** resolution. + * Example: 450,300 +- size: (optional) The width and height for the \`resize\` action. + * Example: 1280,720 +- text: (optional) Use this for providing the text for the \`type\` action. + * Example: Hello, world! +Usage: + +Action to perform (e.g., launch, click, type, scroll_down, scroll_up, close) +URL to launch the browser at (optional) +x,y coordinates (optional) +Text to type (optional) + + +Example: Requesting to launch a browser at https://example.com + +launch +https://example.com + + +Example: Requesting to click on the element at coordinates 450,300 + +click +450,300 + + ## execute_command Description: Request to execute a CLI command on the system. Use this when you need to perform system operations or run specific commands to accomplish any step in the user's task. You must tailor your command to the user's system and provide a clear explanation of what the command does. For command chaining, use the appropriate chaining syntax for the user's shell. Prefer to execute complex CLI commands over creating executable scripts, as they are more flexible and easier to run. Prefer relative commands and paths that avoid location sensitivity for terminal consistency, e.g: \`touch ./testdata/example.file\`, \`dir ./examples/model1/data/yaml\`, or \`go test ./cmd/front --config ./cmd/front/config.yml\`. If directed by the user, you may open a terminal in a different directory by using the \`cwd\` parameter. Parameters: @@ -5105,6 +5476,59 @@ Examples: true +## browser_action +Description: Request to interact with a Puppeteer-controlled browser. Every action, except \`close\`, will be responded to with a screenshot of the browser's current state, along with any new console logs. You may only perform one browser action per message, and wait for the user's response including a screenshot and logs to determine the next action. +- The sequence of actions **must always start with** launching the browser at a URL, and **must always end with** closing the browser. If you need to visit a new URL that is not possible to navigate to from the current webpage, you must first close the browser, then launch again at the new URL. +- While the browser is active, only the \`browser_action\` tool can be used. No other tools should be called during this time. You may proceed to use other tools only after closing the browser. For example if you run into an error and need to fix a file, you must close the browser, then use other tools to make the necessary changes, then re-launch the browser to verify the result. +- The browser window has a resolution of **undefined** pixels. When performing any click actions, ensure the coordinates are within this resolution range. +- Before clicking on any elements such as icons, links, or buttons, you must consult the provided screenshot of the page to determine the coordinates of the element. The click should be targeted at the **center of the element**, not on its edges. +Parameters: +- action: (required) The action to perform. The available actions are: + * launch: Launch a new Puppeteer-controlled browser instance at the specified URL. This **must always be the first action**. + - Use with the \`url\` parameter to provide the URL. + - Ensure the URL is valid and includes the appropriate protocol (e.g. http://localhost:3000/page, file:///path/to/file.html, etc.) + * hover: Move the cursor to a specific x,y coordinate. + - Use with the \`coordinate\` parameter to specify the location. + - Always move to the center of an element (icon, button, link, etc.) based on coordinates derived from a screenshot. + * click: Click at a specific x,y coordinate. + - Use with the \`coordinate\` parameter to specify the location. + - Always click in the center of an element (icon, button, link, etc.) based on coordinates derived from a screenshot. + * type: Type a string of text on the keyboard. You might use this after clicking on a text field to input text. + - Use with the \`text\` parameter to provide the string to type. + * resize: Resize the viewport to a specific w,h size. + - Use with the \`size\` parameter to specify the new size. + * scroll_down: Scroll down the page by one page height. + * scroll_up: Scroll up the page by one page height. + * close: Close the Puppeteer-controlled browser instance. This **must always be the final browser action**. + - Example: \`close\` +- url: (optional) Use this for providing the URL for the \`launch\` action. + * Example: https://example.com +- coordinate: (optional) The X and Y coordinates for the \`click\` and \`hover\` actions. Coordinates should be within the **undefined** resolution. + * Example: 450,300 +- size: (optional) The width and height for the \`resize\` action. + * Example: 1280,720 +- text: (optional) Use this for providing the text for the \`type\` action. + * Example: Hello, world! +Usage: + +Action to perform (e.g., launch, click, type, scroll_down, scroll_up, close) +URL to launch the browser at (optional) +x,y coordinates (optional) +Text to type (optional) + + +Example: Requesting to launch a browser at https://example.com + +launch +https://example.com + + +Example: Requesting to click on the element at coordinates 450,300 + +click +450,300 + + ## ask_followup_question Description: Ask the user a question to gather additional information needed to complete the task. This tool should be used when you encounter ambiguities, need clarification, or require more details to proceed effectively. It allows for interactive problem-solving by enabling direct communication with the user. Use this tool judiciously to maintain a balance between gathering necessary information and avoiding excessive back-and-forth. Parameters: @@ -5487,6 +5911,59 @@ Examples: src/ +## browser_action +Description: Request to interact with a Puppeteer-controlled browser. Every action, except \`close\`, will be responded to with a screenshot of the browser's current state, along with any new console logs. You may only perform one browser action per message, and wait for the user's response including a screenshot and logs to determine the next action. +- The sequence of actions **must always start with** launching the browser at a URL, and **must always end with** closing the browser. If you need to visit a new URL that is not possible to navigate to from the current webpage, you must first close the browser, then launch again at the new URL. +- While the browser is active, only the \`browser_action\` tool can be used. No other tools should be called during this time. You may proceed to use other tools only after closing the browser. For example if you run into an error and need to fix a file, you must close the browser, then use other tools to make the necessary changes, then re-launch the browser to verify the result. +- The browser window has a resolution of **undefined** pixels. When performing any click actions, ensure the coordinates are within this resolution range. +- Before clicking on any elements such as icons, links, or buttons, you must consult the provided screenshot of the page to determine the coordinates of the element. The click should be targeted at the **center of the element**, not on its edges. +Parameters: +- action: (required) The action to perform. The available actions are: + * launch: Launch a new Puppeteer-controlled browser instance at the specified URL. This **must always be the first action**. + - Use with the \`url\` parameter to provide the URL. + - Ensure the URL is valid and includes the appropriate protocol (e.g. http://localhost:3000/page, file:///path/to/file.html, etc.) + * hover: Move the cursor to a specific x,y coordinate. + - Use with the \`coordinate\` parameter to specify the location. + - Always move to the center of an element (icon, button, link, etc.) based on coordinates derived from a screenshot. + * click: Click at a specific x,y coordinate. + - Use with the \`coordinate\` parameter to specify the location. + - Always click in the center of an element (icon, button, link, etc.) based on coordinates derived from a screenshot. + * type: Type a string of text on the keyboard. You might use this after clicking on a text field to input text. + - Use with the \`text\` parameter to provide the string to type. + * resize: Resize the viewport to a specific w,h size. + - Use with the \`size\` parameter to specify the new size. + * scroll_down: Scroll down the page by one page height. + * scroll_up: Scroll up the page by one page height. + * close: Close the Puppeteer-controlled browser instance. This **must always be the final browser action**. + - Example: \`close\` +- url: (optional) Use this for providing the URL for the \`launch\` action. + * Example: https://example.com +- coordinate: (optional) The X and Y coordinates for the \`click\` and \`hover\` actions. Coordinates should be within the **undefined** resolution. + * Example: 450,300 +- size: (optional) The width and height for the \`resize\` action. + * Example: 1280,720 +- text: (optional) Use this for providing the text for the \`type\` action. + * Example: Hello, world! +Usage: + +Action to perform (e.g., launch, click, type, scroll_down, scroll_up, close) +URL to launch the browser at (optional) +x,y coordinates (optional) +Text to type (optional) + + +Example: Requesting to launch a browser at https://example.com + +launch +https://example.com + + +Example: Requesting to click on the element at coordinates 450,300 + +click +450,300 + + ## ask_followup_question Description: Ask the user a question to gather additional information needed to complete the task. This tool should be used when you encounter ambiguities, need clarification, or require more details to proceed effectively. It allows for interactive problem-solving by enabling direct communication with the user. Use this tool judiciously to maintain a balance between gathering necessary information and avoiding excessive back-and-forth. Parameters: @@ -5992,6 +6469,59 @@ Examples: true +## browser_action +Description: Request to interact with a Puppeteer-controlled browser. Every action, except \`close\`, will be responded to with a screenshot of the browser's current state, along with any new console logs. You may only perform one browser action per message, and wait for the user's response including a screenshot and logs to determine the next action. +- The sequence of actions **must always start with** launching the browser at a URL, and **must always end with** closing the browser. If you need to visit a new URL that is not possible to navigate to from the current webpage, you must first close the browser, then launch again at the new URL. +- While the browser is active, only the \`browser_action\` tool can be used. No other tools should be called during this time. You may proceed to use other tools only after closing the browser. For example if you run into an error and need to fix a file, you must close the browser, then use other tools to make the necessary changes, then re-launch the browser to verify the result. +- The browser window has a resolution of **undefined** pixels. When performing any click actions, ensure the coordinates are within this resolution range. +- Before clicking on any elements such as icons, links, or buttons, you must consult the provided screenshot of the page to determine the coordinates of the element. The click should be targeted at the **center of the element**, not on its edges. +Parameters: +- action: (required) The action to perform. The available actions are: + * launch: Launch a new Puppeteer-controlled browser instance at the specified URL. This **must always be the first action**. + - Use with the \`url\` parameter to provide the URL. + - Ensure the URL is valid and includes the appropriate protocol (e.g. http://localhost:3000/page, file:///path/to/file.html, etc.) + * hover: Move the cursor to a specific x,y coordinate. + - Use with the \`coordinate\` parameter to specify the location. + - Always move to the center of an element (icon, button, link, etc.) based on coordinates derived from a screenshot. + * click: Click at a specific x,y coordinate. + - Use with the \`coordinate\` parameter to specify the location. + - Always click in the center of an element (icon, button, link, etc.) based on coordinates derived from a screenshot. + * type: Type a string of text on the keyboard. You might use this after clicking on a text field to input text. + - Use with the \`text\` parameter to provide the string to type. + * resize: Resize the viewport to a specific w,h size. + - Use with the \`size\` parameter to specify the new size. + * scroll_down: Scroll down the page by one page height. + * scroll_up: Scroll up the page by one page height. + * close: Close the Puppeteer-controlled browser instance. This **must always be the final browser action**. + - Example: \`close\` +- url: (optional) Use this for providing the URL for the \`launch\` action. + * Example: https://example.com +- coordinate: (optional) The X and Y coordinates for the \`click\` and \`hover\` actions. Coordinates should be within the **undefined** resolution. + * Example: 450,300 +- size: (optional) The width and height for the \`resize\` action. + * Example: 1280,720 +- text: (optional) Use this for providing the text for the \`type\` action. + * Example: Hello, world! +Usage: + +Action to perform (e.g., launch, click, type, scroll_down, scroll_up, close) +URL to launch the browser at (optional) +x,y coordinates (optional) +Text to type (optional) + + +Example: Requesting to launch a browser at https://example.com + +launch +https://example.com + + +Example: Requesting to click on the element at coordinates 450,300 + +click +450,300 + + ## execute_command Description: Request to execute a CLI command on the system. Use this when you need to perform system operations or run specific commands to accomplish any step in the user's task. You must tailor your command to the user's system and provide a clear explanation of what the command does. For command chaining, use the appropriate chaining syntax for the user's shell. Prefer to execute complex CLI commands over creating executable scripts, as they are more flexible and easier to run. Prefer relative commands and paths that avoid location sensitivity for terminal consistency, e.g: \`touch ./testdata/example.file\`, \`dir ./examples/model1/data/yaml\`, or \`go test ./cmd/front --config ./cmd/front/config.yml\`. If directed by the user, you may open a terminal in a different directory by using the \`cwd\` parameter. Parameters: @@ -6577,6 +7107,59 @@ Examples: true +## browser_action +Description: Request to interact with a Puppeteer-controlled browser. Every action, except \`close\`, will be responded to with a screenshot of the browser's current state, along with any new console logs. You may only perform one browser action per message, and wait for the user's response including a screenshot and logs to determine the next action. +- The sequence of actions **must always start with** launching the browser at a URL, and **must always end with** closing the browser. If you need to visit a new URL that is not possible to navigate to from the current webpage, you must first close the browser, then launch again at the new URL. +- While the browser is active, only the \`browser_action\` tool can be used. No other tools should be called during this time. You may proceed to use other tools only after closing the browser. For example if you run into an error and need to fix a file, you must close the browser, then use other tools to make the necessary changes, then re-launch the browser to verify the result. +- The browser window has a resolution of **undefined** pixels. When performing any click actions, ensure the coordinates are within this resolution range. +- Before clicking on any elements such as icons, links, or buttons, you must consult the provided screenshot of the page to determine the coordinates of the element. The click should be targeted at the **center of the element**, not on its edges. +Parameters: +- action: (required) The action to perform. The available actions are: + * launch: Launch a new Puppeteer-controlled browser instance at the specified URL. This **must always be the first action**. + - Use with the \`url\` parameter to provide the URL. + - Ensure the URL is valid and includes the appropriate protocol (e.g. http://localhost:3000/page, file:///path/to/file.html, etc.) + * hover: Move the cursor to a specific x,y coordinate. + - Use with the \`coordinate\` parameter to specify the location. + - Always move to the center of an element (icon, button, link, etc.) based on coordinates derived from a screenshot. + * click: Click at a specific x,y coordinate. + - Use with the \`coordinate\` parameter to specify the location. + - Always click in the center of an element (icon, button, link, etc.) based on coordinates derived from a screenshot. + * type: Type a string of text on the keyboard. You might use this after clicking on a text field to input text. + - Use with the \`text\` parameter to provide the string to type. + * resize: Resize the viewport to a specific w,h size. + - Use with the \`size\` parameter to specify the new size. + * scroll_down: Scroll down the page by one page height. + * scroll_up: Scroll up the page by one page height. + * close: Close the Puppeteer-controlled browser instance. This **must always be the final browser action**. + - Example: \`close\` +- url: (optional) Use this for providing the URL for the \`launch\` action. + * Example: https://example.com +- coordinate: (optional) The X and Y coordinates for the \`click\` and \`hover\` actions. Coordinates should be within the **undefined** resolution. + * Example: 450,300 +- size: (optional) The width and height for the \`resize\` action. + * Example: 1280,720 +- text: (optional) Use this for providing the text for the \`type\` action. + * Example: Hello, world! +Usage: + +Action to perform (e.g., launch, click, type, scroll_down, scroll_up, close) +URL to launch the browser at (optional) +x,y coordinates (optional) +Text to type (optional) + + +Example: Requesting to launch a browser at https://example.com + +launch +https://example.com + + +Example: Requesting to click on the element at coordinates 450,300 + +click +450,300 + + ## execute_command Description: Request to execute a CLI command on the system. Use this when you need to perform system operations or run specific commands to accomplish any step in the user's task. You must tailor your command to the user's system and provide a clear explanation of what the command does. For command chaining, use the appropriate chaining syntax for the user's shell. Prefer to execute complex CLI commands over creating executable scripts, as they are more flexible and easier to run. Prefer relative commands and paths that avoid location sensitivity for terminal consistency, e.g: \`touch ./testdata/example.file\`, \`dir ./examples/model1/data/yaml\`, or \`go test ./cmd/front --config ./cmd/front/config.yml\`. If directed by the user, you may open a terminal in a different directory by using the \`cwd\` parameter. Parameters: diff --git a/src/core/prompts/tools/browser-action.ts b/src/core/prompts/tools/browser-action.ts index e1b33b9d7d..64f7cf3829 100644 --- a/src/core/prompts/tools/browser-action.ts +++ b/src/core/prompts/tools/browser-action.ts @@ -1,9 +1,6 @@ import { ToolArgs } from "./types" export function getBrowserActionDescription(args: ToolArgs): string | undefined { - if (!args.supportsComputerUse) { - return undefined - } return `## browser_action Description: Request to interact with a Puppeteer-controlled browser. Every action, except \`close\`, will be responded to with a screenshot of the browser's current state, along with any new console logs. You may only perform one browser action per message, and wait for the user's response including a screenshot and logs to determine the next action. - The sequence of actions **must always start with** launching the browser at a URL, and **must always end with** closing the browser. If you need to visit a new URL that is not possible to navigate to from the current webpage, you must first close the browser, then launch again at the new URL.