|
1 | 1 | import type OpenAI from "openai" |
2 | 2 |
|
/**
 * Tool-level description passed as `function.description` of the
 * `browser_action` tool definition exported by this module.
 *
 * Kept as a multi-line template literal on purpose: the blank line and the
 * bulleted "Browser Session Lifecycle" section are part of the text itself.
 */
const BROWSER_ACTION_DESCRIPTION = `Request to interact with a Puppeteer-controlled browser. Every action, except close, will be responded to with a screenshot of the browser's current state, along with any new console logs. You may only perform one browser action per message, and wait for the user's response including a screenshot and logs to determine the next action.

Browser Session Lifecycle:
- Browser sessions start with launch and end with close
- The session remains active across multiple messages and tool uses
- You can use other tools while the browser session is active - it will stay open in the background`
| 9 | + |
/** Description for the `action` parameter (the enum of supported browser actions). */
const ACTION_PARAMETER_DESCRIPTION = "Browser action to perform"

/** Description for the `url` parameter. */
const URL_PARAMETER_DESCRIPTION = "URL to open when performing the launch action; must include protocol"

/**
 * Description for the `coordinate` parameter.
 * The text itself spells out the expected 'x,y@WIDTHxHEIGHT' format.
 */
const COORDINATE_PARAMETER_DESCRIPTION =
	"Screen coordinate for hover or click actions in format 'x,y@WIDTHxHEIGHT' where x,y is the target position on the screenshot image and WIDTHxHEIGHT is the exact pixel dimensions of the screenshot image (not the browser viewport). Example: '450,203@900x600' means click at (450,203) on a 900x600 screenshot. The coordinates will be automatically scaled to match the actual viewport dimensions."

/** Description for the `size` parameter. */
const SIZE_PARAMETER_DESCRIPTION =
	"Viewport dimensions for the resize action in format 'WIDTHxHEIGHT' or 'WIDTH,HEIGHT'. Example: '1280x800' or '1280,800'"

/** Description for the `text` parameter. */
const TEXT_PARAMETER_DESCRIPTION =
	"Text to type when performing the type action, or key name to press when performing the press action (e.g., 'Enter', 'Tab', 'Escape')"
| 19 | + |
3 | 20 | export default { |
4 | 21 | type: "function", |
5 | 22 | function: { |
6 | 23 | name: "browser_action", |
7 | | - description: |
8 | | - "Interact with a browser session. Always start by launching at a URL and always finish by closing the browser. While the browser is active, do not call any other tools. Use coordinates within the viewport to hover or click, provide text for typing, and ensure actions are grounded in the latest screenshot and console logs.", |
| 24 | + description: BROWSER_ACTION_DESCRIPTION, |
9 | 25 | strict: true, |
10 | 26 | parameters: { |
11 | 27 | type: "object", |
12 | 28 | properties: { |
13 | 29 | action: { |
14 | 30 | type: "string", |
15 | | - description: "Browser action to perform", |
| 31 | + description: ACTION_PARAMETER_DESCRIPTION, |
16 | 32 | enum: ["launch", "click", "hover", "type", "press", "scroll_down", "scroll_up", "resize", "close"], |
17 | 33 | }, |
18 | 34 | url: { |
19 | 35 | type: ["string", "null"], |
20 | | - description: "URL to open when performing the launch action; must include protocol", |
| 36 | + description: URL_PARAMETER_DESCRIPTION, |
21 | 37 | }, |
22 | 38 | coordinate: { |
23 | 39 | type: ["string", "null"], |
24 | | - description: |
25 | | - "Screen coordinate for hover or click actions in format 'x,y@WIDTHxHEIGHT' where x,y is the target position on the screenshot image and WIDTHxHEIGHT is the exact pixel dimensions of the screenshot image (not the browser viewport). Example: '450,203@900x600' means click at (450,203) on a 900x600 screenshot. The coordinates will be automatically scaled to match the actual viewport dimensions.", |
| 40 | + description: COORDINATE_PARAMETER_DESCRIPTION, |
26 | 41 | }, |
27 | 42 | size: { |
28 | 43 | type: ["string", "null"], |
29 | | - description: |
30 | | - "Viewport dimensions for the resize action in format 'WIDTHxHEIGHT' or 'WIDTH,HEIGHT'. Example: '1280x800' or '1280,800'", |
| 44 | + description: SIZE_PARAMETER_DESCRIPTION, |
31 | 45 | }, |
32 | 46 | text: { |
33 | 47 | type: ["string", "null"], |
34 | | - description: |
35 | | - "Text to type when performing the type action, or key name to press when performing the press action (e.g., 'Enter', 'Tab', 'Escape')", |
| 48 | + description: TEXT_PARAMETER_DESCRIPTION, |
36 | 49 | }, |
37 | 50 | }, |
38 | 51 | required: ["action"], |
|
0 commit comments