Skip to content

Commit fff8fdd

Browse files
authored
feat(browserTool): Implement resize action (#2370)
* Implement resize action for browser action tool
* Update snapshots
1 parent 88cac3c commit fff8fdd

File tree

6 files changed

+51
-2
lines changed

6 files changed

+51
-2
lines changed

src/core/assistant-message/index.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ export const toolParamNames = [
6060
"cwd",
6161
"follow_up",
6262
"task",
63+
"size",
6364
] as const
6465

6566
export type ToolParamName = (typeof toolParamNames)[number]
@@ -115,7 +116,7 @@ export interface ListCodeDefinitionNamesToolUse extends ToolUse {
115116

116117
export interface BrowserActionToolUse extends ToolUse {
117118
name: "browser_action"
118-
params: Partial<Pick<Record<ToolParamName, string>, "action" | "url" | "coordinate" | "text">>
119+
params: Partial<Pick<Record<ToolParamName, string>, "action" | "url" | "coordinate" | "text" | "size">>
119120
}
120121

121122
export interface UseMcpToolToolUse extends ToolUse {

src/core/prompts/__tests__/__snapshots__/system.test.ts.snap

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2719,6 +2719,8 @@ Parameters:
27192719
- Always click in the center of an element (icon, button, link, etc.) based on coordinates derived from a screenshot.
27202720
* type: Type a string of text on the keyboard. You might use this after clicking on a text field to input text.
27212721
- Use with the \`text\` parameter to provide the string to type.
2722+
* resize: Resize the viewport to a specific w,h size.
2723+
- Use with the \`size\` parameter to specify the new size.
27222724
* scroll_down: Scroll down the page by one page height.
27232725
* scroll_up: Scroll up the page by one page height.
27242726
* close: Close the Puppeteer-controlled browser instance. This **must always be the final browser action**.
@@ -2727,6 +2729,8 @@ Parameters:
27272729
* Example: <url>https://example.com</url>
27282730
- coordinate: (optional) The X and Y coordinates for the \`click\` action. Coordinates should be within the **900x600** resolution.
27292731
* Example: <coordinate>450,300</coordinate>
2732+
- size: (optional) The width and height for the \`resize\` action.
2733+
* Example: <size>1280,720</size>
27302734
- text: (optional) Use this for providing the text for the \`type\` action.
27312735
* Example: <text>Hello, world!</text>
27322736
Usage:
@@ -3630,6 +3634,8 @@ Parameters:
36303634
- Always click in the center of an element (icon, button, link, etc.) based on coordinates derived from a screenshot.
36313635
* type: Type a string of text on the keyboard. You might use this after clicking on a text field to input text.
36323636
- Use with the \`text\` parameter to provide the string to type.
3637+
* resize: Resize the viewport to a specific w,h size.
3638+
- Use with the \`size\` parameter to specify the new size.
36333639
* scroll_down: Scroll down the page by one page height.
36343640
* scroll_up: Scroll up the page by one page height.
36353641
* close: Close the Puppeteer-controlled browser instance. This **must always be the final browser action**.
@@ -3638,6 +3644,8 @@ Parameters:
36383644
* Example: <url>https://example.com</url>
36393645
- coordinate: (optional) The X and Y coordinates for the \`click\` action. Coordinates should be within the **1280x800** resolution.
36403646
* Example: <coordinate>450,300</coordinate>
3647+
- size: (optional) The width and height for the \`resize\` action.
3648+
* Example: <size>1280,720</size>
36413649
- text: (optional) Use this for providing the text for the \`type\` action.
36423650
* Example: <text>Hello, world!</text>
36433651
Usage:

src/core/prompts/tools/browser-action.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ Parameters:
2020
- Always click in the center of an element (icon, button, link, etc.) based on coordinates derived from a screenshot.
2121
* type: Type a string of text on the keyboard. You might use this after clicking on a text field to input text.
2222
- Use with the \`text\` parameter to provide the string to type.
23+
* resize: Resize the viewport to a specific w,h size.
24+
- Use with the \`size\` parameter to specify the new size.
2325
* scroll_down: Scroll down the page by one page height.
2426
* scroll_up: Scroll up the page by one page height.
2527
* close: Close the Puppeteer-controlled browser instance. This **must always be the final browser action**.
@@ -28,6 +30,8 @@ Parameters:
2830
* Example: <url>https://example.com</url>
2931
- coordinate: (optional) The X and Y coordinates for the \`click\` action. Coordinates should be within the **${args.browserViewportSize}** resolution.
3032
* Example: <coordinate>450,300</coordinate>
33+
- size: (optional) The width and height for the \`resize\` action.
34+
* Example: <size>1280,720</size>
3135
- text: (optional) Use this for providing the text for the \`type\` action.
3236
* Example: <text>Hello, world!</text>
3337
Usage:

src/core/tools/browserActionTool.ts

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ export async function browserActionTool(
2121
const url: string | undefined = block.params.url
2222
const coordinate: string | undefined = block.params.coordinate
2323
const text: string | undefined = block.params.text
24+
const size: string | undefined = block.params.size
2425
if (!action || !browserActions.includes(action)) {
2526
// checking for action to ensure it is complete and valid
2627
if (!block.partial) {
@@ -88,6 +89,14 @@ export async function browserActionTool(
8889
return
8990
}
9091
}
92+
if (action === "resize") {
93+
if (!size) {
94+
cline.consecutiveMistakeCount++
95+
pushToolResult(await cline.sayAndCreateMissingParamError("browser_action", "size"))
96+
await cline.browserSession.closeBrowser()
97+
return
98+
}
99+
}
91100
cline.consecutiveMistakeCount = 0
92101
await cline.say(
93102
"browser_action",
@@ -112,6 +121,9 @@ export async function browserActionTool(
112121
case "scroll_up":
113122
browserActionResult = await cline.browserSession.scrollUp()
114123
break
124+
case "resize":
125+
browserActionResult = await cline.browserSession.resize(size!)
126+
break
115127
case "close":
116128
browserActionResult = await cline.browserSession.closeBrowser()
117129
break
@@ -124,6 +136,7 @@ export async function browserActionTool(
124136
case "type":
125137
case "scroll_down":
126138
case "scroll_up":
139+
case "resize":
127140
await cline.say("browser_action_result", JSON.stringify(browserActionResult))
128141
pushToolResult(
129142
formatResponse.toolResult(

src/services/browser/BrowserSession.ts

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -538,4 +538,17 @@ export class BrowserSession {
538538
})
539539
})
540540
}
541+
542+
/**
 * Resizes the page viewport and the browser window to the requested dimensions.
 *
 * The work runs inside `doAction`, so how a failure is reported to the caller
 * depends on `doAction` (not visible in this hunk — confirm against its
 * implementation).
 *
 * @param size - Dimensions as a `"width,height"` string, e.g. `"1280,720"`.
 *   Only the first two comma-separated components are used.
 * @returns The result produced by `doAction` for this action.
 * @throws Error raised inside the `doAction` callback when either dimension
 *   is missing or is not a positive integer.
 */
async resize(size: string): Promise<BrowserActionResult> {
	return this.doAction(async (page) => {
		// Default to NaN so a missing component ("1280") fails validation below
		// instead of reaching the browser as `undefined`.
		const [width = NaN, height = NaN] = size.split(",").map(Number)

		// Number("") is 0 and Number("abc") is NaN; reject both (and fractions)
		// up front with a readable message rather than forwarding them to
		// setViewport / setWindowBounds.
		if (!Number.isInteger(width) || !Number.isInteger(height) || width <= 0 || height <= 0) {
			throw new Error(
				`Invalid size "${size}": expected "width,height" as positive integers, e.g. "1280,720"`,
			)
		}

		const session = await page.createCDPSession()
		try {
			await page.setViewport({ width, height })
			// The viewport alone does not change the OS window, so the window
			// bounds are updated through the DevTools protocol as well.
			const { windowId } = await session.send("Browser.getWindowForTarget")
			await session.send("Browser.setWindowBounds", {
				bounds: { width, height },
				windowId,
			})
		} finally {
			// Release the CDP session created for this call. A failed detach
			// (e.g. target already closed) is ignored on purpose so it cannot
			// mask the outcome of the resize itself.
			await session.detach().catch(() => {})
		}
	})
}
541554
}

src/shared/ExtensionMessage.ts

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -233,13 +233,23 @@ export interface ClineSayTool {
233233
}
234234

235235
// Must keep in sync with system prompt.
236-
export const browserActions = ["launch", "click", "hover", "type", "scroll_down", "scroll_up", "close"] as const
236+
export const browserActions = [
237+
"launch",
238+
"click",
239+
"hover",
240+
"type",
241+
"scroll_down",
242+
"scroll_up",
243+
"resize",
244+
"close",
245+
] as const
237246

238247
export type BrowserAction = (typeof browserActions)[number]
239248

240249
export interface ClineSayBrowserAction {
241250
action: BrowserAction
242251
coordinate?: string
252+
size?: string
243253
text?: string
244254
}
245255

0 commit comments

Comments
 (0)