From fe9a3efa1dc6c84e89ecbaa13bb95ed843ce4e5b Mon Sep 17 00:00:00 2001 From: SplittyDev Date: Mon, 7 Apr 2025 16:33:23 +0700 Subject: [PATCH 1/2] Implement resize action for browser action tool --- src/core/assistant-message/index.ts | 3 ++- src/core/prompts/tools/browser-action.ts | 4 ++++ src/core/tools/browserActionTool.ts | 13 +++++++++++++ src/services/browser/BrowserSession.ts | 13 +++++++++++++ src/shared/ExtensionMessage.ts | 12 +++++++++++- 5 files changed, 43 insertions(+), 2 deletions(-) diff --git a/src/core/assistant-message/index.ts b/src/core/assistant-message/index.ts index 59f9a578b4b..77c2f6c403c 100644 --- a/src/core/assistant-message/index.ts +++ b/src/core/assistant-message/index.ts @@ -60,6 +60,7 @@ export const toolParamNames = [ "cwd", "follow_up", "task", + "size", ] as const export type ToolParamName = (typeof toolParamNames)[number] @@ -115,7 +116,7 @@ export interface ListCodeDefinitionNamesToolUse extends ToolUse { export interface BrowserActionToolUse extends ToolUse { name: "browser_action" - params: Partial<Pick<Record<ToolParamName, string>, "action" | "url" | "coordinate" | "text">> + params: Partial<Pick<Record<ToolParamName, string>, "action" | "url" | "coordinate" | "text" | "size">> } export interface UseMcpToolToolUse extends ToolUse { diff --git a/src/core/prompts/tools/browser-action.ts b/src/core/prompts/tools/browser-action.ts index 9b5f1c4ee82..510bf7b794f 100644 --- a/src/core/prompts/tools/browser-action.ts +++ b/src/core/prompts/tools/browser-action.ts @@ -20,6 +20,8 @@ Parameters: - Always click in the center of an element (icon, button, link, etc.) based on coordinates derived from a screenshot. * type: Type a string of text on the keyboard. You might use this after clicking on a text field to input text. - Use with the \`text\` parameter to provide the string to type. + * resize: Resize the viewport to a specific w,h size. + - Use with the \`size\` parameter to specify the new size. * scroll_down: Scroll down the page by one page height. 
* scroll_up: Scroll up the page by one page height. * close: Close the Puppeteer-controlled browser instance. This **must always be the final browser action**. @@ -28,6 +30,8 @@ Parameters: * Example: https://example.com - coordinate: (optional) The X and Y coordinates for the \`click\` action. Coordinates should be within the **${args.browserViewportSize}** resolution. * Example: 450,300 +- size: (optional) The width and height for the \`resize\` action. + * Example: 1280,720 - text: (optional) Use this for providing the text for the \`type\` action. * Example: Hello, world! Usage: diff --git a/src/core/tools/browserActionTool.ts b/src/core/tools/browserActionTool.ts index 8a9051070d8..de6e8c1c7f1 100644 --- a/src/core/tools/browserActionTool.ts +++ b/src/core/tools/browserActionTool.ts @@ -21,6 +21,7 @@ export async function browserActionTool( const url: string | undefined = block.params.url const coordinate: string | undefined = block.params.coordinate const text: string | undefined = block.params.text + const size: string | undefined = block.params.size if (!action || !browserActions.includes(action)) { // checking for action to ensure it is complete and valid if (!block.partial) { @@ -88,6 +89,14 @@ export async function browserActionTool( return } } + if (action === "resize") { + if (!size) { + cline.consecutiveMistakeCount++ + pushToolResult(await cline.sayAndCreateMissingParamError("browser_action", "size")) + await cline.browserSession.closeBrowser() + return + } + } cline.consecutiveMistakeCount = 0 await cline.say( "browser_action", @@ -112,6 +121,9 @@ export async function browserActionTool( case "scroll_up": browserActionResult = await cline.browserSession.scrollUp() break + case "resize": + browserActionResult = await cline.browserSession.resize(size!) 
+ break case "close": browserActionResult = await cline.browserSession.closeBrowser() break @@ -124,6 +136,7 @@ export async function browserActionTool( case "type": case "scroll_down": case "scroll_up": + case "resize": await cline.say("browser_action_result", JSON.stringify(browserActionResult)) pushToolResult( formatResponse.toolResult( diff --git a/src/services/browser/BrowserSession.ts b/src/services/browser/BrowserSession.ts index 7f8963fe1df..241865a5488 100644 --- a/src/services/browser/BrowserSession.ts +++ b/src/services/browser/BrowserSession.ts @@ -538,4 +538,17 @@ export class BrowserSession { }) }) } + + async resize(size: string): Promise<BrowserActionResult> { + return this.doAction(async (page) => { + const [width, height] = size.split(",").map(Number) + const session = await page.createCDPSession() + await page.setViewport({ width, height }) + const { windowId } = await session.send("Browser.getWindowForTarget") + await session.send("Browser.setWindowBounds", { + bounds: { width, height }, + windowId, + }) + }) + } } diff --git a/src/shared/ExtensionMessage.ts b/src/shared/ExtensionMessage.ts index 095279ffded..d3cef192011 100644 --- a/src/shared/ExtensionMessage.ts +++ b/src/shared/ExtensionMessage.ts @@ -232,13 +232,23 @@ export interface ClineSayTool { } // Must keep in sync with system prompt. 
-export const browserActions = ["launch", "click", "hover", "type", "scroll_down", "scroll_up", "close"] as const +export const browserActions = [ + "launch", + "click", + "hover", + "type", + "scroll_down", + "scroll_up", + "resize", + "close", +] as const export type BrowserAction = (typeof browserActions)[number] export interface ClineSayBrowserAction { action: BrowserAction coordinate?: string + size?: string text?: string } From bdf2663401d60b2f2264345f75ffb14e1ad349bd Mon Sep 17 00:00:00 2001 From: SplittyDev Date: Mon, 7 Apr 2025 16:40:59 +0700 Subject: [PATCH 2/2] Update snapshots --- .../prompts/__tests__/__snapshots__/system.test.ts.snap | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/core/prompts/__tests__/__snapshots__/system.test.ts.snap b/src/core/prompts/__tests__/__snapshots__/system.test.ts.snap index e9538bc308b..798aed2976c 100644 --- a/src/core/prompts/__tests__/__snapshots__/system.test.ts.snap +++ b/src/core/prompts/__tests__/__snapshots__/system.test.ts.snap @@ -2719,6 +2719,8 @@ Parameters: - Always click in the center of an element (icon, button, link, etc.) based on coordinates derived from a screenshot. * type: Type a string of text on the keyboard. You might use this after clicking on a text field to input text. - Use with the \`text\` parameter to provide the string to type. + * resize: Resize the viewport to a specific w,h size. + - Use with the \`size\` parameter to specify the new size. * scroll_down: Scroll down the page by one page height. * scroll_up: Scroll up the page by one page height. * close: Close the Puppeteer-controlled browser instance. This **must always be the final browser action**. @@ -2727,6 +2729,8 @@ Parameters: * Example: https://example.com - coordinate: (optional) The X and Y coordinates for the \`click\` action. Coordinates should be within the **900x600** resolution. * Example: 450,300 +- size: (optional) The width and height for the \`resize\` action. 
+ * Example: 1280,720 - text: (optional) Use this for providing the text for the \`type\` action. * Example: Hello, world! Usage: @@ -3630,6 +3634,8 @@ Parameters: - Always click in the center of an element (icon, button, link, etc.) based on coordinates derived from a screenshot. * type: Type a string of text on the keyboard. You might use this after clicking on a text field to input text. - Use with the \`text\` parameter to provide the string to type. + * resize: Resize the viewport to a specific w,h size. + - Use with the \`size\` parameter to specify the new size. * scroll_down: Scroll down the page by one page height. * scroll_up: Scroll up the page by one page height. * close: Close the Puppeteer-controlled browser instance. This **must always be the final browser action**. @@ -3638,6 +3644,8 @@ Parameters: * Example: https://example.com - coordinate: (optional) The X and Y coordinates for the \`click\` action. Coordinates should be within the **1280x800** resolution. * Example: 450,300 +- size: (optional) The width and height for the \`resize\` action. + * Example: 1280,720 - text: (optional) Use this for providing the text for the \`type\` action. * Example: Hello, world! Usage: