Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/core/assistant-message/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ export const toolParamNames = [
"cwd",
"follow_up",
"task",
"size",
] as const

export type ToolParamName = (typeof toolParamNames)[number]
Expand Down Expand Up @@ -115,7 +116,7 @@ export interface ListCodeDefinitionNamesToolUse extends ToolUse {

export interface BrowserActionToolUse extends ToolUse {
name: "browser_action"
params: Partial<Pick<Record<ToolParamName, string>, "action" | "url" | "coordinate" | "text">>
params: Partial<Pick<Record<ToolParamName, string>, "action" | "url" | "coordinate" | "text" | "size">>
}

export interface UseMcpToolToolUse extends ToolUse {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2719,6 +2719,8 @@ Parameters:
- Always click in the center of an element (icon, button, link, etc.) based on coordinates derived from a screenshot.
* type: Type a string of text on the keyboard. You might use this after clicking on a text field to input text.
- Use with the \`text\` parameter to provide the string to type.
* resize: Resize the viewport to a specific w,h size.
- Use with the \`size\` parameter to specify the new size.
* scroll_down: Scroll down the page by one page height.
* scroll_up: Scroll up the page by one page height.
* close: Close the Puppeteer-controlled browser instance. This **must always be the final browser action**.
Expand All @@ -2727,6 +2729,8 @@ Parameters:
* Example: <url>https://example.com</url>
- coordinate: (optional) The X and Y coordinates for the \`click\` action. Coordinates should be within the **900x600** resolution.
* Example: <coordinate>450,300</coordinate>
- size: (optional) The width and height for the \`resize\` action.
* Example: <size>1280,720</size>
- text: (optional) Use this for providing the text for the \`type\` action.
* Example: <text>Hello, world!</text>
Usage:
Expand Down Expand Up @@ -3630,6 +3634,8 @@ Parameters:
- Always click in the center of an element (icon, button, link, etc.) based on coordinates derived from a screenshot.
* type: Type a string of text on the keyboard. You might use this after clicking on a text field to input text.
- Use with the \`text\` parameter to provide the string to type.
* resize: Resize the viewport to a specific w,h size.
- Use with the \`size\` parameter to specify the new size.
* scroll_down: Scroll down the page by one page height.
* scroll_up: Scroll up the page by one page height.
* close: Close the Puppeteer-controlled browser instance. This **must always be the final browser action**.
Expand All @@ -3638,6 +3644,8 @@ Parameters:
* Example: <url>https://example.com</url>
- coordinate: (optional) The X and Y coordinates for the \`click\` action. Coordinates should be within the **1280x800** resolution.
* Example: <coordinate>450,300</coordinate>
- size: (optional) The width and height for the \`resize\` action.
* Example: <size>1280,720</size>
- text: (optional) Use this for providing the text for the \`type\` action.
* Example: <text>Hello, world!</text>
Usage:
Expand Down
4 changes: 4 additions & 0 deletions src/core/prompts/tools/browser-action.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ Parameters:
- Always click in the center of an element (icon, button, link, etc.) based on coordinates derived from a screenshot.
* type: Type a string of text on the keyboard. You might use this after clicking on a text field to input text.
- Use with the \`text\` parameter to provide the string to type.
* resize: Resize the viewport to a specific w,h size.
- Use with the \`size\` parameter to specify the new size.
* scroll_down: Scroll down the page by one page height.
* scroll_up: Scroll up the page by one page height.
* close: Close the Puppeteer-controlled browser instance. This **must always be the final browser action**.
Expand All @@ -28,6 +30,8 @@ Parameters:
* Example: <url>https://example.com</url>
- coordinate: (optional) The X and Y coordinates for the \`click\` action. Coordinates should be within the **${args.browserViewportSize}** resolution.
* Example: <coordinate>450,300</coordinate>
- size: (optional) The width and height for the \`resize\` action.
* Example: <size>1280,720</size>
- text: (optional) Use this for providing the text for the \`type\` action.
* Example: <text>Hello, world!</text>
Usage:
Expand Down
13 changes: 13 additions & 0 deletions src/core/tools/browserActionTool.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ export async function browserActionTool(
const url: string | undefined = block.params.url
const coordinate: string | undefined = block.params.coordinate
const text: string | undefined = block.params.text
const size: string | undefined = block.params.size
if (!action || !browserActions.includes(action)) {
// checking for action to ensure it is complete and valid
if (!block.partial) {
Expand Down Expand Up @@ -88,6 +89,14 @@ export async function browserActionTool(
return
}
}
if (action === "resize") {
if (!size) {
cline.consecutiveMistakeCount++
pushToolResult(await cline.sayAndCreateMissingParamError("browser_action", "size"))
await cline.browserSession.closeBrowser()
return
}
}
cline.consecutiveMistakeCount = 0
await cline.say(
"browser_action",
Expand All @@ -112,6 +121,9 @@ export async function browserActionTool(
case "scroll_up":
browserActionResult = await cline.browserSession.scrollUp()
break
case "resize":
browserActionResult = await cline.browserSession.resize(size!)
break
case "close":
browserActionResult = await cline.browserSession.closeBrowser()
break
Expand All @@ -124,6 +136,7 @@ export async function browserActionTool(
case "type":
case "scroll_down":
case "scroll_up":
case "resize":
await cline.say("browser_action_result", JSON.stringify(browserActionResult))
pushToolResult(
formatResponse.toolResult(
Expand Down
13 changes: 13 additions & 0 deletions src/services/browser/BrowserSession.ts
Original file line number Diff line number Diff line change
Expand Up @@ -538,4 +538,17 @@ export class BrowserSession {
})
})
}

/**
 * Resizes the page viewport, and the browser window that hosts it, to the
 * requested dimensions.
 *
 * The work runs inside `doAction`, so the returned value is whatever
 * `doAction` produces for the callback.
 *
 * @param size - Comma-separated `"width,height"` string, e.g. `"1280,720"`.
 *   Both values must be positive integers; surrounding whitespace is ignored.
 * @returns The `BrowserActionResult` produced by `doAction`.
 * @throws Error (from inside the `doAction` callback) when `size` is not two
 *   positive integers. How that error surfaces to the caller depends on
 *   `doAction`, which is not changed here.
 */
async resize(size: string): Promise<BrowserActionResult> {
    return this.doAction(async (page) => {
        // Validate before touching the browser: Number("") is 0 and
        // Number("abc") is NaN, neither of which is a usable dimension.
        const parts = size.split(",").map((part) => part.trim())
        const width = Number(parts[0])
        const height = Number(parts[1])
        const isValidDimension = (value: number): boolean => Number.isInteger(value) && value > 0

        if (parts.length !== 2 || !isValidDimension(width) || !isValidDimension(height)) {
            throw new Error(
                `Invalid size "${size}": expected "width,height" as two positive integers, e.g. "1280,720".`,
            )
        }

        const session = await page.createCDPSession()
        try {
            await page.setViewport({ width, height })
            const { windowId } = await session.send("Browser.getWindowForTarget")
            await session.send("Browser.setWindowBounds", {
                bounds: { width, height },
                windowId,
            })
        } finally {
            // The session is only needed for this one resize; detach it so
            // repeated resizes do not accumulate open CDP sessions. Cleanup is
            // best-effort and must not mask an error raised in the try block.
            await session.detach().catch(() => undefined)
        }
    })
}
}
12 changes: 11 additions & 1 deletion src/shared/ExtensionMessage.ts
Original file line number Diff line number Diff line change
Expand Up @@ -232,13 +232,23 @@ export interface ClineSayTool {
}

// Must keep in sync with system prompt.
export const browserActions = ["launch", "click", "hover", "type", "scroll_down", "scroll_up", "close"] as const
export const browserActions = [
"launch",
"click",
"hover",
"type",
"scroll_down",
"scroll_up",
"resize",
"close",
] as const

export type BrowserAction = (typeof browserActions)[number]

/**
 * Describes a single browser action together with the optional string
 * arguments that accompany it.
 *
 * NOTE(review): the optional fields appear to mirror the `browser_action`
 * tool parameters of the same names — confirm against the code that builds
 * this object.
 */
export interface ClineSayBrowserAction {
// Which browser action this entry describes; one of the `browserActions` values.
action: BrowserAction
// Presumably the "x,y" position used by pointer actions such as `click` — verify against caller.
coordinate?: string
// Presumably the "width,height" value used by the `resize` action — verify against caller.
size?: string
// Presumably the text supplied for the `type` action — verify against caller.
text?: string
}

Expand Down