diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts
index edbde32ea7..900c517933 100644
--- a/src/core/task/Task.ts
+++ b/src/core/task/Task.ts
@@ -214,6 +214,7 @@ export class Task extends EventEmitter {
 
 	// Computer User
 	browserSession: BrowserSession
+	private lastBrowserScreenshotMessageId?: string // `ts` (as a string) of the last history message holding a browser screenshot
 
 	// Editing
 	diffViewProvider: DiffViewProvider
@@ -508,6 +509,60 @@ export class Task extends EventEmitter {
 		await this.saveApiConversationHistory()
 	}
 
+	/**
+	 * Append a browser action result to the API conversation history as a user message.
+	 *
+	 * Before appending, image blocks are stripped from the previously tracked screenshot
+	 * message, so at most one screenshot added through this method stays in the history
+	 * (providers cap the number of images per request, e.g. AWS Bedrock's 20-image limit).
+	 *
+	 * @param toolResult Plain text, or content blocks that may include a screenshot image.
+	 */
+	async addBrowserActionToApiHistory(toolResult: ApiMessage["content"]) {
+		this.stripLastBrowserScreenshot()
+
+		const content = Array.isArray(toolResult) ? toolResult : [{ type: "text" as const, text: toolResult }]
+		const messageWithTs = { role: "user" as const, content, ts: Date.now() }
+
+		// Only a message that actually carries an image needs pruning on the next call.
+		const hasImages = Array.isArray(toolResult) && toolResult.some((block) => block.type === "image")
+		if (hasImages) {
+			this.lastBrowserScreenshotMessageId = messageWithTs.ts.toString()
+		}
+
+		this.apiConversationHistory.push(messageWithTs)
+		await this.saveApiConversationHistory()
+	}
+
+	/**
+	 * Strip image blocks from the message recorded in `lastBrowserScreenshotMessageId`,
+	 * then clear the tracking id. Non-image blocks of that message are kept. Does nothing
+	 * when no message is tracked or the tracked message is no longer in the history.
+	 */
+	private stripLastBrowserScreenshot() {
+		const trackedId = this.lastBrowserScreenshotMessageId
+		if (trackedId === undefined) {
+			return
+		}
+		this.lastBrowserScreenshotMessageId = undefined
+
+		// Search newest-first and match on the tracked timestamp rather than on message text.
+		for (let i = this.apiConversationHistory.length - 1; i >= 0; i--) {
+			const message = this.apiConversationHistory[i]
+			if (message.ts?.toString() === trackedId && Array.isArray(message.content)) {
+				message.content = message.content.filter((block) => block.type !== "image")
+				return
+			}
+		}
+	}
+
+	/**
+	 * Forget the tracked screenshot message; called when the browser is closed.
+	 */
+	resetBrowserScreenshotTracking() {
+		this.lastBrowserScreenshotMessageId = undefined
+	}
+
 	async overwriteApiConversationHistory(newHistory: ApiMessage[]) {
 		this.apiConversationHistory = newHistory
 		await this.saveApiConversationHistory()
diff --git a/src/core/tools/browserActionTool.ts b/src/core/tools/browserActionTool.ts
index 13cb9b0ec2..913413e658 100644
--- a/src/core/tools/browserActionTool.ts
+++ b/src/core/tools/browserActionTool.ts
@@ -158,17 +158,27 @@ export async function browserActionTool(
 				case "resize":
 					await cline.say("browser_action_result", JSON.stringify(browserActionResult))
 
-					pushToolResult(
-						formatResponse.toolResult(
+					{
+						const toolResult = formatResponse.toolResult(
 							`The browser action has been executed. The console logs and screenshot have been captured for your analysis.\n\nConsole logs:\n${
 								browserActionResult?.logs || "(No new logs)"
 							}\n\n(REMEMBER: if you need to proceed to using non-\`browser_action\` tools or launch a new browser, you MUST first close cline browser. For example, if after analyzing the logs and screenshot you need to edit a file, you must first close the browser before you can use the write_to_file tool.)`,
 							browserActionResult?.screenshot ? [browserActionResult.screenshot] : [],
-						),
-					)
+						)
+
+						// Record the result through the screenshot-pruning path so the previous screenshot is dropped.
+						// NOTE(review): the same result is also handed to pushToolResult below - confirm this
+						// does not store the screenshot in the conversation twice.
+						await cline.addBrowserActionToApiHistory(toolResult)
+
+						pushToolResult(toolResult)
+					}
 
 					break
 				case "close":
+					// Reset browser screenshot tracking when browser is closed
+					cline.resetBrowserScreenshotTracking()
+
 					pushToolResult(
 						formatResponse.toolResult(
 							`The browser has been closed. You may now proceed to using other tools.`,