Skip to content

Commit 023c2c2

Browse files
authored
removed all images except the latest two in AnthropicCuaClient (#905)
# Why: Part of STG-586. Currently, we leave all of the images for the Anthropic CUA client within the LLM's context as the task progresses. # What changed: We now remove all screenshots aside from the last two when the experimental flag is set to true within the Stagehand config. # Test plan: Tested locally.
1 parent c63b197 commit 023c2c2

File tree

7 files changed

+104
-0
lines changed

7 files changed

+104
-0
lines changed

.changeset/fresh-papayas-beam.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"@browserbasehq/stagehand": patch
3+
---
4+
5+
Delete old images from anthropic cua client

lib/agent/AgentProvider.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ export class AgentProvider {
3535
modelName: string,
3636
clientOptions?: Record<string, unknown>,
3737
userProvidedInstructions?: string,
38+
experimental?: boolean,
3839
): AgentClient {
3940
const type = AgentProvider.getAgentProvider(modelName);
4041
this.logger({
@@ -58,6 +59,7 @@ export class AgentProvider {
5859
modelName,
5960
userProvidedInstructions,
6061
clientOptions,
62+
experimental,
6163
);
6264
default:
6365
throw new UnsupportedModelProviderError(

lib/agent/AnthropicCUAClient.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import {
1313
} from "@/types/agent";
1414
import { AgentClient } from "./AgentClient";
1515
import { AgentScreenshotProviderError } from "@/types/stagehandErrors";
16+
import { compressConversationImages } from "./imageCompressionUtils";
1617

1718
export type ResponseInputItem = AnthropicMessage | AnthropicToolResult;
1819

@@ -30,12 +31,14 @@ export class AnthropicCUAClient extends AgentClient {
3031
private screenshotProvider?: () => Promise<string>;
3132
private actionHandler?: (action: AgentAction) => Promise<void>;
3233
private thinkingBudget: number | null = null;
34+
private experimental: boolean = false;
3335

3436
constructor(
3537
type: AgentType,
3638
modelName: string,
3739
userProvidedInstructions?: string,
3840
clientOptions?: Record<string, unknown>,
41+
experimental?: boolean,
3942
) {
4043
super(type, modelName, userProvidedInstructions);
4144

@@ -51,6 +54,7 @@ export class AnthropicCUAClient extends AgentClient {
5154
) {
5255
this.thinkingBudget = clientOptions.thinkingBudget;
5356
}
57+
this.experimental = experimental || false;
5458

5559
// Store client options for reference
5660
this.clientOptions = {
@@ -321,6 +325,9 @@ export class AnthropicCUAClient extends AgentClient {
321325
const nextInputItems: ResponseInputItem[] = [...inputItems];
322326

323327
// Add the assistant message with tool_use blocks to the history
328+
if (this.experimental) {
329+
compressConversationImages(nextInputItems);
330+
}
324331
nextInputItems.push(assistantMessage);
325332

326333
// Generate tool results and add them as a user message

lib/agent/imageCompressionUtils.ts

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
import {
2+
AnthropicMessage,
3+
AnthropicContentBlock,
4+
AnthropicToolResult,
5+
} from "@/types/agent";
6+
7+
export type ResponseInputItem = AnthropicMessage | AnthropicToolResult;
8+
9+
/**
 * Tells whether a content block is a `tool_result` whose nested content
 * array holds at least one `image` block.
 * @param block - Content block to inspect
 * @returns `true` when the block is a tool result carrying an image
 */
function isToolResultWithImage(block: AnthropicContentBlock): boolean {
  return (
    block.type === "tool_result" &&
    "content" in block &&
    Array.isArray(block.content) &&
    (block.content as AnthropicContentBlock[]).some(
      (nested: AnthropicContentBlock) => nested.type === "image",
    )
  );
}

/**
 * Finds all items in the conversation history that contain images.
 * An item counts only when its `content` is an array holding a
 * `tool_result` block with a nested `image` block; items whose `content`
 * is not an array are ignored.
 * @param items - Array of conversation items to check
 * @returns Indices (ascending) of the items where images were found
 */
export function findItemsWithImages(items: ResponseInputItem[]): number[] {
  const itemsWithImages: number[] = [];

  items.forEach((item, index) => {
    if (
      Array.isArray(item.content) &&
      item.content.some((block: AnthropicContentBlock) =>
        isToolResultWithImage(block),
      )
    ) {
      itemsWithImages.push(index);
    }
  });

  return itemsWithImages;
}

/**
 * Compresses conversation history by removing images from older items
 * while keeping the most recent images intact. In every older
 * image-bearing item, each `tool_result` block that contains an image has
 * its `content` replaced by the string "screenshot taken"; its other
 * properties are preserved.
 *
 * The item objects in `items` are mutated in place (their `content` array
 * is reassigned), so callers may ignore the return value.
 * @param items - Array of conversation items to process
 * @param keepMostRecentCount - Number of most recent image-containing items to preserve (default: 2)
 * @returns Object wrapping the same `items` array that was passed in
 */
export function compressConversationImages(
  items: ResponseInputItem[],
  keepMostRecentCount: number = 2,
): { items: ResponseInputItem[] } {
  const itemsWithImages = findItemsWithImages(items);

  // Image-bearing items at positions [0, compressCount) are the old ones.
  // ceil + max(0, …) selects the same positions as
  // `position < length - keepMostRecentCount`, including for fractional,
  // oversized or negative keep counts.
  const compressCount = Math.max(
    0,
    Math.ceil(itemsWithImages.length - keepMostRecentCount),
  );

  for (const index of itemsWithImages.slice(0, compressCount)) {
    const item = items[index];
    if (item === undefined || !Array.isArray(item.content)) {
      continue;
    }

    item.content = item.content.map((block: AnthropicContentBlock) =>
      isToolResultWithImage(block)
        ? ({ ...block, content: "screenshot taken" } as AnthropicContentBlock)
        : block,
    );
  }

  return {
    items,
  };
}

lib/handlers/agentHandler.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ export class StagehandAgentHandler {
4141
options.modelName,
4242
options.clientOptions || {},
4343
options.userProvidedInstructions,
44+
options.experimental,
4445
);
4546

4647
// Store the client

lib/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -931,6 +931,7 @@ export class Stagehand {
931931
You are currently on the following page: ${this.stagehandPage.page.url()}.
932932
Do not ask follow up questions, the user will trust your judgement.`,
933933
agentType: options.provider,
934+
experimental: this.experimental,
934935
},
935936
);
936937

types/agent.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ export interface AgentHandlerOptions {
5252
clientOptions?: Record<string, unknown>;
5353
userProvidedInstructions?: string;
5454
agentType: AgentType;
55+
experimental?: boolean;
5556
}
5657

5758
export interface ActionExecutionResult {

0 commit comments

Comments
 (0)