Skip to content

Commit 17ee88c

Browse files
committed
fix: optimize image handling with dual-storage approach
- Add imagesBase64 field to ClineMessage schema for efficient dual storage
- Store both webview URIs (for display) and base64 (for API) when creating messages
- Remove repeated base64→file→URI conversions on every render
- Update all tools to use base64 from stored messages for API calls
- Eliminates file I/O overhead during rendering and message updates
- Improves performance by storing each format once instead of converting repeatedly
1 parent d56b74f commit 17ee88c

File tree

10 files changed

+298
-45
lines changed

10 files changed

+298
-45
lines changed

packages/types/src/message.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,8 @@ export const clineMessageSchema = z.object({
206206
ask: clineAskSchema.optional(),
207207
say: clineSaySchema.optional(),
208208
text: z.string().optional(),
209-
images: z.array(z.string()).optional(),
209+
images: z.array(z.string()).optional(), // Webview URIs for frontend display
210+
imagesBase64: z.array(z.string()).optional(), // Base64 data URLs for API calls
210211
partial: z.boolean().optional(),
211212
reasoning: z.string().optional(),
212213
conversationHistoryIndex: z.number().optional(),

src/core/assistant-message/presentAssistantMessage.ts

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,12 @@ export async function presentAssistantMessage(cline: Task) {
291291
// Handle both messageResponse and noButtonClicked with text.
292292
if (text) {
293293
await cline.say("user_feedback", text, images)
294-
pushToolResult(formatResponse.toolResult(formatResponse.toolDeniedWithFeedback(text), images))
294+
// Get base64 from the just-stored message for API call
295+
const lastMessage = cline.clineMessages.at(-1)
296+
const base64Images = lastMessage?.imagesBase64
297+
pushToolResult(
298+
formatResponse.toolResult(formatResponse.toolDeniedWithFeedback(text), base64Images),
299+
)
295300
} else {
296301
pushToolResult(formatResponse.toolDenied())
297302
}
@@ -302,7 +307,12 @@ export async function presentAssistantMessage(cline: Task) {
302307
// Handle yesButtonClicked with text.
303308
if (text) {
304309
await cline.say("user_feedback", text, images)
305-
pushToolResult(formatResponse.toolResult(formatResponse.toolApprovedWithFeedback(text), images))
310+
// Get base64 from the just-stored message for API call
311+
const lastMessage = cline.clineMessages.at(-1)
312+
const base64Images = lastMessage?.imagesBase64
313+
pushToolResult(
314+
formatResponse.toolResult(formatResponse.toolApprovedWithFeedback(text), base64Images),
315+
)
306316
}
307317

308318
return true
@@ -396,18 +406,22 @@ export async function presentAssistantMessage(cline: Task) {
396406
)
397407

398408
if (response === "messageResponse") {
399-
// Add user feedback to userContent.
409+
// Add user feedback to chat (stores both formats)
410+
await cline.say("user_feedback", text, images)
411+
412+
// Get base64 from the just-stored message for API call
413+
const lastMessage = cline.clineMessages.at(-1)
414+
const base64Images = lastMessage?.imagesBase64
415+
416+
// Add user feedback to userContent for API
400417
cline.userMessageContent.push(
401418
{
402419
type: "text" as const,
403420
text: `Tool repetition limit reached. User feedback: ${text}`,
404421
},
405-
...formatResponse.imageBlocks(images),
422+
...formatResponse.imageBlocks(base64Images),
406423
)
407424

408-
// Add user feedback to chat.
409-
await cline.say("user_feedback", text, images)
410-
411425
// Track tool repetition in telemetry.
412426
TelemetryService.instance.captureConsecutiveMistakeError(cline.taskId)
413427
}

src/core/task/Task.ts

Lines changed: 45 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -644,6 +644,9 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
644644

645645
private async updateClineMessage(message: ClineMessage) {
646646
const provider = this.providerRef.deref()
647+
648+
// Messages now store both formats, so no conversion needed
649+
// The 'images' field already contains webview URIs for display
647650
await provider?.postMessageToWebview({ type: "messageUpdated", clineMessage: message })
648651
this.emit(RooCodeEventName.Message, { action: "updated", message })
649652

@@ -735,6 +738,7 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
735738
lastMessage.partial = partial
736739
lastMessage.progressStatus = progressStatus
737740
lastMessage.isProtected = isProtected
741+
// Note: ask messages don't typically have images, so we don't update them here
738742
// TODO: Be more efficient about saving and posting only new
739743
// data or one whole message at a time so ignore partial for
740744
// saves, and only post parts of partial message instead of
@@ -877,16 +881,8 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
877881
text: this.askResponseText,
878882
images: this.askResponseImages,
879883
}
880-
// Normalize any image refs to base64 data URLs before handing back to callers
881-
if (Array.isArray(result.images) && result.images.length > 0) {
882-
try {
883-
const { normalizeImageRefsToDataUrls } = await import("../../integrations/misc/imageDataUrl")
884-
const normalized = await normalizeImageRefsToDataUrls(result.images)
885-
result.images = normalized
886-
} catch (e) {
887-
console.error("[Task#ask] Failed to normalize image refs:", e)
888-
}
889-
}
884+
// Images from askResponse are already webview URIs from the frontend,
885+
// so no conversion needed here
890886
this.askResponse = undefined
891887
this.askResponseText = undefined
892888
this.askResponseImages = undefined
@@ -1084,13 +1080,23 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
10841080
throw new Error(`[RooCode#say] task ${this.taskId}.${this.instanceId} aborted`)
10851081
}
10861082

1087-
// Ensure any image refs are normalized to base64 data URLs before persisting or sending to APIs
1083+
// Convert images to both formats for efficient dual storage
1084+
let webviewUris: string[] | undefined
1085+
let base64Images: string[] | undefined
1086+
10881087
if (Array.isArray(images) && images.length > 0) {
10891088
try {
10901089
const { normalizeImageRefsToDataUrls } = await import("../../integrations/misc/imageDataUrl")
1091-
images = await normalizeImageRefsToDataUrls(images)
1090+
1091+
// Store original webview URIs/file paths for frontend
1092+
webviewUris = images
1093+
1094+
// Convert to base64 for API calls
1095+
base64Images = await normalizeImageRefsToDataUrls(images)
10921096
} catch (e) {
10931097
console.error("[Task#say] Failed to normalize image refs:", e)
1098+
// Fall back to original images if conversion fails
1099+
webviewUris = images
10941100
}
10951101
}
10961102

@@ -1104,7 +1110,8 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
11041110
if (isUpdatingPreviousPartial) {
11051111
// Existing partial message, so update it.
11061112
lastMessage.text = text
1107-
lastMessage.images = images
1113+
lastMessage.images = webviewUris
1114+
lastMessage.imagesBase64 = base64Images
11081115
lastMessage.partial = partial
11091116
lastMessage.progressStatus = progressStatus
11101117
this.updateClineMessage(lastMessage)
@@ -1121,7 +1128,8 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
11211128
type: "say",
11221129
say: type,
11231130
text,
1124-
images,
1131+
images: webviewUris,
1132+
imagesBase64: base64Images,
11251133
partial,
11261134
contextCondense,
11271135
metadata: options.metadata,
@@ -1137,7 +1145,8 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
11371145
}
11381146

11391147
lastMessage.text = text
1140-
lastMessage.images = images
1148+
lastMessage.images = webviewUris
1149+
lastMessage.imagesBase64 = base64Images
11411150
lastMessage.partial = false
11421151
lastMessage.progressStatus = progressStatus
11431152
if (options.metadata) {
@@ -1168,7 +1177,8 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
11681177
type: "say",
11691178
say: type,
11701179
text,
1171-
images,
1180+
images: webviewUris,
1181+
imagesBase64: base64Images,
11721182
contextCondense,
11731183
metadata: options.metadata,
11741184
})
@@ -1191,7 +1201,8 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
11911201
type: "say",
11921202
say: type,
11931203
text,
1194-
images,
1204+
images: webviewUris,
1205+
imagesBase64: base64Images,
11951206
checkpoint,
11961207
contextCondense,
11971208
})
@@ -1236,14 +1247,16 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
12361247

12371248
await this.providerRef.deref()?.postStateToWebview()
12381249

1239-
// Convert webview URIs to base64 data URLs for backend storage (one-time conversion)
1240-
const { normalizeImageRefsToDataUrls } = await import("../../integrations/misc/imageDataUrl")
1241-
const base64Images = images ? await normalizeImageRefsToDataUrls(images) : undefined
1242-
1243-
await this.say("text", task, base64Images) // Store base64 in backend messages
1250+
// Store the task message with both webview URIs and base64
1251+
// This is now handled in say() method which stores both formats
1252+
await this.say("text", task, images)
12441253
this.isInitialized = true
12451254

1246-
// Convert base64 to image blocks for API (no conversion needed, already base64)
1255+
// Get base64 from the stored message for API call
1256+
const lastMessage = this.clineMessages.at(-1)
1257+
const base64Images = lastMessage?.imagesBase64
1258+
1259+
// Convert base64 to image blocks for API
12471260
const { formatResponse } = await import("../prompts/responses")
12481261
let imageBlocks: Anthropic.ImageBlockParam[] = formatResponse.imageBlocks(base64Images)
12491262

@@ -1509,11 +1522,11 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
15091522
}
15101523

15111524
if (responseImages && responseImages.length > 0) {
1512-
// Convert webview URIs to base64 data URLs for backend storage (one-time conversion)
1525+
// Images from user response are webview URIs, convert to base64 for API
15131526
const { normalizeImageRefsToDataUrls } = await import("../../integrations/misc/imageDataUrl")
15141527
const base64ResponseImages = await normalizeImageRefsToDataUrls(responseImages)
15151528

1516-
// Convert base64 to image blocks for API (no conversion needed, already base64)
1529+
// Convert base64 to image blocks for API
15171530
const { formatResponse } = await import("../prompts/responses")
15181531
const responseImageBlocks = formatResponse.imageBlocks(base64ResponseImages)
15191532
newUserContent.push(...responseImageBlocks)
@@ -1778,15 +1791,19 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
17781791
)
17791792

17801793
if (response === "messageResponse") {
1794+
await this.say("user_feedback", text, images)
1795+
1796+
// Get base64 from the just-stored message for API call
1797+
const lastMessage = this.clineMessages.at(-1)
1798+
const base64Images = lastMessage?.imagesBase64
1799+
17811800
currentUserContent.push(
17821801
...[
17831802
{ type: "text" as const, text: formatResponse.tooManyMistakes(text) },
1784-
...formatResponse.imageBlocks(images),
1803+
...formatResponse.imageBlocks(base64Images),
17851804
],
17861805
)
17871806

1788-
await this.say("user_feedback", text, images)
1789-
17901807
// Track consecutive mistake errors in telemetry.
17911808
TelemetryService.instance.captureConsecutiveMistakeError(this.taskId)
17921809
}

src/core/tools/accessMcpResourceTool.ts

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,12 @@ export async function accessMcpResourceTool(
8282
})
8383

8484
await cline.say("mcp_server_response", resourceResultPretty, images)
85-
pushToolResult(formatResponse.toolResult(resourceResultPretty, images))
85+
86+
// Get base64 from the just-stored message for API call
87+
// Note: MCP images are already base64; say() keeps the originals in `images` and stores the normalized data URLs in `imagesBase64`
88+
const lastMessage = cline.clineMessages.at(-1)
89+
const base64Images = lastMessage?.imagesBase64
90+
pushToolResult(formatResponse.toolResult(resourceResultPretty, base64Images))
8691

8792
return
8893
}

src/core/tools/askFollowupQuestionTool.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,11 @@ export async function askFollowupQuestionTool(
7878
cline.consecutiveMistakeCount = 0
7979
const { text, images } = await cline.ask("followup", JSON.stringify(follow_up_json), false)
8080
await cline.say("user_feedback", text ?? "", images)
81-
pushToolResult(formatResponse.toolResult(`<answer>\n${text}\n</answer>`, images))
81+
82+
// Get base64 from the just-stored message for API call
83+
const lastMessage = cline.clineMessages.at(-1)
84+
const base64Images = lastMessage?.imagesBase64
85+
pushToolResult(formatResponse.toolResult(`<answer>\n${text}\n</answer>`, base64Images))
8286

8387
return
8488
}

src/core/tools/attemptCompletionTool.ts

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,14 +121,19 @@ export async function attemptCompletionTool(
121121
}
122122

123123
await cline.say("user_feedback", text ?? "", images)
124+
125+
// Get base64 from the just-stored message for API call
126+
const lastMessage = cline.clineMessages.at(-1)
127+
const base64Images = lastMessage?.imagesBase64
128+
124129
const toolResults: (Anthropic.TextBlockParam | Anthropic.ImageBlockParam)[] = []
125130

126131
toolResults.push({
127132
type: "text",
128133
text: `The user has provided feedback on the results. Consider their input to continue the task, and then attempt completion again.\n<feedback>\n${text}\n</feedback>`,
129134
})
130135

131-
toolResults.push(...formatResponse.imageBlocks(images))
136+
toolResults.push(...formatResponse.imageBlocks(base64Images))
132137
cline.userMessageContent.push({ type: "text", text: `${toolDescription()} Result:` })
133138
cline.userMessageContent.push(...toolResults)
134139

src/core/tools/executeCommandTool.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,10 @@ export async function executeCommand(
311311
const { text, images } = message
312312
await task.say("user_feedback", text, images)
313313

314+
// Get base64 from the just-stored message for API call
315+
const lastMessage = task.clineMessages.at(-1)
316+
const base64Images = lastMessage?.imagesBase64
317+
314318
return [
315319
true,
316320
formatResponse.toolResult(
@@ -320,7 +324,7 @@ export async function executeCommand(
320324
`The user provided the following feedback:`,
321325
`<feedback>\n${text}\n</feedback>`,
322326
].join("\n"),
323-
images,
327+
base64Images,
324328
),
325329
]
326330
} else if (completed || exitDetails) {

src/core/tools/readFileTool.ts

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -406,29 +406,36 @@ export async function readFileTool(
406406

407407
const { response, text, images } = await cline.ask("tool", completeMessage, false)
408408

409+
let feedbackBase64Images: string[] | undefined
409410
if (response !== "yesButtonClicked") {
410411
// Handle both messageResponse and noButtonClicked with text
411412
if (text) {
412413
await cline.say("user_feedback", text, images)
414+
// Get base64 from the just-stored message
415+
const lastMessage = cline.clineMessages.at(-1)
416+
feedbackBase64Images = lastMessage?.imagesBase64
413417
}
414418
cline.didRejectTool = true
415419

416420
updateFileResult(relPath, {
417421
status: "denied",
418422
xmlContent: `<file><path>${relPath}</path><status>Denied by user</status></file>`,
419423
feedbackText: text,
420-
feedbackImages: images,
424+
feedbackImages: feedbackBase64Images,
421425
})
422426
} else {
423427
// Handle yesButtonClicked with text
424428
if (text) {
425429
await cline.say("user_feedback", text, images)
430+
// Get base64 from the just-stored message
431+
const lastMessage = cline.clineMessages.at(-1)
432+
feedbackBase64Images = lastMessage?.imagesBase64
426433
}
427434

428435
updateFileResult(relPath, {
429436
status: "approved",
430437
feedbackText: text,
431-
feedbackImages: images,
438+
feedbackImages: feedbackBase64Images,
432439
})
433440
}
434441
}

0 commit comments

Comments
 (0)