diff --git a/packages/types/src/message.ts b/packages/types/src/message.ts index 77c055c6e152..9cf23e386f23 100644 --- a/packages/types/src/message.ts +++ b/packages/types/src/message.ts @@ -206,7 +206,8 @@ export const clineMessageSchema = z.object({ ask: clineAskSchema.optional(), say: clineSaySchema.optional(), text: z.string().optional(), - images: z.array(z.string()).optional(), + images: z.array(z.string()).optional(), // Webview URIs for frontend display + imagesBase64: z.array(z.string()).optional(), // Base64 data URLs for API calls partial: z.boolean().optional(), reasoning: z.string().optional(), conversationHistoryIndex: z.number().optional(), diff --git a/src/activate/registerCommands.ts b/src/activate/registerCommands.ts index 41c127333d85..73a9fadf6e13 100644 --- a/src/activate/registerCommands.ts +++ b/src/activate/registerCommands.ts @@ -268,7 +268,8 @@ export const openClineInNewTab = async ({ context, outputChannel }: Omit implements TaskLike { private async updateClineMessage(message: ClineMessage) { const provider = this.providerRef.deref() + + // Messages now store both formats, so no conversion needed + // The 'images' field already contains webview URIs for display await provider?.postMessageToWebview({ type: "messageUpdated", clineMessage: message }) this.emit(RooCodeEventName.Message, { action: "updated", message }) @@ -735,6 +738,7 @@ export class Task extends EventEmitter implements TaskLike { lastMessage.partial = partial lastMessage.progressStatus = progressStatus lastMessage.isProtected = isProtected + // Note: ask messages don't typically have images, so we don't update them here // TODO: Be more efficient about saving and posting only new // data or one whole message at a time so ignore partial for // saves, and only post parts of partial message instead of @@ -872,7 +876,13 @@ export class Task extends EventEmitter implements TaskLike { throw new Error("Current ask promise was ignored") } - const result = { response: this.askResponse!, text: this.askResponseText, images: this.askResponseImages } + let result: { response: ClineAskResponse; text?: string; images?: string[] } = { + response: this.askResponse!, + text: this.askResponseText, + images: this.askResponseImages, + } + // Images from askResponse are already webview URIs from the frontend, + // so no conversion needed here this.askResponse = undefined this.askResponseText = undefined this.askResponseImages = undefined @@ -1070,6 +1080,26 @@ export class Task extends EventEmitter implements TaskLike { throw new Error(`[RooCode#say] task ${this.taskId}.${this.instanceId} aborted`) } + // Convert images to both formats for efficient dual storage + let webviewUris: string[] | undefined + let base64Images: string[] | undefined + + if (Array.isArray(images) && images.length > 0) { + try { + const { normalizeImageRefsToDataUrls } = await import("../../integrations/misc/imageDataUrl") + + // Store original webview URIs/file paths for frontend + webviewUris = images + + // Convert to base64 for API calls + base64Images = await normalizeImageRefsToDataUrls(images) + } catch (e) { + console.error("[Task#say] Failed to normalize image refs:", e) + // Fall back to original images if conversion fails + webviewUris = images + } + } + if (partial !== undefined) { const lastMessage = this.clineMessages.at(-1) @@ -1080,7 +1110,8 @@ export class Task extends EventEmitter implements TaskLike { if (isUpdatingPreviousPartial) { // Existing partial message, so update it. lastMessage.text = text - lastMessage.images = images + lastMessage.images = webviewUris + lastMessage.imagesBase64 = base64Images lastMessage.partial = partial lastMessage.progressStatus = progressStatus this.updateClineMessage(lastMessage) @@ -1097,7 +1128,8 @@ export class Task extends EventEmitter implements TaskLike { type: "say", say: type, text, - images, + images: webviewUris, + imagesBase64: base64Images, partial, contextCondense, metadata: options.metadata, @@ -1113,7 +1145,8 @@ export class Task extends EventEmitter implements TaskLike { } lastMessage.text = text - lastMessage.images = images + lastMessage.images = webviewUris + lastMessage.imagesBase64 = base64Images lastMessage.partial = false lastMessage.progressStatus = progressStatus if (options.metadata) { @@ -1144,7 +1177,8 @@ export class Task extends EventEmitter implements TaskLike { type: "say", say: type, text, - images, + images: webviewUris, + imagesBase64: base64Images, contextCondense, metadata: options.metadata, }) @@ -1167,7 +1201,8 @@ export class Task extends EventEmitter implements TaskLike { type: "say", say: type, text, - images, + images: webviewUris, + imagesBase64: base64Images, checkpoint, contextCondense, }) @@ -1212,13 +1247,20 @@ export class Task extends EventEmitter implements TaskLike { await this.providerRef.deref()?.postStateToWebview() + // Store the task message with both webview URIs and base64 + // This is now handled in say() method which stores both formats await this.say("text", task, images) this.isInitialized = true - let imageBlocks: Anthropic.ImageBlockParam[] = formatResponse.imageBlocks(images) + // Get base64 from the stored message for API call + const lastMessage = this.clineMessages.at(-1) + const base64Images = lastMessage?.imagesBase64 - // Task starting + // Convert base64 to image blocks for API + const { formatResponse } = await import("../prompts/responses") + let imageBlocks: Anthropic.ImageBlockParam[] = formatResponse.imageBlocks(base64Images) + // Task starting await this.initiateTaskLoop([ { type: "text", @@ -1480,7 +1522,14 @@ export class Task extends EventEmitter implements TaskLike { } if (responseImages && responseImages.length > 0) { - newUserContent.push(...formatResponse.imageBlocks(responseImages)) + // Images from user response are webview URIs, convert to base64 for API + const { normalizeImageRefsToDataUrls } = await import("../../integrations/misc/imageDataUrl") + const base64ResponseImages = await normalizeImageRefsToDataUrls(responseImages) + + // Convert base64 to image blocks for API + const { formatResponse } = await import("../prompts/responses") + const responseImageBlocks = formatResponse.imageBlocks(base64ResponseImages) + newUserContent.push(...responseImageBlocks) } // Ensure we have at least some content to send to the API. @@ -1742,15 +1791,19 @@ export class Task extends EventEmitter implements TaskLike { ) if (response === "messageResponse") { + await this.say("user_feedback", text, images) + + // Get base64 from the just-stored message for API call + const lastMessage = this.clineMessages.at(-1) + const base64Images = lastMessage?.imagesBase64 + currentUserContent.push( ...[ { type: "text" as const, text: formatResponse.tooManyMistakes(text) }, - ...formatResponse.imageBlocks(images), + ...formatResponse.imageBlocks(base64Images), ], ) - await this.say("user_feedback", text, images) - // Track consecutive mistake errors in telemetry. TelemetryService.instance.captureConsecutiveMistakeError(this.taskId) } diff --git a/src/core/tools/accessMcpResourceTool.ts b/src/core/tools/accessMcpResourceTool.ts index c8a40f9236d2..8088d6bb34bd 100644 --- a/src/core/tools/accessMcpResourceTool.ts +++ b/src/core/tools/accessMcpResourceTool.ts @@ -82,7 +82,12 @@ export async function accessMcpResourceTool( }) await cline.say("mcp_server_response", resourceResultPretty, images) - pushToolResult(formatResponse.toolResult(resourceResultPretty, images)) + + // Get base64 from the just-stored message for API call + // Note: MCP images are already base64, but say() will store them in both formats + const lastMessage = cline.clineMessages.at(-1) + const base64Images = lastMessage?.imagesBase64 + pushToolResult(formatResponse.toolResult(resourceResultPretty, base64Images)) return } diff --git a/src/core/tools/askFollowupQuestionTool.ts b/src/core/tools/askFollowupQuestionTool.ts index e7369368873a..e3eb71076825 100644 --- a/src/core/tools/askFollowupQuestionTool.ts +++ b/src/core/tools/askFollowupQuestionTool.ts @@ -78,7 +78,11 @@ export async function askFollowupQuestionTool( cline.consecutiveMistakeCount = 0 const { text, images } = await cline.ask("followup", JSON.stringify(follow_up_json), false) await cline.say("user_feedback", text ?? "", images) - pushToolResult(formatResponse.toolResult(`\n${text}\n`, images)) + + // Get base64 from the just-stored message for API call + const lastMessage = cline.clineMessages.at(-1) + const base64Images = lastMessage?.imagesBase64 + pushToolResult(formatResponse.toolResult(`\n${text}\n`, base64Images)) return } diff --git a/src/core/tools/attemptCompletionTool.ts b/src/core/tools/attemptCompletionTool.ts index 5074d7f4e808..74ca1282a8e1 100644 --- a/src/core/tools/attemptCompletionTool.ts +++ b/src/core/tools/attemptCompletionTool.ts @@ -121,6 +121,11 @@ export async function attemptCompletionTool( } await cline.say("user_feedback", text ?? "", images) + + // Get base64 from the just-stored message for API call + const lastMessage = cline.clineMessages.at(-1) + const base64Images = lastMessage?.imagesBase64 + const toolResults: (Anthropic.TextBlockParam | Anthropic.ImageBlockParam)[] = [] toolResults.push({ @@ -128,7 +133,7 @@ export async function attemptCompletionTool( text: `The user has provided feedback on the results. Consider their input to continue the task, and then attempt completion again.\n\n${text}\n`, }) - toolResults.push(...formatResponse.imageBlocks(images)) + toolResults.push(...formatResponse.imageBlocks(base64Images)) cline.userMessageContent.push({ type: "text", text: `${toolDescription()} Result:` }) cline.userMessageContent.push(...toolResults) diff --git a/src/core/tools/executeCommandTool.ts b/src/core/tools/executeCommandTool.ts index 2c7ce0d023e2..11e03a2a490b 100644 --- a/src/core/tools/executeCommandTool.ts +++ b/src/core/tools/executeCommandTool.ts @@ -311,6 +311,10 @@ export async function executeCommand( const { text, images } = message await task.say("user_feedback", text, images) + // Get base64 from the just-stored message for API call + const lastMessage = task.clineMessages.at(-1) + const base64Images = lastMessage?.imagesBase64 + return [ true, formatResponse.toolResult( @@ -320,7 +324,7 @@ export async function executeCommand( `The user provided the following feedback:`, `\n${text}\n`, ].join("\n"), - images, + base64Images, ), ] } else if (completed || exitDetails) { diff --git a/src/core/tools/readFileTool.ts b/src/core/tools/readFileTool.ts index 01427f4d9dc7..61d445675aa6 100644 --- a/src/core/tools/readFileTool.ts +++ b/src/core/tools/readFileTool.ts @@ -406,10 +406,14 @@ export async function readFileTool( const { response, text, images } = await cline.ask("tool", completeMessage, false) + let feedbackBase64Images: string[] | undefined if (response !== "yesButtonClicked") { // Handle both messageResponse and noButtonClicked with text if (text) { await cline.say("user_feedback", text, images) + // Get base64 from the just-stored message + const lastMessage = cline.clineMessages.at(-1) + feedbackBase64Images = lastMessage?.imagesBase64 } cline.didRejectTool = true @@ -417,18 +421,21 @@ export async function readFileTool( status: "denied", xmlContent: `${relPath}Denied by user`, feedbackText: text, - feedbackImages: images, + feedbackImages: feedbackBase64Images, }) } else { // Handle yesButtonClicked with text if (text) { await cline.say("user_feedback", text, images) + // Get base64 from the just-stored message + const lastMessage = cline.clineMessages.at(-1) + feedbackBase64Images = lastMessage?.imagesBase64 } updateFileResult(relPath, { status: "approved", feedbackText: text, - feedbackImages: images, + feedbackImages: feedbackBase64Images, }) } } diff --git a/src/core/webview/ClineProvider.ts b/src/core/webview/ClineProvider.ts index 119668d66821..81de590a53c4 100644 --- a/src/core/webview/ClineProvider.ts +++ b/src/core/webview/ClineProvider.ts @@ -759,7 +759,7 @@ export class ClineProvider }) // Set up webview options with proper resource roots - const resourceRoots = [this.contextProxy.extensionUri] + const resourceRoots = [this.contextProxy.extensionUri, this.contextProxy.globalStorageUri] // Add workspace folders to allow access to workspace files if (vscode.workspace.workspaceFolders) { @@ -1039,7 +1039,7 @@ export class ClineProvider "default-src 'none'", `font-src ${webview.cspSource} data:`, `style-src ${webview.cspSource} 'unsafe-inline' https://* http://${localServerUrl} http://0.0.0.0:${localPort}`, - `img-src ${webview.cspSource} https://storage.googleapis.com https://img.clerk.com data:`, + `img-src ${webview.cspSource} https://*.vscode-cdn.net https://storage.googleapis.com https://img.clerk.com data:`, `media-src ${webview.cspSource}`, `script-src 'unsafe-eval' ${webview.cspSource} https://* https://*.posthog.com http://${localServerUrl} http://0.0.0.0:${localPort} 'nonce-${nonce}'`, `connect-src ${webview.cspSource} https://* https://*.posthog.com ws://${localServerUrl} ws://0.0.0.0:${localPort} http://${localServerUrl} http://0.0.0.0:${localPort}`, @@ -1124,7 +1124,7 @@ export class ClineProvider - +