feat: add image support for MCP tool responses

roomote · roomote · commit 9503e43981ee · 2025-07-24T09:59:31.000Z
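- Update processToolContent to handle image content items, returning both the text and an images array
- Construct proper base64 data URIs for images (data that is already a data URI is passed through unchanged)
- Pass the images array to cline.say and formatResponse.toolResult, whose output is handed to pushToolResult
- Add tests for single-image and multi-image tool responses

Fixes #6163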
diff --git a/src/core/tools/__tests__/useMcpToolTool.spec.ts b/src/core/tools/__tests__/useMcpToolTool.spec.ts
@@ -7,7 +7,11 @@ import { ToolUse } from "../../../shared/tools"
 // Mock dependencies
 vi.mock("../../prompts/responses", () => ({
 	formatResponse: {
-		toolResult: vi.fn((result: string) => `Tool result: ${result}`),
+		toolResult: vi.fn((result: string, images?: string[]) =>
+			images && images.length > 0
+				? `Tool result: ${result} [with ${images.length} image(s)]`
+				: `Tool result: ${result}`,
+		),
 		toolError: vi.fn((error: string) => `Tool error: ${error}`),
 		invalidMcpToolArgumentError: vi.fn((server: string, tool: string) => `Invalid args for ${server}:${tool}`),
 	},
@@ -208,10 +212,116 @@ describe("useMcpToolTool", () => {
 			expect(mockTask.consecutiveMistakeCount).toBe(0)
 			expect(mockAskApproval).toHaveBeenCalled()
 			expect(mockTask.say).toHaveBeenCalledWith("mcp_server_request_started")
-			expect(mockTask.say).toHaveBeenCalledWith("mcp_server_response", "Tool executed successfully")
+			expect(mockTask.say).toHaveBeenCalledWith("mcp_server_response", "Tool executed successfully", [])
 			expect(mockPushToolResult).toHaveBeenCalledWith("Tool result: Tool executed successfully")
 		})
 
+		it("should handle tool response with images", async () => {
+			const block: ToolUse = {
+				type: "tool_use",
+				name: "use_mcp_tool",
+				params: {
+					server_name: "screenshot_server",
+					tool_name: "capture_screenshot",
+					arguments: '{"url": "https://example.com"}',
+				},
+				partial: false,
+			}
+
+			mockAskApproval.mockResolvedValue(true)
+
+			const mockToolResult = {
+				content: [
+					{ type: "text", text: "Screenshot captured successfully" },
+					{
+						type: "image",
+						data: "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChwGA60e6kgAAAABJRU5ErkJggg==",
+						mimeType: "image/png",
+					},
+				],
+				isError: false,
+			}
+
+			mockProviderRef.deref.mockReturnValue({
+				getMcpHub: () => ({
+					callTool: vi.fn().mockResolvedValue(mockToolResult),
+				}),
+				postMessageToWebview: vi.fn(),
+			})
+
+			await useMcpToolTool(
+				mockTask as Task,
+				block,
+				mockAskApproval,
+				mockHandleError,
+				mockPushToolResult,
+				mockRemoveClosingTag,
+			)
+
+			expect(mockTask.consecutiveMistakeCount).toBe(0)
+			expect(mockAskApproval).toHaveBeenCalled()
+			expect(mockTask.say).toHaveBeenCalledWith("mcp_server_request_started")
+			expect(mockTask.say).toHaveBeenCalledWith(
+				"mcp_server_response",
+				"Screenshot captured successfully\n\n[Image: image/png]",
+				[
+					"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChwGA60e6kgAAAABJRU5ErkJggg==",
+				],
+			)
+			expect(mockPushToolResult).toHaveBeenCalledWith(
+				"Tool result: Screenshot captured successfully\n\n[Image: image/png] [with 1 image(s)]",
+			)
+		})
+
+		it("should handle tool response with multiple images", async () => {
+			const block: ToolUse = {
+				type: "tool_use",
+				name: "use_mcp_tool",
+				params: {
+					server_name: "image_processor",
+					tool_name: "process_images",
+					arguments: "{}",
+				},
+				partial: false,
+			}
+
+			mockAskApproval.mockResolvedValue(true)
+
+			const mockToolResult = {
+				content: [
+					{ type: "text", text: "Processed 2 images" },
+					{ type: "image", data: "data:image/png;base64,ABC123", mimeType: "image/png" },
+					{ type: "image", data: "XYZ789", mimeType: "image/jpeg" },
+				],
+				isError: false,
+			}
+
+			mockProviderRef.deref.mockReturnValue({
+				getMcpHub: () => ({
+					callTool: vi.fn().mockResolvedValue(mockToolResult),
+				}),
+				postMessageToWebview: vi.fn(),
+			})
+
+			await useMcpToolTool(
+				mockTask as Task,
+				block,
+				mockAskApproval,
+				mockHandleError,
+				mockPushToolResult,
+				mockRemoveClosingTag,
+			)
+
+			expect(mockTask.say).toHaveBeenCalledWith(
+				"mcp_server_response",
+				"Processed 2 images\n\n[Image: image/png]\n\n[Image: image/jpeg]",
+				["data:image/png;base64,ABC123", "data:image/jpeg;base64,XYZ789"],
+			)
+			expect(mockPushToolResult).toHaveBeenCalledWith(
+				"Tool result: Processed 2 images\n\n[Image: image/png]\n\n[Image: image/jpeg] [with 2 image(s)]",
+			)
+		})
+
 		it("should handle user rejection", async () => {
 			const block: ToolUse = {
 				type: "tool_use",
diff --git a/src/core/tools/useMcpToolTool.ts b/src/core/tools/useMcpToolTool.ts
@@ -89,16 +89,27 @@ async function sendExecutionStatus(cline: Task, status: McpExecutionStatus): Pro
 	})
 }
 
-function processToolContent(toolResult: any): string {
+function processToolContent(toolResult: any): { text: string; images: string[] } {
+	const images: string[] = []
+
 	if (!toolResult?.content || toolResult.content.length === 0) {
-		return ""
+		return { text: "", images }
 	}
 
-	return toolResult.content
+	const text = toolResult.content
 		.map((item: any) => {
 			if (item.type === "text") {
 				return item.text
 			}
+			if (item.type === "image" && item.data && item.mimeType) {
+				// Handle base64 image data
+				if (item.data.startsWith("data:")) {
+					images.push(item.data)
+				} else {
+					images.push(`data:${item.mimeType};base64,${item.data}`)
+				}
+				return `[Image: ${item.mimeType}]`
+			}
 			if (item.type === "resource") {
 				const { blob: _, ...rest } = item.resource
 				return JSON.stringify(rest, null, 2)
@@ -107,6 +118,8 @@ function processToolContent(toolResult: any): string {
 		})
 		.filter(Boolean)
 		.join("\n\n")
+
+	return { text, images }
 }
 
 async function executeToolAndProcessResult(
@@ -130,9 +143,12 @@ async function executeToolAndProcessResult(
 	const toolResult = await cline.providerRef.deref()?.getMcpHub()?.callTool(serverName, toolName, parsedArguments)
 
 	let toolResultPretty = "(No response)"
+	let images: string[] = []
 
 	if (toolResult) {
-		const outputText = processToolContent(toolResult)
+		const processedContent = processToolContent(toolResult)
+		const outputText = processedContent.text
+		images = processedContent.images
 
 		if (outputText) {
 			await sendExecutionStatus(cline, {
@@ -160,8 +176,8 @@ async function executeToolAndProcessResult(
 		})
 	}
 
-	await cline.say("mcp_server_response", toolResultPretty)
-	pushToolResult(formatResponse.toolResult(toolResultPretty))
+	await cline.say("mcp_server_response", toolResultPretty, images)
+	pushToolResult(formatResponse.toolResult(toolResultPretty, images))
 }
 
 export async function useMcpToolTool(