Skip to content

Commit a05620b

Browse files
committed
feat: enhance MCP tool response handling to support images
**Changes:**

- Updated `processToolContent` to return both text and images from tool results.
- Modified `executeToolAndProcessResult` to handle and pass images to the response.
- Adjusted `combineCommandSequences` to preserve images from MCP server responses.
- Updated UI components to display images alongside text responses.

**Testing:**

- Added tests to verify correct handling of tool results with text and images.
- Ensured that image-only responses are processed correctly.

**Files Modified:**

- `src/core/prompts/responses.ts`
- `src/core/tools/useMcpToolTool.ts`
- `src/shared/combineCommandSequences.ts`
- `webview-ui/src/components/chat/McpExecution.tsx`
- `webview-ui/src/components/chat/ChatRow.tsx`
- Test files for MCP tool functionality.
1 parent 0ce4e89 commit a05620b

File tree

7 files changed

+248
-40
lines changed

7 files changed

+248
-40
lines changed

src/core/prompts/responses.ts

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -87,10 +87,16 @@ Otherwise, if you have not completed the task and do not need additional informa
8787
images?: string[],
8888
): string | Array<Anthropic.TextBlockParam | Anthropic.ImageBlockParam> => {
8989
if (images && images.length > 0) {
90-
const textBlock: Anthropic.TextBlockParam = { type: "text", text }
9190
const imageBlocks: Anthropic.ImageBlockParam[] = formatImagesIntoBlocks(images)
92-
// Placing images after text leads to better results
93-
return [textBlock, ...imageBlocks]
91+
92+
if (text.trim()) {
93+
const textBlock: Anthropic.TextBlockParam = { type: "text", text }
94+
// Placing images after text leads to better results
95+
return [textBlock, ...imageBlocks]
96+
} else {
97+
// For image-only responses, return only image blocks
98+
return imageBlocks
99+
}
94100
} else {
95101
return text
96102
}

src/core/tools/__tests__/useMcpToolTool.spec.ts

Lines changed: 108 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,12 @@ import { ToolUse } from "../../../shared/tools"
77
// Mock dependencies
88
vi.mock("../../prompts/responses", () => ({
99
formatResponse: {
10-
toolResult: vi.fn((result: string) => `Tool result: ${result}`),
10+
toolResult: vi.fn((result: string, images?: string[]) => {
11+
if (images && images.length > 0) {
12+
return `Tool result: ${result} (with ${images.length} images)`
13+
}
14+
return `Tool result: ${result}`
15+
}),
1116
toolError: vi.fn((error: string) => `Tool error: ${error}`),
1217
invalidMcpToolArgumentError: vi.fn((server: string, tool: string) => `Invalid args for ${server}:${tool}`),
1318
unknownMcpToolError: vi.fn((server: string, tool: string, availableTools: string[]) => {
@@ -223,10 +228,111 @@ describe("useMcpToolTool", () => {
223228
expect(mockTask.consecutiveMistakeCount).toBe(0)
224229
expect(mockAskApproval).toHaveBeenCalled()
225230
expect(mockTask.say).toHaveBeenCalledWith("mcp_server_request_started")
226-
expect(mockTask.say).toHaveBeenCalledWith("mcp_server_response", "Tool executed successfully")
231+
expect(mockTask.say).toHaveBeenCalledWith("mcp_server_response", "Tool executed successfully", [])
227232
expect(mockPushToolResult).toHaveBeenCalledWith("Tool result: Tool executed successfully")
228233
})
229234

235+
it("should handle tool result with text and images", async () => {
236+
const block: ToolUse = {
237+
type: "tool_use",
238+
name: "use_mcp_tool",
239+
params: {
240+
server_name: "test_server",
241+
tool_name: "test_tool",
242+
arguments: '{"param": "value"}',
243+
},
244+
partial: false,
245+
}
246+
247+
mockAskApproval.mockResolvedValue(true)
248+
249+
const mockToolResult = {
250+
content: [
251+
{ type: "text", text: "Generated image:" },
252+
{
253+
type: "image",
254+
data: "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChAI9jU",
255+
mimeType: "image/png",
256+
},
257+
],
258+
isError: false,
259+
}
260+
261+
mockProviderRef.deref.mockReturnValue({
262+
getMcpHub: () => ({
263+
callTool: vi.fn().mockResolvedValue(mockToolResult),
264+
}),
265+
postMessageToWebview: vi.fn(),
266+
})
267+
268+
await useMcpToolTool(
269+
mockTask as Task,
270+
block,
271+
mockAskApproval,
272+
mockHandleError,
273+
mockPushToolResult,
274+
mockRemoveClosingTag,
275+
)
276+
277+
expect(mockTask.consecutiveMistakeCount).toBe(0)
278+
expect(mockAskApproval).toHaveBeenCalled()
279+
expect(mockTask.say).toHaveBeenCalledWith("mcp_server_request_started")
280+
expect(mockTask.say).toHaveBeenCalledWith("mcp_server_response", "Generated image:", [
281+
"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChAI9jU",
282+
])
283+
expect(mockPushToolResult).toHaveBeenCalledWith("Tool result: Generated image: (with 1 images)")
284+
})
285+
286+
it("should handle tool result with only images (no text)", async () => {
287+
const block: ToolUse = {
288+
type: "tool_use",
289+
name: "use_mcp_tool",
290+
params: {
291+
server_name: "test_server",
292+
tool_name: "test_tool",
293+
arguments: '{"param": "value"}',
294+
},
295+
partial: false,
296+
}
297+
298+
mockAskApproval.mockResolvedValue(true)
299+
300+
const mockToolResult = {
301+
content: [
302+
{
303+
type: "image",
304+
data: "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChAI9jU",
305+
mimeType: "image/png",
306+
},
307+
],
308+
isError: false,
309+
}
310+
311+
mockProviderRef.deref.mockReturnValue({
312+
getMcpHub: () => ({
313+
callTool: vi.fn().mockResolvedValue(mockToolResult),
314+
}),
315+
postMessageToWebview: vi.fn(),
316+
})
317+
318+
await useMcpToolTool(
319+
mockTask as Task,
320+
block,
321+
mockAskApproval,
322+
mockHandleError,
323+
mockPushToolResult,
324+
mockRemoveClosingTag,
325+
)
326+
327+
expect(mockTask.consecutiveMistakeCount).toBe(0)
328+
expect(mockAskApproval).toHaveBeenCalled()
329+
expect(mockTask.say).toHaveBeenCalledWith("mcp_server_request_started")
330+
expect(mockTask.say).toHaveBeenCalledWith("mcp_server_response", "", [
331+
"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChAI9jU",
332+
])
333+
expect(mockPushToolResult).toHaveBeenCalledWith("Tool result: (with 1 images)")
334+
})
335+
230336
it("should handle user rejection", async () => {
231337
const block: ToolUse = {
232338
type: "tool_use",

src/core/tools/useMcpToolTool.ts

Lines changed: 35 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -195,24 +195,39 @@ async function sendExecutionStatus(cline: Task, status: McpExecutionStatus): Pro
195195
})
196196
}
197197

198-
function processToolContent(toolResult: any): string {
198+
function processToolContent(toolResult: any): { text: string; images: string[] } {
199199
if (!toolResult?.content || toolResult.content.length === 0) {
200-
return ""
200+
return { text: "", images: [] }
201201
}
202202

203-
return toolResult.content
204-
.map((item: any) => {
205-
if (item.type === "text") {
206-
return item.text
203+
const textParts: string[] = []
204+
const images: string[] = []
205+
206+
toolResult.content.forEach((item: any) => {
207+
if (item.type === "text") {
208+
textParts.push(item.text)
209+
} else if (item.type === "image") {
210+
if (item.data && item.mimeType) {
211+
const validImageTypes = ["image/png", "image/jpeg", "image/gif", "image/webp"]
212+
if (validImageTypes.includes(item.mimeType)) {
213+
const dataUrl = `data:${item.mimeType};base64,${item.data}`
214+
images.push(dataUrl)
215+
} else {
216+
console.warn(`Unsupported image MIME type: ${item.mimeType}`)
217+
}
218+
} else {
219+
console.warn("Invalid MCP ImageContent: missing data or mimeType")
207220
}
208-
if (item.type === "resource") {
209-
const { blob: _, ...rest } = item.resource
210-
return JSON.stringify(rest, null, 2)
211-
}
212-
return ""
213-
})
214-
.filter(Boolean)
215-
.join("\n\n")
221+
} else if (item.type === "resource") {
222+
const { blob: _, ...rest } = item.resource
223+
textParts.push(JSON.stringify(rest, null, 2))
224+
}
225+
})
226+
227+
return {
228+
text: textParts.filter(Boolean).join("\n\n"),
229+
images,
230+
}
216231
}
217232

218233
async function executeToolAndProcessResult(
@@ -236,11 +251,13 @@ async function executeToolAndProcessResult(
236251
const toolResult = await cline.providerRef.deref()?.getMcpHub()?.callTool(serverName, toolName, parsedArguments)
237252

238253
let toolResultPretty = "(No response)"
254+
let images: string[] = []
239255

240256
if (toolResult) {
241-
const outputText = processToolContent(toolResult)
257+
const { text: outputText, images: outputImages } = processToolContent(toolResult)
258+
images = outputImages
242259

243-
if (outputText) {
260+
if (outputText || images.length > 0) {
244261
await sendExecutionStatus(cline, {
245262
executionId,
246263
status: "output",
@@ -266,8 +283,8 @@ async function executeToolAndProcessResult(
266283
})
267284
}
268285

269-
await cline.say("mcp_server_response", toolResultPretty)
270-
pushToolResult(formatResponse.toolResult(toolResultPretty))
286+
await cline.say("mcp_server_response", toolResultPretty, images)
287+
pushToolResult(formatResponse.toolResult(toolResultPretty, images))
271288
}
272289

273290
export async function useMcpToolTool(

src/shared/__tests__/combineCommandSequences.spec.ts

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,48 @@ describe("combineCommandSequences", () => {
8989
})
9090
})
9191

92+
it("should preserve images from mcp_server_response messages", () => {
93+
const messages: ClineMessage[] = [
94+
{
95+
type: "ask",
96+
ask: "use_mcp_server",
97+
text: JSON.stringify({
98+
serverName: "test-server",
99+
toolName: "test-tool",
100+
arguments: { param: "value" },
101+
}),
102+
ts: 1625097600000,
103+
},
104+
{
105+
type: "say",
106+
say: "mcp_server_response",
107+
text: "Generated 1 image",
108+
images: [
109+
"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChAI9jU",
110+
],
111+
ts: 1625097601000,
112+
},
113+
]
114+
115+
const result = combineCommandSequences(messages)
116+
117+
expect(result).toHaveLength(1)
118+
expect(result[0]).toEqual({
119+
type: "ask",
120+
ask: "use_mcp_server",
121+
text: JSON.stringify({
122+
serverName: "test-server",
123+
toolName: "test-tool",
124+
arguments: { param: "value" },
125+
response: "Generated 1 image",
126+
}),
127+
images: [
128+
"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChAI9jU",
129+
],
130+
ts: 1625097600000,
131+
})
132+
})
133+
92134
it("should handle multiple MCP server requests", () => {
93135
const messages: ClineMessage[] = [
94136
{

src/shared/combineCommandSequences.ts

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,16 @@ export function combineCommandSequences(messages: ClineMessage[]): ClineMessage[
3838
if (msg.type === "ask" && msg.ask === "use_mcp_server") {
3939
// Look ahead for MCP responses
4040
let responses: string[] = []
41+
let allImages: string[] = []
4142
let j = i + 1
4243

4344
while (j < messages.length) {
4445
if (messages[j].say === "mcp_server_response") {
4546
responses.push(messages[j].text || "")
47+
// Collect images from MCP server responses
48+
if (messages[j].images && Array.isArray(messages[j].images) && messages[j].images!.length > 0) {
49+
allImages.push(...messages[j].images!)
50+
}
4651
processedIndices.add(j)
4752
j++
4853
} else if (messages[j].type === "ask" && messages[j].ask === "use_mcp_server") {
@@ -57,13 +62,22 @@ export function combineCommandSequences(messages: ClineMessage[]): ClineMessage[
5762
// Parse the JSON from the message text
5863
const jsonObj = safeJsonParse<any>(msg.text || "{}", {})
5964

60-
// Add the response to the JSON object
61-
jsonObj.response = responses.join("\n")
65+
// Only add non-empty responses
66+
const nonEmptyResponses = responses.filter((response) => response.trim())
67+
if (nonEmptyResponses.length > 0) {
68+
jsonObj.response = nonEmptyResponses.join("\n")
69+
}
6270

6371
// Stringify the updated JSON object
6472
const combinedText = JSON.stringify(jsonObj)
6573

66-
combinedMessages.set(msg.ts, { ...msg, text: combinedText })
74+
// Preserve images in the combined message
75+
const combinedMessage = { ...msg, text: combinedText }
76+
if (allImages.length > 0) {
77+
combinedMessage.images = allImages
78+
}
79+
80+
combinedMessages.set(msg.ts, combinedMessage)
6781
} else {
6882
// If there's no response, just keep the original message
6983
combinedMessages.set(msg.ts, { ...msg })

webview-ui/src/components/chat/ChatRow.tsx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1491,6 +1491,7 @@ export const ChatRowContent = ({
14911491
server={server}
14921492
useMcpServer={useMcpServer}
14931493
alwaysAllowMcp={alwaysAllowMcp}
1494+
images={message.images}
14941495
/>
14951496
)}
14961497
</div>

0 commit comments

Comments
 (0)