Skip to content

Commit 9f111e1

Browse files
authored
fix: handle unknown/invalid native tool calls to prevent extension freeze (#9834)
1 parent 7945860 commit 9f111e1

File tree

22 files changed

+398
-27
lines changed

22 files changed

+398
-27
lines changed
Lines changed: 264 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,264 @@
1+
// npx vitest src/core/assistant-message/__tests__/presentAssistantMessage-unknown-tool.spec.ts
2+
3+
import { describe, it, expect, beforeEach, vi } from "vitest"
4+
import { presentAssistantMessage } from "../presentAssistantMessage"
5+
6+
// Mock dependencies
7+
vi.mock("../../task/Task")
8+
vi.mock("../../tools/validateToolUse", () => ({
9+
validateToolUse: vi.fn(),
10+
}))
11+
vi.mock("@roo-code/telemetry", () => ({
12+
TelemetryService: {
13+
instance: {
14+
captureToolUsage: vi.fn(),
15+
captureConsecutiveMistakeError: vi.fn(),
16+
},
17+
},
18+
}))
19+
20+
describe("presentAssistantMessage - Unknown Tool Handling", () => {
21+
let mockTask: any
22+
23+
beforeEach(() => {
24+
// Create a mock Task with minimal properties needed for testing
25+
mockTask = {
26+
taskId: "test-task-id",
27+
instanceId: "test-instance",
28+
abort: false,
29+
presentAssistantMessageLocked: false,
30+
presentAssistantMessageHasPendingUpdates: false,
31+
currentStreamingContentIndex: 0,
32+
assistantMessageContent: [],
33+
userMessageContent: [],
34+
didCompleteReadingStream: false,
35+
didRejectTool: false,
36+
didAlreadyUseTool: false,
37+
diffEnabled: false,
38+
consecutiveMistakeCount: 0,
39+
clineMessages: [],
40+
api: {
41+
getModel: () => ({ id: "test-model", info: {} }),
42+
},
43+
browserSession: {
44+
closeBrowser: vi.fn().mockResolvedValue(undefined),
45+
},
46+
recordToolUsage: vi.fn(),
47+
recordToolError: vi.fn(),
48+
toolRepetitionDetector: {
49+
check: vi.fn().mockReturnValue({ allowExecution: true }),
50+
},
51+
providerRef: {
52+
deref: () => ({
53+
getState: vi.fn().mockResolvedValue({
54+
mode: "code",
55+
customModes: [],
56+
}),
57+
}),
58+
},
59+
say: vi.fn().mockResolvedValue(undefined),
60+
ask: vi.fn().mockResolvedValue({ response: "yesButtonClicked" }),
61+
}
62+
})
63+
64+
it("should return error for unknown tool in native protocol", async () => {
65+
// Set up a tool_use block with an unknown tool name and an ID (native protocol)
66+
const toolCallId = "tool_call_unknown_123"
67+
mockTask.assistantMessageContent = [
68+
{
69+
type: "tool_use",
70+
id: toolCallId, // ID indicates native protocol
71+
name: "nonexistent_tool",
72+
params: { some: "param" },
73+
partial: false,
74+
},
75+
]
76+
77+
// Execute presentAssistantMessage
78+
await presentAssistantMessage(mockTask)
79+
80+
// Verify that a tool_result with error was pushed
81+
const toolResult = mockTask.userMessageContent.find(
82+
(item: any) => item.type === "tool_result" && item.tool_use_id === toolCallId,
83+
)
84+
85+
expect(toolResult).toBeDefined()
86+
expect(toolResult.tool_use_id).toBe(toolCallId)
87+
// The error is wrapped in JSON by formatResponse.toolError
88+
expect(toolResult.content).toContain("nonexistent_tool")
89+
expect(toolResult.content).toContain("does not exist")
90+
expect(toolResult.content).toContain("error")
91+
92+
// Verify consecutiveMistakeCount was incremented
93+
expect(mockTask.consecutiveMistakeCount).toBe(1)
94+
95+
// Verify recordToolError was called
96+
expect(mockTask.recordToolError).toHaveBeenCalledWith(
97+
"nonexistent_tool",
98+
expect.stringContaining("Unknown tool"),
99+
)
100+
101+
// Verify error message was shown to user (uses i18n key)
102+
expect(mockTask.say).toHaveBeenCalledWith("error", "unknownToolError")
103+
})
104+
105+
it("should return error for unknown tool in XML protocol", async () => {
106+
// Set up a tool_use block with an unknown tool name WITHOUT an ID (XML protocol)
107+
mockTask.assistantMessageContent = [
108+
{
109+
type: "tool_use",
110+
// No ID = XML protocol
111+
name: "fake_tool_that_does_not_exist",
112+
params: { param1: "value1" },
113+
partial: false,
114+
},
115+
]
116+
117+
// Execute presentAssistantMessage
118+
await presentAssistantMessage(mockTask)
119+
120+
// For XML protocol, error is pushed as text blocks
121+
const textBlocks = mockTask.userMessageContent.filter((item: any) => item.type === "text")
122+
123+
// There should be text blocks with error message
124+
expect(textBlocks.length).toBeGreaterThan(0)
125+
const hasErrorMessage = textBlocks.some(
126+
(block: any) =>
127+
block.text?.includes("fake_tool_that_does_not_exist") && block.text?.includes("does not exist"),
128+
)
129+
expect(hasErrorMessage).toBe(true)
130+
131+
// Verify consecutiveMistakeCount was incremented
132+
expect(mockTask.consecutiveMistakeCount).toBe(1)
133+
134+
// Verify recordToolError was called
135+
expect(mockTask.recordToolError).toHaveBeenCalled()
136+
137+
// Verify error message was shown to user (uses i18n key)
138+
expect(mockTask.say).toHaveBeenCalledWith("error", "unknownToolError")
139+
})
140+
141+
it("should handle unknown tool without freezing (native protocol)", async () => {
142+
// This test ensures the extension doesn't freeze when an unknown tool is called
143+
const toolCallId = "tool_call_freeze_test"
144+
mockTask.assistantMessageContent = [
145+
{
146+
type: "tool_use",
147+
id: toolCallId, // Native protocol
148+
name: "this_tool_definitely_does_not_exist",
149+
params: {},
150+
partial: false,
151+
},
152+
]
153+
154+
// The test will timeout if the extension freezes
155+
const timeoutPromise = new Promise<boolean>((_, reject) => {
156+
setTimeout(() => reject(new Error("Test timed out - extension likely froze")), 5000)
157+
})
158+
159+
const resultPromise = presentAssistantMessage(mockTask).then(() => true)
160+
161+
// Race between the function completing and the timeout
162+
const completed = await Promise.race([resultPromise, timeoutPromise])
163+
expect(completed).toBe(true)
164+
165+
// Verify a tool_result was pushed (critical for API not to freeze)
166+
const toolResult = mockTask.userMessageContent.find(
167+
(item: any) => item.type === "tool_result" && item.tool_use_id === toolCallId,
168+
)
169+
expect(toolResult).toBeDefined()
170+
})
171+
172+
it("should increment consecutiveMistakeCount for unknown tools", async () => {
173+
// Test with multiple unknown tools to ensure mistake count increments
174+
const toolCallId = "tool_call_mistake_test"
175+
mockTask.assistantMessageContent = [
176+
{
177+
type: "tool_use",
178+
id: toolCallId,
179+
name: "unknown_tool_1",
180+
params: {},
181+
partial: false,
182+
},
183+
]
184+
185+
expect(mockTask.consecutiveMistakeCount).toBe(0)
186+
187+
await presentAssistantMessage(mockTask)
188+
189+
expect(mockTask.consecutiveMistakeCount).toBe(1)
190+
})
191+
192+
it("should set userMessageContentReady after handling unknown tool", async () => {
193+
const toolCallId = "tool_call_ready_test"
194+
mockTask.assistantMessageContent = [
195+
{
196+
type: "tool_use",
197+
id: toolCallId,
198+
name: "unknown_tool",
199+
params: {},
200+
partial: false,
201+
},
202+
]
203+
204+
mockTask.didCompleteReadingStream = true
205+
mockTask.userMessageContentReady = false
206+
207+
await presentAssistantMessage(mockTask)
208+
209+
// userMessageContentReady should be set after processing
210+
expect(mockTask.userMessageContentReady).toBe(true)
211+
})
212+
213+
it("should still work with didAlreadyUseTool flag for unknown tool", async () => {
214+
const toolCallId = "tool_call_already_used_test"
215+
mockTask.assistantMessageContent = [
216+
{
217+
type: "tool_use",
218+
id: toolCallId,
219+
name: "unknown_tool",
220+
params: {},
221+
partial: false,
222+
},
223+
]
224+
225+
mockTask.didAlreadyUseTool = true
226+
227+
await presentAssistantMessage(mockTask)
228+
229+
// When didAlreadyUseTool is true, should send error tool_result
230+
const toolResult = mockTask.userMessageContent.find(
231+
(item: any) => item.type === "tool_result" && item.tool_use_id === toolCallId,
232+
)
233+
234+
expect(toolResult).toBeDefined()
235+
expect(toolResult.is_error).toBe(true)
236+
expect(toolResult.content).toContain("was not executed because a tool has already been used")
237+
})
238+
239+
it("should still work with didRejectTool flag for unknown tool", async () => {
240+
const toolCallId = "tool_call_rejected_test"
241+
mockTask.assistantMessageContent = [
242+
{
243+
type: "tool_use",
244+
id: toolCallId,
245+
name: "unknown_tool",
246+
params: {},
247+
partial: false,
248+
},
249+
]
250+
251+
mockTask.didRejectTool = true
252+
253+
await presentAssistantMessage(mockTask)
254+
255+
// When didRejectTool is true, should send error tool_result
256+
const toolResult = mockTask.userMessageContent.find(
257+
(item: any) => item.type === "tool_result" && item.tool_use_id === toolCallId,
258+
)
259+
260+
expect(toolResult).toBeDefined()
261+
expect(toolResult.is_error).toBe(true)
262+
expect(toolResult.content).toContain("due to user rejecting a previous tool")
263+
})
264+
})

src/core/assistant-message/presentAssistantMessage.ts

Lines changed: 77 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import { TelemetryService } from "@roo-code/telemetry"
88
import { defaultModeSlug, getModeBySlug } from "../../shared/modes"
99
import type { ToolParamName, ToolResponse, ToolUse, McpToolUse } from "../../shared/tools"
1010
import { Package } from "../../shared/package"
11+
import { t } from "../../i18n"
1112

1213
import { fetchInstructionsTool } from "../tools/FetchInstructionsTool"
1314
import { listFilesTool } from "../tools/ListFilesTool"
@@ -333,7 +334,11 @@ export async function presentAssistantMessage(cline: Task) {
333334
await cline.say("text", content, undefined, block.partial)
334335
break
335336
}
336-
case "tool_use":
337+
case "tool_use": {
338+
// Fetch state early so it's available for toolDescription and validation
339+
const state = await cline.providerRef.deref()?.getState()
340+
const { mode, customModes, experiments: stateExperiments, apiConfiguration } = state ?? {}
341+
337342
const toolDescription = (): string => {
338343
switch (block.name) {
339344
case "execute_command":
@@ -675,30 +680,46 @@ export async function presentAssistantMessage(cline: Task) {
675680
TelemetryService.instance.captureToolUsage(cline.taskId, block.name, toolProtocol)
676681
}
677682

678-
// Validate tool use before execution.
679-
const {
680-
mode,
681-
customModes,
682-
experiments: stateExperiments,
683-
apiConfiguration,
684-
} = (await cline.providerRef.deref()?.getState()) ?? {}
685-
const modelInfo = cline.api.getModel()
686-
const includedTools = modelInfo?.info?.includedTools
687-
688-
try {
689-
validateToolUse(
690-
block.name as ToolName,
691-
mode ?? defaultModeSlug,
692-
customModes ?? [],
693-
{ apply_diff: cline.diffEnabled },
694-
block.params,
695-
stateExperiments,
696-
includedTools,
697-
)
698-
} catch (error) {
699-
cline.consecutiveMistakeCount++
700-
pushToolResult(formatResponse.toolError(error.message, toolProtocol))
701-
break
683+
// Validate tool use before execution - ONLY for complete (non-partial) blocks.
684+
// Validating partial blocks would cause validation errors to be thrown repeatedly
685+
// during streaming, pushing multiple tool_results for the same tool_use_id and
686+
// potentially causing the stream to appear frozen.
687+
if (!block.partial) {
688+
const modelInfo = cline.api.getModel()
689+
const includedTools = modelInfo?.info?.includedTools
690+
691+
try {
692+
validateToolUse(
693+
block.name as ToolName,
694+
mode ?? defaultModeSlug,
695+
customModes ?? [],
696+
{ apply_diff: cline.diffEnabled },
697+
block.params,
698+
stateExperiments,
699+
includedTools,
700+
)
701+
} catch (error) {
702+
cline.consecutiveMistakeCount++
703+
// For validation errors (unknown tool, tool not allowed for mode), we need to:
704+
// 1. Send a tool_result with the error (required for native protocol)
705+
// 2. NOT set didAlreadyUseTool = true (the tool was never executed, just failed validation)
706+
// This prevents the stream from being interrupted with "Response interrupted by tool use result"
707+
// which would cause the extension to appear to hang
708+
const errorContent = formatResponse.toolError(error.message, toolProtocol)
709+
if (toolProtocol === TOOL_PROTOCOL.NATIVE && toolCallId) {
710+
// For native protocol, push tool_result directly without setting didAlreadyUseTool
711+
cline.userMessageContent.push({
712+
type: "tool_result",
713+
tool_use_id: toolCallId,
714+
content: typeof errorContent === "string" ? errorContent : "(validation error)",
715+
is_error: true,
716+
} as Anthropic.ToolResultBlockParam)
717+
} else {
718+
// For XML protocol, use the standard pushToolResult
719+
pushToolResult(errorContent)
720+
}
721+
break
722+
}
702723
}
703724

704725
// Check for identical consecutive tool calls.
@@ -995,9 +1016,40 @@ export async function presentAssistantMessage(cline: Task) {
9951016
toolProtocol,
9961017
})
9971018
break
1019+
default: {
1020+
// Handle unknown/invalid tool names
1021+
// This is critical for native protocol where every tool_use MUST have a tool_result
1022+
// Note: This case should rarely be reached since validateToolUse now checks for unknown tools
1023+
1024+
// CRITICAL: Don't process partial blocks for unknown tools - just let them stream in.
1025+
// If we try to show errors for partial blocks, we'd show the error on every streaming chunk,
1026+
// creating a loop that appears to freeze the extension. Only handle complete blocks.
1027+
if (block.partial) {
1028+
break
1029+
}
1030+
1031+
const errorMessage = `Unknown tool "${block.name}". This tool does not exist. Please use one of the available tools.`
1032+
cline.consecutiveMistakeCount++
1033+
cline.recordToolError(block.name as ToolName, errorMessage)
1034+
await cline.say("error", t("tools:unknownToolError", { toolName: block.name }))
1035+
// Push tool_result directly for native protocol WITHOUT setting didAlreadyUseTool
1036+
// This prevents the stream from being interrupted with "Response interrupted by tool use result"
1037+
if (toolProtocol === TOOL_PROTOCOL.NATIVE && toolCallId) {
1038+
cline.userMessageContent.push({
1039+
type: "tool_result",
1040+
tool_use_id: toolCallId,
1041+
content: formatResponse.toolError(errorMessage, toolProtocol),
1042+
is_error: true,
1043+
} as Anthropic.ToolResultBlockParam)
1044+
} else {
1045+
pushToolResult(formatResponse.toolError(errorMessage, toolProtocol))
1046+
}
1047+
break
1048+
}
9981049
}
9991050

10001051
break
1052+
}
10011053
}
10021054

10031055
// Seeing out of bounds is fine, it means that the next too call is being

0 commit comments

Comments
 (0)