feat: set tool_choice to 'auto' and carry tool-call IDs through parsing, tool results, and history

NaccOll · NaccOll · commit 548690672950 · 2025-08-18T01:26:58.000+08:00
diff --git a/src/api/providers/lm-studio.ts b/src/api/providers/lm-studio.ts
@@ -98,7 +98,7 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan
 			}
 			if (toolCallEnabled) {
 				params.tools = toolRegistry.generateFunctionCallSchemas(metadata.tools!, metadata.toolArgs)
-				params.tool_choice = "required"
+				params.tool_choice = "auto"
 			}
 
 			if (this.options.lmStudioSpeculativeDecodingEnabled && this.options.lmStudioDraftModelId) {
diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts
@@ -169,7 +169,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 			}
 			if (toolCallEnabled) {
 				requestOptions.tools = toolRegistry.generateFunctionCallSchemas(metadata.tools!, metadata.toolArgs)
-				requestOptions.tool_choice = "required"
+				requestOptions.tool_choice = "auto"
 			}
 
 			// Add max_tokens if needed
diff --git a/src/api/providers/openrouter.ts b/src/api/providers/openrouter.ts
@@ -139,7 +139,7 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
 		}
 		if (toolCallEnabled) {
 			completionParams.tools = toolRegistry.generateFunctionCallSchemas(metadata.tools!, metadata.toolArgs!)
-			completionParams.tool_choice = "required"
+			completionParams.tool_choice = "auto"
 		}
 
 		const stream = await this.client.chat.completions.create(completionParams)
diff --git a/src/core/assistant-message/AssistantMessageParser.ts b/src/core/assistant-message/AssistantMessageParser.ts
@@ -1,6 +1,7 @@
 import { type ToolName, toolNames } from "@roo-code/types"
 import { TextContent, ToolUse, ToolParamName, toolParamNames } from "../../shared/tools"
 import { AssistantMessageContent } from "./parseAssistantMessage"
+import { ToolCallParam } from "../task/tool-call-helper"
 
 /**
  * Parser for assistant messages. Maintains state between chunks
@@ -51,7 +52,7 @@ export class AssistantMessageParser {
 	 * Process a new chunk of text and update the parser state.
 	 * @param chunk The new chunk of text to process.
 	 */
-	public processChunk(chunk: string): AssistantMessageContent[] {
+	public processChunk(chunk: string, toolCallParam?: ToolCallParam): AssistantMessageContent[] {
 		if (this.accumulator.length + chunk.length > this.MAX_ACCUMULATOR_SIZE) {
 			throw new Error("Assistant message exceeds maximum allowed size")
 		}
@@ -174,6 +175,11 @@ export class AssistantMessageParser {
 						name: extractedToolName as ToolName,
 						params: {},
 						partial: true,
+						toolUseId: toolCallParam && toolCallParam.toolUserId ? toolCallParam.toolUserId : undefined,
+						toolUseParam:
+							toolCallParam && toolCallParam?.anthropicContent
+								? toolCallParam?.anthropicContent
+								: undefined,
 					}
 
 					this.currentToolUseStartIndex = this.accumulator.length
diff --git a/src/core/assistant-message/parseAssistantMessage.ts b/src/core/assistant-message/parseAssistantMessage.ts
@@ -1,10 +1,14 @@
 import { type ToolName, toolNames } from "@roo-code/types"
 
 import { TextContent, ToolUse, ToolParamName, toolParamNames } from "../../shared/tools"
+import { ToolCallParam } from "../task/tool-call-helper"
 
 export type AssistantMessageContent = TextContent | ToolUse
 
-export function parseAssistantMessage(assistantMessage: string): AssistantMessageContent[] {
+export function parseAssistantMessage(
+	assistantMessage: string,
+	toolCallParam?: ToolCallParam,
+): AssistantMessageContent[] {
 	let contentBlocks: AssistantMessageContent[] = []
 	let currentTextContent: TextContent | undefined = undefined
 	let currentTextContentStartIndex = 0
@@ -103,6 +107,9 @@ export function parseAssistantMessage(assistantMessage: string): AssistantMessag
 					name: toolUseOpeningTag.slice(1, -1) as ToolName,
 					params: {},
 					partial: true,
+					toolUseId: toolCallParam && toolCallParam.toolUserId ? toolCallParam.toolUserId : undefined,
+					toolUseParam:
+						toolCallParam && toolCallParam?.anthropicContent ? toolCallParam?.anthropicContent : undefined,
 				}
 
 				currentToolUseStartIndex = accumulator.length
diff --git a/src/core/assistant-message/presentAssistantMessage.ts b/src/core/assistant-message/presentAssistantMessage.ts
@@ -33,6 +33,7 @@ import { Task } from "../task/Task"
 import { codebaseSearchTool } from "../tools/codebaseSearchTool"
 import { experiments, EXPERIMENT_IDS } from "../../shared/experiments"
 import { applyDiffToolLegacy } from "../tools/applyDiffTool"
+import Anthropic from "@anthropic-ai/sdk"
 
 /**
  * Processes and presents assistant message content to the user interface.
@@ -61,6 +62,7 @@ export async function presentAssistantMessage(cline: Task) {
 		return
 	}
 
+	const toolCallEnabled = cline.apiConfiguration?.toolCallEnabled
 	cline.presentAssistantMessageLocked = true
 	cline.presentAssistantMessageHasPendingUpdates = false
 
@@ -245,12 +247,28 @@ export async function presentAssistantMessage(cline: Task) {
 			}
 
 			const pushToolResult = (content: ToolResponse) => {
-				cline.userMessageContent.push({ type: "text", text: `${toolDescription()} Result:` })
-
+				const newUserMessages: (Anthropic.TextBlockParam | Anthropic.ImageBlockParam)[] = [
+					{ type: "text", text: `${toolDescription()} Result:` },
+				]
 				if (typeof content === "string") {
-					cline.userMessageContent.push({ type: "text", text: content || "(tool did not return anything)" })
+					newUserMessages.push({ type: "text", text: content || "(tool did not return anything)" })
+				} else {
+					newUserMessages.push(...content)
+				}
+
+				if (toolCallEnabled) {
+					const lastToolUseMessage = cline.assistantMessageContent.find((msg) => msg.type === "tool_use")
+					if (lastToolUseMessage && lastToolUseMessage.toolUseId) {
+						const toolUseId = lastToolUseMessage.toolUseId
+						const toolMessage: Anthropic.ToolResultBlockParam = {
+							tool_use_id: toolUseId,
+							type: "tool_result",
+							content: newUserMessages,
+						}
+						cline.userMessageContent.push(toolMessage)
+					}
 				} else {
-					cline.userMessageContent.push(...content)
+					cline.userMessageContent.push(...newUserMessages)
 				}
 
 				// Once a tool result has been collected, ignore all other tool
@@ -429,7 +447,7 @@ export async function presentAssistantMessage(cline: Task) {
 						)
 					}
 
-					if (isMultiFileApplyDiffEnabled || cline.apiConfiguration.toolCallEnabled === true) {
+					if (isMultiFileApplyDiffEnabled || toolCallEnabled) {
 						await checkpointSaveAndMark(cline)
 						await applyDiffTool(cline, block, askApproval, handleError, pushToolResult, removeClosingTag)
 					} else {
diff --git a/src/core/prompts/sections/tool-use-guidelines.ts b/src/core/prompts/sections/tool-use-guidelines.ts
@@ -1,6 +1,10 @@
 import { CodeIndexManager } from "../../../services/code-index/manager"
+import { SystemPromptSettings } from "../types"
 
-export function getToolUseGuidelinesSection(codeIndexManager?: CodeIndexManager): string {
+export function getToolUseGuidelinesSection(
+	codeIndexManager?: CodeIndexManager,
+	settings?: SystemPromptSettings,
+): string {
 	const isCodebaseSearchAvailable =
 		codeIndexManager &&
 		codeIndexManager.isFeatureEnabled &&
@@ -34,7 +38,9 @@ export function getToolUseGuidelinesSection(codeIndexManager?: CodeIndexManager)
 	guidelinesList.push(
 		`${itemNumber++}. If multiple actions are needed, use one tool at a time per message to accomplish the task iteratively, with each tool use being informed by the result of the previous tool use. Do not assume the outcome of any tool use. Each step must be informed by the previous step's result.`,
 	)
-	guidelinesList.push(`${itemNumber++}. Formulate your tool use using the XML format specified for each tool.`)
+	if (settings?.toolCallEnabled !== true) {
+		guidelinesList.push(`${itemNumber++}. Formulate your tool use using the XML format specified for each tool.`)
+	}
 	guidelinesList.push(`${itemNumber++}. After each tool use, the user will respond with the result of that tool use. This result will provide you with the necessary information to continue your task or make further decisions. This response may include:
   - Information about whether the tool succeeded or failed, along with any reasons for failure.
   - Linter errors that may have arisen due to the changes you made, which you'll need to address.
diff --git a/src/core/prompts/system.ts b/src/core/prompts/system.ts
@@ -108,7 +108,7 @@ ${getToolDescriptionsForMode(
 	enableMcpServerCreation,
 )}
 
-${getToolUseGuidelinesSection(codeIndexManager)}
+${getToolUseGuidelinesSection(codeIndexManager, settings)}
 
 ${mcpServersSection}
 
diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts
@@ -108,7 +108,7 @@ import { getMessagesSinceLastSummary, summarizeConversation } from "../condense"
 import { maybeRemoveImageBlocks } from "../../api/transform/image-cleaning"
 import { restoreTodoListForTask } from "../tools/updateTodoListTool"
 import { AutoApprovalHandler } from "./AutoApprovalHandler"
-import { StreamingToolCallProcessor, handleOpenaiToolCallStreaming } from "./tool-call-helper"
+import { StreamingToolCallProcessor, ToolCallParam, handleOpenaiToolCallStreaming } from "./tool-call-helper"
 import { ToolArgs } from "../prompts/tools/types"
 
 const MAX_EXPONENTIAL_BACKOFF_SECONDS = 600 // 10 minutes
@@ -272,7 +272,7 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 	assistantMessageContent: AssistantMessageContent[] = []
 	presentAssistantMessageLocked = false
 	presentAssistantMessageHasPendingUpdates = false
-	userMessageContent: (Anthropic.TextBlockParam | Anthropic.ImageBlockParam)[] = []
+	userMessageContent: (Anthropic.TextBlockParam | Anthropic.ImageBlockParam | Anthropic.ToolResultBlockParam)[] = []
 	userMessageContentReady = false
 	didRejectTool = false
 	didAlreadyUseTool = false
@@ -1211,41 +1211,41 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 		// Make sure that the api conversation history can be resumed by the API,
 		// even if it goes out of sync with cline messages.
 		let existingApiConversationHistory: ApiMessage[] = await this.getSavedApiConversationHistory()
-
-		// v2.0 xml tags refactor caveat: since we don't use tools anymore, we need to replace all tool use blocks with a text block since the API disallows conversations with tool uses and no tool schema
-		const conversationWithoutToolBlocks = existingApiConversationHistory.map((message) => {
-			if (Array.isArray(message.content)) {
-				const newContent = message.content.map((block) => {
-					if (block.type === "tool_use") {
-						// It's important we convert to the new tool schema
-						// format so the model doesn't get confused about how to
-						// invoke tools.
-						const inputAsXml = Object.entries(block.input as Record<string, string>)
-							.map(([key, value]) => `<${key}>\n${value}\n</${key}>`)
-							.join("\n")
-						return {
-							type: "text",
-							text: `<${block.name}>\n${inputAsXml}\n</${block.name}>`,
-						} as Anthropic.Messages.TextBlockParam
-					} else if (block.type === "tool_result") {
-						// Convert block.content to text block array, removing images
-						const contentAsTextBlocks = Array.isArray(block.content)
-							? block.content.filter((item) => item.type === "text")
-							: [{ type: "text", text: block.content }]
-						const textContent = contentAsTextBlocks.map((item) => item.text).join("\n\n")
-						const toolName = findToolName(block.tool_use_id, existingApiConversationHistory)
-						return {
-							type: "text",
-							text: `[${toolName} Result]\n\n${textContent}`,
-						} as Anthropic.Messages.TextBlockParam
-					}
-					return block
-				})
-				return { ...message, content: newContent }
-			}
-			return message
-		})
-		existingApiConversationHistory = conversationWithoutToolBlocks
+		if (this.apiConfiguration.toolCallEnabled !== true) {
+			const conversationWithoutToolBlocks = existingApiConversationHistory.map((message) => {
+				if (Array.isArray(message.content)) {
+					const newContent = message.content.map((block) => {
+						if (block.type === "tool_use") {
+							// It's important we convert to the new tool schema
+							// format so the model doesn't get confused about how to
+							// invoke tools.
+							const inputAsXml = Object.entries(block.input as Record<string, string>)
+								.map(([key, value]) => `<${key}>\n${value}\n</${key}>`)
+								.join("\n")
+							return {
+								type: "text",
+								text: `<${block.name}>\n${inputAsXml}\n</${block.name}>`,
+							} as Anthropic.Messages.TextBlockParam
+						} else if (block.type === "tool_result") {
+							// Convert block.content to text block array, removing images
+							const contentAsTextBlocks = Array.isArray(block.content)
+								? block.content.filter((item) => item.type === "text")
+								: [{ type: "text", text: block.content }]
+							const textContent = contentAsTextBlocks.map((item) => item.text).join("\n\n")
+							const toolName = findToolName(block.tool_use_id, existingApiConversationHistory)
+							return {
+								type: "text",
+								text: `[${toolName} Result]\n\n${textContent}`,
+							} as Anthropic.Messages.TextBlockParam
+						}
+						return block
+					})
+					return { ...message, content: newContent }
+				}
+				return message
+			})
+			existingApiConversationHistory = conversationWithoutToolBlocks
+		}
 
 		// FIXME: remove tool use blocks altogether
 
@@ -1794,13 +1794,15 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 							case "text":
 							case "tool_call": {
 								let chunkContent
+								let toolParam: ToolCallParam | undefined
 								if (chunk.type == "tool_call") {
-									chunkContent =
+									toolParam =
 										handleOpenaiToolCallStreaming(
 											this.streamingToolCallProcessor,
 											chunk.toolCalls,
 											chunk.toolCallType,
 										) ?? ""
+									chunkContent = toolParam.chunkContent
 								} else {
 									chunkContent = chunk.text
 								}
@@ -1809,11 +1811,13 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 								// Parse raw assistant message chunk into content blocks.
 								const prevLength = this.assistantMessageContent.length
 								if (this.isAssistantMessageParserEnabled && this.assistantMessageParser) {
-									this.assistantMessageContent =
-										this.assistantMessageParser.processChunk(chunkContent)
+									this.assistantMessageContent = this.assistantMessageParser.processChunk(
+										chunkContent,
+										toolParam,
+									)
 								} else {
 									// Use the old parsing method when experiment is disabled
-									this.assistantMessageContent = parseAssistantMessage(assistantMessage)
+									this.assistantMessageContent = parseAssistantMessage(assistantMessage, toolParam)
 								}
 
 								if (this.assistantMessageContent.length > prevLength) {
@@ -2107,10 +2111,34 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 				let didEndLoop = false
 
 				if (assistantMessage.length > 0) {
-					await this.addToApiConversationHistory({
-						role: "assistant",
-						content: [{ type: "text", text: assistantMessage }],
-					})
+					if (this.apiConfiguration.toolCallEnabled !== true) {
+						await this.addToApiConversationHistory({
+							role: "assistant",
+							content: [{ type: "text", text: assistantMessage }],
+						})
+					} else {
+						for (const block of this.assistantMessageContent) {
+							if (block.type === "text" && block.content) {
+								await this.addToApiConversationHistory({
+									role: "assistant",
+									content: [{ type: "text", text: block.content }],
+								})
+							}
+							if (block.type === "tool_use" && block.toolUseId && block.toolUseParam) {
+								await this.addToApiConversationHistory({
+									role: "assistant",
+									content: [
+										{
+											type: "tool_use",
+											id: block.toolUseId,
+											name: block.name,
+											input: block.toolUseParam.input,
+										},
+									],
+								})
+							}
+						}
+					}
 
 					TelemetryService.instance.captureConversationMessage(this.taskId, "assistant")
 
diff --git a/src/core/task/__tests__/tool-call-helper.spec.ts b/src/core/task/__tests__/tool-call-helper.spec.ts
@@ -306,7 +306,7 @@ describe("handleOpenaiToolCallStreaming", () => {
 	it("should delegate to processor.processChunk", () => {
 		const processor = new StreamingToolCallProcessor()
 		const chunk = [{ index: 0, id: "1", function: { name: "echo", arguments: '{"msg":"hi"}' } }]
-		const xml = handleOpenaiToolCallStreaming(processor, chunk, "openai")
+		const xml = handleOpenaiToolCallStreaming(processor, chunk, "openai").chunkContent
 		expect(xml).toContain("<echo>")
 		expect(xml).toContain("<msg>hi</msg>")
 	})
diff --git a/src/core/task/tool-call-helper.ts b/src/core/task/tool-call-helper.ts
diff --git a/src/shared/tools.ts b/src/shared/tools.ts
diff --git a/src/utils/__tests__/xml.spec.ts b/src/utils/__tests__/xml.spec.ts

Original file line number	Diff line number	Diff line change
`@@ -98,7 +98,7 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan`
`98`	`98`	`}`
`99`	`99`	`if (toolCallEnabled) {`
`100`	`100`	`params.tools = toolRegistry.generateFunctionCallSchemas(metadata.tools!, metadata.toolArgs)`
`101`		`- params.tool_choice = "required"`
	`101`	`+ params.tool_choice = "auto"`
`102`	`102`	`}`
`103`	`103`
`104`	`104`	`if (this.options.lmStudioSpeculativeDecodingEnabled && this.options.lmStudioDraftModelId) {`
Original file line number	Diff line number	Diff line change
`@@ -169,7 +169,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl`
`169`	`169`	`}`
`170`	`170`	`if (toolCallEnabled) {`
`171`	`171`	`requestOptions.tools = toolRegistry.generateFunctionCallSchemas(metadata.tools!, metadata.toolArgs)`
`172`		`- requestOptions.tool_choice = "required"`
	`172`	`+ requestOptions.tool_choice = "auto"`
`173`	`173`	`}`
`174`	`174`
`175`	`175`	`// Add max_tokens if needed`
Original file line number	Diff line number	Diff line change
`@@ -139,7 +139,7 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH`
`139`	`139`	`}`
`140`	`140`	`if (toolCallEnabled) {`
`141`	`141`	`completionParams.tools = toolRegistry.generateFunctionCallSchemas(metadata.tools!, metadata.toolArgs!)`
`142`		`- completionParams.tool_choice = "required"`
	`142`	`+ completionParams.tool_choice = "auto"`
`143`	`143`	`}`
`144`	`144`
`145`	`145`	`const stream = await this.client.chat.completions.create(completionParams)`