diff --git a/package.json b/package.json index f7f351d3fc21..8992635cbde2 100644 --- a/package.json +++ b/package.json @@ -62,5 +62,8 @@ "form-data": ">=4.0.4", "bluebird": ">=3.7.2" } + }, + "dependencies": { + "@streamparser/json": "^0.0.22" } } diff --git a/packages/types/src/model.ts b/packages/types/src/model.ts index 705f17039ec8..a7b850d67063 100644 --- a/packages/types/src/model.ts +++ b/packages/types/src/model.ts @@ -58,6 +58,8 @@ export const modelInfoSchema = z.object({ contextWindow: z.number(), supportsImages: z.boolean().optional(), supportsPromptCache: z.boolean(), + // Capability flag to indicate whether the model supports native tool calling + supportsNativeToolCalling: z.boolean().optional(), // Capability flag to indicate whether the model supports an output verbosity parameter supportsVerbosity: z.boolean().optional(), supportsReasoningBudget: z.boolean().optional(), diff --git a/packages/types/src/providers/anthropic.ts b/packages/types/src/providers/anthropic.ts index 5fbf62d50782..d842da85f58e 100644 --- a/packages/types/src/providers/anthropic.ts +++ b/packages/types/src/providers/anthropic.ts @@ -11,6 +11,7 @@ export const anthropicModels = { contextWindow: 200_000, // Default 200K, extendable to 1M with beta flag 'context-1m-2025-08-07' supportsImages: true, supportsPromptCache: true, + supportsNativeToolCalling: true, inputPrice: 3.0, // $3 per million input tokens (≤200K context) outputPrice: 15.0, // $15 per million output tokens (≤200K context) cacheWritesPrice: 3.75, // $3.75 per million tokens @@ -32,6 +33,7 @@ export const anthropicModels = { contextWindow: 200_000, // Default 200K, extendable to 1M with beta flag 'context-1m-2025-08-07' supportsImages: true, supportsPromptCache: true, + supportsNativeToolCalling: true, inputPrice: 3.0, // $3 per million input tokens (≤200K context) outputPrice: 15.0, // $15 per million output tokens (≤200K context) cacheWritesPrice: 3.75, // $3.75 per million tokens @@ -53,6 +55,7 @@ export const 
anthropicModels = { contextWindow: 200_000, supportsImages: true, supportsPromptCache: true, + supportsNativeToolCalling: true, inputPrice: 15.0, // $15 per million input tokens outputPrice: 75.0, // $75 per million output tokens cacheWritesPrice: 18.75, // $18.75 per million tokens @@ -64,6 +67,7 @@ export const anthropicModels = { contextWindow: 200_000, supportsImages: true, supportsPromptCache: true, + supportsNativeToolCalling: true, inputPrice: 15.0, // $15 per million input tokens outputPrice: 75.0, // $75 per million output tokens cacheWritesPrice: 18.75, // $18.75 per million tokens @@ -75,6 +79,7 @@ export const anthropicModels = { contextWindow: 200_000, supportsImages: true, supportsPromptCache: true, + supportsNativeToolCalling: true, inputPrice: 3.0, // $3 per million input tokens outputPrice: 15.0, // $15 per million output tokens cacheWritesPrice: 3.75, // $3.75 per million tokens @@ -87,6 +92,7 @@ export const anthropicModels = { contextWindow: 200_000, supportsImages: true, supportsPromptCache: true, + supportsNativeToolCalling: true, inputPrice: 3.0, // $3 per million input tokens outputPrice: 15.0, // $15 per million output tokens cacheWritesPrice: 3.75, // $3.75 per million tokens @@ -97,6 +103,7 @@ export const anthropicModels = { contextWindow: 200_000, supportsImages: true, supportsPromptCache: true, + supportsNativeToolCalling: true, inputPrice: 3.0, // $3 per million input tokens outputPrice: 15.0, // $15 per million output tokens cacheWritesPrice: 3.75, // $3.75 per million tokens @@ -107,6 +114,7 @@ export const anthropicModels = { contextWindow: 200_000, supportsImages: false, supportsPromptCache: true, + supportsNativeToolCalling: true, inputPrice: 1.0, outputPrice: 5.0, cacheWritesPrice: 1.25, @@ -117,6 +125,7 @@ export const anthropicModels = { contextWindow: 200_000, supportsImages: true, supportsPromptCache: true, + supportsNativeToolCalling: true, inputPrice: 15.0, outputPrice: 75.0, cacheWritesPrice: 18.75, @@ -127,6 +136,7 @@ 
export const anthropicModels = { contextWindow: 200_000, supportsImages: true, supportsPromptCache: true, + supportsNativeToolCalling: true, inputPrice: 0.25, outputPrice: 1.25, cacheWritesPrice: 0.3, @@ -137,6 +147,7 @@ export const anthropicModels = { contextWindow: 200_000, supportsImages: true, supportsPromptCache: true, + supportsNativeToolCalling: true, inputPrice: 1.0, outputPrice: 5.0, cacheWritesPrice: 1.25, diff --git a/packages/types/src/providers/openai.ts b/packages/types/src/providers/openai.ts index a3eed1b57ce6..6010a3f42fc1 100644 --- a/packages/types/src/providers/openai.ts +++ b/packages/types/src/providers/openai.ts @@ -11,6 +11,7 @@ export const openAiNativeModels = { contextWindow: 400000, supportsImages: true, supportsPromptCache: true, + supportsNativeToolCalling: true, supportsReasoningEffort: false, inputPrice: 1.25, outputPrice: 10.0, @@ -23,6 +24,7 @@ export const openAiNativeModels = { contextWindow: 400000, supportsImages: true, supportsPromptCache: true, + supportsNativeToolCalling: true, supportsReasoningEffort: true, reasoningEffort: "medium", inputPrice: 1.25, @@ -42,6 +44,7 @@ export const openAiNativeModels = { contextWindow: 400000, supportsImages: true, supportsPromptCache: true, + supportsNativeToolCalling: true, supportsReasoningEffort: true, reasoningEffort: "medium", inputPrice: 0.25, @@ -60,6 +63,7 @@ export const openAiNativeModels = { contextWindow: 400000, supportsImages: true, supportsPromptCache: true, + supportsNativeToolCalling: true, supportsReasoningEffort: true, reasoningEffort: "medium", inputPrice: 0.05, @@ -75,6 +79,7 @@ export const openAiNativeModels = { contextWindow: 400000, supportsImages: true, supportsPromptCache: true, + supportsNativeToolCalling: true, supportsReasoningEffort: true, reasoningEffort: "medium", inputPrice: 1.25, @@ -89,6 +94,7 @@ export const openAiNativeModels = { contextWindow: 1_047_576, supportsImages: true, supportsPromptCache: true, + supportsNativeToolCalling: true, inputPrice: 
2, outputPrice: 8, cacheReadsPrice: 0.5, @@ -102,6 +108,7 @@ export const openAiNativeModels = { contextWindow: 1_047_576, supportsImages: true, supportsPromptCache: true, + supportsNativeToolCalling: true, inputPrice: 0.4, outputPrice: 1.6, cacheReadsPrice: 0.1, @@ -115,6 +122,7 @@ export const openAiNativeModels = { contextWindow: 1_047_576, supportsImages: true, supportsPromptCache: true, + supportsNativeToolCalling: true, inputPrice: 0.1, outputPrice: 0.4, cacheReadsPrice: 0.025, @@ -128,6 +136,7 @@ export const openAiNativeModels = { contextWindow: 200_000, supportsImages: true, supportsPromptCache: true, + supportsNativeToolCalling: true, inputPrice: 2.0, outputPrice: 8.0, cacheReadsPrice: 0.5, @@ -144,6 +153,7 @@ export const openAiNativeModels = { contextWindow: 200_000, supportsImages: true, supportsPromptCache: true, + supportsNativeToolCalling: true, inputPrice: 2.0, outputPrice: 8.0, cacheReadsPrice: 0.5, @@ -155,6 +165,7 @@ export const openAiNativeModels = { contextWindow: 200_000, supportsImages: true, supportsPromptCache: true, + supportsNativeToolCalling: true, inputPrice: 2.0, outputPrice: 8.0, cacheReadsPrice: 0.5, @@ -166,6 +177,7 @@ export const openAiNativeModels = { contextWindow: 200_000, supportsImages: true, supportsPromptCache: true, + supportsNativeToolCalling: true, inputPrice: 1.1, outputPrice: 4.4, cacheReadsPrice: 0.275, @@ -182,6 +194,7 @@ export const openAiNativeModels = { contextWindow: 200_000, supportsImages: true, supportsPromptCache: true, + supportsNativeToolCalling: true, inputPrice: 1.1, outputPrice: 4.4, cacheReadsPrice: 0.275, @@ -193,6 +206,7 @@ export const openAiNativeModels = { contextWindow: 200_000, supportsImages: true, supportsPromptCache: true, + supportsNativeToolCalling: true, inputPrice: 1.1, outputPrice: 4.4, cacheReadsPrice: 0.275, @@ -204,6 +218,7 @@ export const openAiNativeModels = { contextWindow: 200_000, supportsImages: false, supportsPromptCache: true, + supportsNativeToolCalling: true, inputPrice: 
1.1, outputPrice: 4.4, cacheReadsPrice: 0.55, @@ -216,6 +231,7 @@ export const openAiNativeModels = { contextWindow: 200_000, supportsImages: false, supportsPromptCache: true, + supportsNativeToolCalling: true, inputPrice: 1.1, outputPrice: 4.4, cacheReadsPrice: 0.55, @@ -227,6 +243,7 @@ export const openAiNativeModels = { contextWindow: 200_000, supportsImages: false, supportsPromptCache: true, + supportsNativeToolCalling: true, inputPrice: 1.1, outputPrice: 4.4, cacheReadsPrice: 0.55, @@ -238,6 +255,7 @@ export const openAiNativeModels = { contextWindow: 200_000, supportsImages: true, supportsPromptCache: true, + supportsNativeToolCalling: true, inputPrice: 15, outputPrice: 60, cacheReadsPrice: 7.5, @@ -248,6 +266,7 @@ export const openAiNativeModels = { contextWindow: 128_000, supportsImages: true, supportsPromptCache: true, + supportsNativeToolCalling: true, inputPrice: 15, outputPrice: 60, cacheReadsPrice: 7.5, @@ -258,6 +277,7 @@ export const openAiNativeModels = { contextWindow: 128_000, supportsImages: true, supportsPromptCache: true, + supportsNativeToolCalling: true, inputPrice: 1.1, outputPrice: 4.4, cacheReadsPrice: 0.55, @@ -268,6 +288,7 @@ export const openAiNativeModels = { contextWindow: 128_000, supportsImages: true, supportsPromptCache: true, + supportsNativeToolCalling: true, inputPrice: 2.5, outputPrice: 10, cacheReadsPrice: 1.25, @@ -281,6 +302,7 @@ export const openAiNativeModels = { contextWindow: 128_000, supportsImages: true, supportsPromptCache: true, + supportsNativeToolCalling: true, inputPrice: 0.15, outputPrice: 0.6, cacheReadsPrice: 0.075, @@ -294,6 +316,7 @@ export const openAiNativeModels = { contextWindow: 200_000, supportsImages: false, supportsPromptCache: false, + supportsNativeToolCalling: true, inputPrice: 1.5, outputPrice: 6, cacheReadsPrice: 0, diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 637469dbf073..695a3cef7844 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -15,6 +15,10 @@ overrides: importers: .: + 
dependencies: + '@streamparser/json': + specifier: ^0.0.22 + version: 0.0.22 devDependencies: '@changesets/cli': specifier: ^2.27.10 @@ -653,6 +657,9 @@ importers: '@roo-code/types': specifier: workspace:^ version: link:../packages/types + '@streamparser/json': + specifier: ^0.0.22 + version: 0.0.22 '@vscode/codicons': specifier: ^0.0.36 version: 0.0.36 @@ -3627,6 +3634,9 @@ packages: '@standard-schema/utils@0.3.0': resolution: {integrity: sha512-e7Mew686owMaPJVNNLs55PUvgz371nKgwsc4vxE49zsODpJEnxgxRo2y/OKrqueavXgZNMDVj3DdHFlaSAeU8g==} + '@streamparser/json@0.0.22': + resolution: {integrity: sha512-b6gTSBjJ8G8SuO3Gbbj+zXbVx8NSs1EbpbMKpzGLWMdkR+98McH9bEjSz3+0mPJf68c5nxa3CrJHp5EQNXM6zQ==} + '@swc/counter@0.1.3': resolution: {integrity: sha512-e2BR4lsJkkRlKZ/qCHPw9ZaSxc0MVUd7gtbtaB7aMvHeJVYe8sOB8DBZkP2DtISHGSku9sCK6T6cnY0CtXrOCQ==} @@ -13284,6 +13294,8 @@ snapshots: '@standard-schema/utils@0.3.0': {} + '@streamparser/json@0.0.22': {} + '@swc/counter@0.1.3': {} '@swc/helpers@0.5.15': @@ -13994,7 +14006,7 @@ snapshots: sirv: 3.0.1 tinyglobby: 0.2.14 tinyrainbow: 2.0.0 - vitest: 3.2.4(@types/debug@4.1.12)(@types/node@24.2.1)(@vitest/ui@3.2.4)(jiti@2.4.2)(jsdom@26.1.0)(lightningcss@1.30.1)(tsx@4.19.4)(yaml@2.8.0) + vitest: 3.2.4(@types/debug@4.1.12)(@types/node@20.17.57)(@vitest/ui@3.2.4)(jiti@2.4.2)(jsdom@26.1.0)(lightningcss@1.30.1)(tsx@4.19.4)(yaml@2.8.0) '@vitest/utils@3.2.4': dependencies: diff --git a/src/api/index.ts b/src/api/index.ts index 351f4ef1befe..0623b3040d70 100644 --- a/src/api/index.ts +++ b/src/api/index.ts @@ -3,6 +3,7 @@ import { Anthropic } from "@anthropic-ai/sdk" import type { ProviderSettings, ModelInfo } from "@roo-code/types" import { ApiStream } from "./transform/stream" +import type { ToolSpec } from "./transform/tool-converters" import { GlamaHandler, @@ -73,6 +74,7 @@ export interface ApiHandler { systemPrompt: string, messages: Anthropic.Messages.MessageParam[], metadata?: ApiHandlerCreateMessageMetadata, + tools?: ToolSpec[], ): ApiStream 
getModel(): { id: string; info: ModelInfo } @@ -86,6 +88,15 @@ export interface ApiHandler { * @returns A promise resolving to the token count */ countTokens(content: Array): Promise + + /** + * Returns whether this provider supports native tool calling + * Providers that support native tools can receive tool specifications and + * handle them in their native format instead of using XML-based tools + * + * @returns true if the provider supports native tool calling, false otherwise + */ + supportsNativeTools(): boolean } export function buildApiHandler(configuration: ProviderSettings): ApiHandler { diff --git a/src/api/providers/anthropic.ts b/src/api/providers/anthropic.ts index b2e158eca53e..7872eb942325 100644 --- a/src/api/providers/anthropic.ts +++ b/src/api/providers/anthropic.ts @@ -14,6 +14,7 @@ import type { ApiHandlerOptions } from "../../shared/api" import { ApiStream } from "../transform/stream" import { getModelParams } from "../transform/model-params" +import { toolSpecToAnthropicTool, type ToolSpec } from "../transform/tool-converters" import { BaseProvider } from "./base-provider" import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" @@ -40,6 +41,7 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa systemPrompt: string, messages: Anthropic.Messages.MessageParam[], metadata?: ApiHandlerCreateMessageMetadata, + tools?: ToolSpec[], ): ApiStream { let stream: AnthropicStream const cacheControl: CacheControlEphemeral = { type: "ephemeral" } @@ -53,6 +55,10 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa betas.push("context-1m-2025-08-07") } + // Convert tools to Anthropic format if provided + const anthropicTools = tools?.map(toolSpecToAnthropicTool) + const nativeToolsOn = anthropicTools && anthropicTools.length > 0 + switch (modelId) { case "claude-sonnet-4-5": case "claude-sonnet-4-20250514": @@ -107,6 +113,7 @@ export class AnthropicHandler extends 
BaseProvider implements SingleCompletionHa return message }), stream: true, + tools: nativeToolsOn ? anthropicTools : undefined, }, (() => { // prompt caching: https://x.com/alexalbert__/status/1823751995901272068 @@ -151,6 +158,7 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa let outputTokens = 0 let cacheWriteTokens = 0 let cacheReadTokens = 0 + const lastStartedToolCall = { id: "", name: "", arguments: "" } for await (const chunk of stream) { switch (chunk.type) { @@ -202,6 +210,14 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa yield { type: "reasoning", text: chunk.content_block.thinking } break + case "tool_use": + // Convert Anthropic tool_use to OpenAI-compatible format + if (chunk.content_block.id && chunk.content_block.name) { + lastStartedToolCall.id = chunk.content_block.id + lastStartedToolCall.name = chunk.content_block.name + lastStartedToolCall.arguments = "" + } + break case "text": // We may receive multiple text blocks, in which // case just insert a line break between them. 
@@ -218,6 +234,21 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa case "thinking_delta": yield { type: "reasoning", text: chunk.delta.thinking } break + case "input_json_delta": + // Stream tool arguments as they arrive + if (lastStartedToolCall.id && lastStartedToolCall.name && chunk.delta.partial_json) { + yield { + type: "tool_calls", + tool_call: { + function: { + id: lastStartedToolCall.id, + name: lastStartedToolCall.name, + arguments: chunk.delta.partial_json, + }, + }, + } + } + break case "text_delta": yield { type: "text", text: chunk.delta.text } break @@ -225,6 +256,10 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa break case "content_block_stop": + // Reset tool call state when block ends + lastStartedToolCall.id = "" + lastStartedToolCall.name = "" + lastStartedToolCall.arguments = "" break } } diff --git a/src/api/providers/base-provider.ts b/src/api/providers/base-provider.ts index 1abbf5f558cb..8d05da8970d9 100644 --- a/src/api/providers/base-provider.ts +++ b/src/api/providers/base-provider.ts @@ -1,4 +1,5 @@ import { Anthropic } from "@anthropic-ai/sdk" +import * as vscode from "vscode" import type { ModelInfo } from "@roo-code/types" @@ -32,4 +33,21 @@ export abstract class BaseProvider implements ApiHandler { return countTokens(content, { useWorker: true }) } + + /** + * Returns whether native tool calling is supported for the current model AND enabled by user. + * Checks the model's supportsNativeToolCalling property and the user setting. + * + * @returns true if model supports it AND setting is enabled, false otherwise + */ + supportsNativeTools(): boolean { + const model = this.getModel() + const modelSupportsNativeTools = model.info.supportsNativeToolCalling ?? 
false + + if (!modelSupportsNativeTools) { + return false + } + + return vscode.workspace.getConfiguration("roo-cline").get("nativeToolCalling", false) + } } diff --git a/src/api/providers/fake-ai.ts b/src/api/providers/fake-ai.ts index c73752fc6619..b0bbb72af13f 100644 --- a/src/api/providers/fake-ai.ts +++ b/src/api/providers/fake-ai.ts @@ -78,4 +78,8 @@ export class FakeAIHandler implements ApiHandler, SingleCompletionHandler { completePrompt(prompt: string): Promise { return this.ai.completePrompt(prompt) } + + supportsNativeTools(): boolean { + return false + } } diff --git a/src/api/providers/fetchers/__tests__/openrouter.spec.ts b/src/api/providers/fetchers/__tests__/openrouter.spec.ts index 37cdc5443980..02a764dd00a3 100644 --- a/src/api/providers/fetchers/__tests__/openrouter.spec.ts +++ b/src/api/providers/fetchers/__tests__/openrouter.spec.ts @@ -21,6 +21,7 @@ describe("OpenRouter API", () => { contextWindow: 200000, supportsImages: true, supportsPromptCache: true, + supportsNativeToolCalling: true, inputPrice: 3, outputPrice: 15, cacheWritesPrice: 3.75, @@ -36,6 +37,7 @@ describe("OpenRouter API", () => { contextWindow: 200000, supportsImages: true, supportsPromptCache: true, + supportsNativeToolCalling: true, inputPrice: 3, outputPrice: 15, cacheWritesPrice: 3.75, @@ -89,6 +91,7 @@ describe("OpenRouter API", () => { contextWindow: 1048576, supportsImages: true, supportsPromptCache: true, + supportsNativeToolCalling: false, supportsReasoningBudget: true, inputPrice: 1.25, outputPrice: 10, @@ -103,6 +106,7 @@ describe("OpenRouter API", () => { contextWindow: 1048576, supportsImages: true, supportsPromptCache: true, + supportsNativeToolCalling: false, supportsReasoningBudget: true, inputPrice: 1.25, outputPrice: 10, diff --git a/src/api/providers/fetchers/__tests__/roo.spec.ts b/src/api/providers/fetchers/__tests__/roo.spec.ts index dcc79e941fa3..edef63bc8aed 100644 --- a/src/api/providers/fetchers/__tests__/roo.spec.ts +++ 
b/src/api/providers/fetchers/__tests__/roo.spec.ts @@ -69,6 +69,7 @@ describe("getRooModels", () => { supportsReasoningEffort: true, requiredReasoningEffort: false, supportsPromptCache: true, + supportsNativeToolCalling: false, inputPrice: 100, // 0.0001 * 1_000_000 outputPrice: 200, // 0.0002 * 1_000_000 cacheWritesPrice: 100, // 0.0001 * 1_000_000 @@ -117,6 +118,7 @@ describe("getRooModels", () => { supportsReasoningEffort: true, requiredReasoningEffort: true, supportsPromptCache: false, + supportsNativeToolCalling: false, inputPrice: 100, // 0.0001 * 1_000_000 outputPrice: 200, // 0.0002 * 1_000_000 cacheWritesPrice: undefined, @@ -163,6 +165,7 @@ describe("getRooModels", () => { supportsReasoningEffort: false, requiredReasoningEffort: false, supportsPromptCache: false, + supportsNativeToolCalling: false, inputPrice: 100, // 0.0001 * 1_000_000 outputPrice: 200, // 0.0002 * 1_000_000 cacheWritesPrice: undefined, diff --git a/src/api/providers/fetchers/openrouter.ts b/src/api/providers/fetchers/openrouter.ts index b546c40a3cfc..7af2c2a67872 100644 --- a/src/api/providers/fetchers/openrouter.ts +++ b/src/api/providers/fetchers/openrouter.ts @@ -210,6 +210,7 @@ export const parseOpenRouterModel = ({ contextWindow: model.context_length, supportsImages: inputModality?.includes("image") ?? false, supportsPromptCache, + supportsNativeToolCalling: supportedParameters?.includes("tools") ?? 
false, inputPrice: parseApiPrice(model.pricing?.prompt), outputPrice: parseApiPrice(model.pricing?.completion), cacheWritesPrice, diff --git a/src/api/providers/fetchers/roo.ts b/src/api/providers/fetchers/roo.ts index 17aec4253b31..a4ececfc52f5 100644 --- a/src/api/providers/fetchers/roo.ts +++ b/src/api/providers/fetchers/roo.ts @@ -83,6 +83,9 @@ export async function getRooModels(baseUrl: string, apiKey?: string): Promise void) | undefined // Resolved service tier from Responses API (actual tier used by OpenAI) private lastServiceTier: ServiceTier | undefined + private toolCallProcessor: ToolCallProcessor // Event types handled by the shared event processor to avoid duplication private readonly coreHandledEventTypes = new Set([ @@ -63,6 +66,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio } const apiKey = this.options.openAiNativeApiKey ?? "not-provided" this.client = new OpenAI({ baseURL: this.options.openAiNativeBaseUrl, apiKey }) + this.toolCallProcessor = new ToolCallProcessor() } private normalizeUsage(usage: any, model: OpenAiNativeModel): ApiStreamUsageChunk | undefined { @@ -141,11 +145,19 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio systemPrompt: string, messages: Anthropic.Messages.MessageParam[], metadata?: ApiHandlerCreateMessageMetadata, + tools?: ToolSpec[], ): ApiStream { + // Reset tool call processor for new message + this.toolCallProcessor.reset() + const model = this.getModel() + // Convert tools to OpenAI format if provided + const openAITools = tools?.map(toolSpecToOpenAITool) + const nativeToolsOn = openAITools && openAITools.length > 0 + // Use Responses API for ALL models - yield* this.handleResponsesApiMessage(model, systemPrompt, messages, metadata) + yield* this.handleResponsesApiMessage(model, systemPrompt, messages, metadata, tools) } private async *handleResponsesApiMessage( @@ -153,6 +165,7 @@ export class OpenAiNativeHandler extends BaseProvider implements 
SingleCompletio systemPrompt: string, messages: Anthropic.Messages.MessageParam[], metadata?: ApiHandlerCreateMessageMetadata, + tools?: ToolSpec[], ): ApiStream { // Reset resolved tier for this request; will be set from response if present this.lastServiceTier = undefined @@ -215,10 +228,11 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio verbosity, reasoningEffort, metadata, + tools, ) // Make the request (pass systemPrompt and messages for potential retry) - yield* this.executeRequest(requestBody, model, metadata, systemPrompt, messages) + yield* this.executeRequest(requestBody, model, metadata, systemPrompt, messages, tools) } private buildRequestBody( @@ -229,7 +243,11 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio verbosity: any, reasoningEffort: ReasoningEffortWithMinimal | undefined, metadata?: ApiHandlerCreateMessageMetadata, + tools?: ToolSpec[], ): any { + // Convert tools to OpenAI format if provided + const openAITools = tools?.map(toolSpecToOpenAITool) + const nativeToolsOn = openAITools && openAITools.length > 0 // Build a request body (also used for fallback) // Ensure we explicitly pass max_output_tokens for GPT‑5 based on Roo's reserved model response calculation // so requests do not default to very large limits (e.g., 120k). @@ -245,6 +263,8 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio store?: boolean instructions?: string service_tier?: ServiceTier + tools?: any[] + tool_choice?: "auto" | "none" } // Validate requested tier against model support; if not supported, omit. 
@@ -290,6 +310,12 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio body.text = { verbosity: (verbosity || "medium") as VerbosityLevel } } + // Add native tool calling support + if (nativeToolsOn) { + body.tools = openAITools + body.tool_choice = "auto" as const + } + return body } @@ -299,6 +325,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio metadata?: ApiHandlerCreateMessageMetadata, systemPrompt?: string, messages?: Anthropic.Messages.MessageParam[], + tools?: ToolSpec[], ): ApiStream { try { // Use the official SDK @@ -311,7 +338,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio } for await (const event of stream) { - for await (const outChunk of this.processEvent(event, model)) { + for await (const outChunk of this.processEvent(event, model, !!tools?.length)) { yield outChunk } } @@ -352,25 +379,33 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio metadata, systemPrompt, messages, + tools, ) return } for await (const event of retryStream) { - for await (const outChunk of this.processEvent(event, model)) { + for await (const outChunk of this.processEvent(event, model, !!tools?.length)) { yield outChunk } } return } catch (retryErr) { // If retry also fails, fall back to SSE - yield* this.makeGpt5ResponsesAPIRequest(retryRequestBody, model, metadata, systemPrompt, messages) + yield* this.makeGpt5ResponsesAPIRequest( + retryRequestBody, + model, + metadata, + systemPrompt, + messages, + tools, + ) return } } // For other errors, fallback to manual SSE via fetch - yield* this.makeGpt5ResponsesAPIRequest(requestBody, model, metadata, systemPrompt, messages) + yield* this.makeGpt5ResponsesAPIRequest(requestBody, model, metadata, systemPrompt, messages, tools) } } @@ -461,6 +496,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio metadata?: ApiHandlerCreateMessageMetadata, systemPrompt?: 
string, messages?: Anthropic.Messages.MessageParam[], + tools?: ToolSpec[], ): ApiStream { const apiKey = this.options.openAiNativeApiKey ?? "not-provided" const baseUrl = this.options.openAiNativeBaseUrl || "https://api.openai.com" @@ -541,7 +577,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio } // Handle the successful retry response - yield* this.handleStreamResponse(retryResponse.body, model) + yield* this.handleStreamResponse(retryResponse.body, model, tools) return } @@ -585,7 +621,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio } // Handle streaming response - yield* this.handleStreamResponse(response.body, model) + yield* this.handleStreamResponse(response.body, model, tools) } catch (error) { if (error instanceof Error) { // Re-throw with the original error message if it's already formatted @@ -648,7 +684,12 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio * and yields structured data chunks (`ApiStream`). It handles a wide variety of event types, * including text deltas, reasoning, usage data, and various status/tool events. 
*/ - private async *handleStreamResponse(body: ReadableStream, model: OpenAiNativeModel): ApiStream { + private async *handleStreamResponse( + body: ReadableStream, + model: OpenAiNativeModel, + tools?: ToolSpec[], + ): ApiStream { + const nativeToolsOn = tools && tools.length > 0 const reader = body.getReader() const decoder = new TextDecoder() let buffer = "" @@ -686,7 +727,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio // Delegate standard event types to the shared processor to avoid duplication if (parsed?.type && this.coreHandledEventTypes.has(parsed.type)) { - for await (const outChunk of this.processEvent(parsed, model)) { + for await (const outChunk of this.processEvent(parsed, model, !!tools?.length)) { // Track whether we've emitted any content so fallback handling can decide appropriately if (outChunk.type === "text" || outChunk.type === "reasoning") { hasContent = true @@ -1097,7 +1138,7 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio /** * Shared processor for Responses API events. 
*/ - private async *processEvent(event: any, model: OpenAiNativeModel): ApiStream { + private async *processEvent(event: any, model: OpenAiNativeModel, nativeToolsOn: boolean = false): ApiStream { // Persist response id for conversation continuity when available if (event?.response?.id) { this.resolveResponseId(event.response.id) @@ -1151,6 +1192,16 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio yield { type: "text", text: content.text } } } + } else if (nativeToolsOn && item.type === "function_call" && item.function) { + // Handle function call in output items (native tool calling) + for (const toolCallChunk of this.toolCallProcessor.processToolCallDeltas([ + { + id: item.id, + function: item.function, + }, + ])) { + yield toolCallChunk + } } } return @@ -1166,6 +1217,16 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio return } + // Handle tool call deltas from standard Chat Completions format + if (nativeToolsOn && event?.choices?.[0]?.delta?.tool_calls) { + for (const toolCallChunk of this.toolCallProcessor.processToolCallDeltas( + event.choices[0].delta.tool_calls, + )) { + yield toolCallChunk + } + return + } + // Fallbacks for older formats or unexpected objects if (event?.choices?.[0]?.delta?.content) { yield { type: "text", text: event.choices[0].delta.content } diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts index aebe671712a7..0706d61fc4f2 100644 --- a/src/api/providers/openai.ts +++ b/src/api/providers/openai.ts @@ -19,6 +19,8 @@ import { convertToR1Format } from "../transform/r1-format" import { convertToSimpleMessages } from "../transform/simple-format" import { ApiStream, ApiStreamUsageChunk } from "../transform/stream" import { getModelParams } from "../transform/model-params" +import { ToolCallProcessor } from "../transform/tool-call-processor" +import { toolSpecToOpenAITool, type ToolSpec } from "../transform/tool-converters" import { DEFAULT_HEADERS } 
from "./constants" import { BaseProvider } from "./base-provider" @@ -33,10 +35,12 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl protected options: ApiHandlerOptions private client: OpenAI private readonly providerName = "OpenAI" + private toolCallProcessor: ToolCallProcessor constructor(options: ApiHandlerOptions) { super() this.options = options + this.toolCallProcessor = new ToolCallProcessor() const baseURL = this.options.openAiBaseUrl ?? "https://api.openai.com/v1" const apiKey = this.options.openAiApiKey ?? "not-provided" @@ -84,7 +88,15 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl systemPrompt: string, messages: Anthropic.Messages.MessageParam[], metadata?: ApiHandlerCreateMessageMetadata, + tools?: ToolSpec[], ): ApiStream { + // Reset tool call processor for new message + this.toolCallProcessor.reset() + + // Convert tools to OpenAI format if provided + const openAITools = tools?.map(toolSpecToOpenAITool) + const nativeToolsOn = openAITools && openAITools.length > 0 + const { info: modelInfo, reasoning } = this.getModel() const modelUrl = this.options.openAiBaseUrl ?? "" const modelId = this.options.openAiModelId ?? "" @@ -164,6 +176,12 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl stream: true as const, ...(isGrokXAI ? {} : { stream_options: { include_usage: true } }), ...(reasoning && reasoning), + // Add native tool calling support + ...(nativeToolsOn && { + tools: openAITools, + tool_choice: "auto" as const, + parallel_tool_calls: false, + }), } // Add max_tokens if needed @@ -193,6 +211,13 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl for await (const chunk of stream) { const delta = chunk.choices[0]?.delta ?? 
{} + // Handle native tool calls + if (nativeToolsOn && delta.tool_calls) { + for (const toolCallChunk of this.toolCallProcessor.processToolCallDeltas(delta.tool_calls)) { + yield toolCallChunk + } + } + if (delta.content) { for (const chunk of matcher.update(delta.content)) { yield chunk diff --git a/src/api/providers/openrouter.ts b/src/api/providers/openrouter.ts index 580b17331194..7b8b6ea68577 100644 --- a/src/api/providers/openrouter.ts +++ b/src/api/providers/openrouter.ts @@ -18,13 +18,15 @@ import { addCacheBreakpoints as addAnthropicCacheBreakpoints } from "../transfor import { addCacheBreakpoints as addGeminiCacheBreakpoints } from "../transform/caching/gemini" import type { OpenRouterReasoningParams } from "../transform/reasoning" import { getModelParams } from "../transform/model-params" +import { ToolCallProcessor } from "../transform/tool-call-processor" +import { toolSpecToOpenAITool, type ToolSpec } from "../transform/tool-converters" import { getModels } from "./fetchers/modelCache" import { getModelEndpoints } from "./fetchers/modelEndpointCache" import { DEFAULT_HEADERS } from "./constants" import { BaseProvider } from "./base-provider" -import type { SingleCompletionHandler } from "../index" +import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" import { handleOpenAIError } from "./utils/openai-error-handler" // Image generation types @@ -87,6 +89,7 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH protected models: ModelRecord = {} protected endpoints: ModelRecord = {} private readonly providerName = "OpenRouter" + private toolCallProcessor: ToolCallProcessor constructor(options: ApiHandlerOptions) { super() @@ -96,12 +99,22 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH const apiKey = this.options.openRouterApiKey ?? 
"not-provided" this.client = new OpenAI({ baseURL, apiKey, defaultHeaders: DEFAULT_HEADERS }) + this.toolCallProcessor = new ToolCallProcessor() } override async *createMessage( systemPrompt: string, messages: Anthropic.Messages.MessageParam[], + metadata?: ApiHandlerCreateMessageMetadata, + tools?: ToolSpec[], ): AsyncGenerator { + // Reset tool call processor for new message + this.toolCallProcessor.reset() + + // Convert tools to OpenAI format if provided + const openAITools = tools?.map(toolSpecToOpenAITool) + const nativeToolsOn = openAITools && openAITools.length > 0 + const model = await this.fetchModel() let { id: modelId, maxTokens, temperature, topP, reasoning } = model @@ -161,6 +174,12 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH }), ...(transforms && { transforms }), ...(reasoning && { reasoning }), + // Add native tool calling support + ...(nativeToolsOn && { + tools: openAITools, + tool_choice: "auto" as const, + parallel_tool_calls: false, + }), } let stream @@ -182,6 +201,13 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH const delta = chunk.choices[0]?.delta + // Handle native tool calls + if (nativeToolsOn && delta?.tool_calls) { + for (const toolCallChunk of this.toolCallProcessor.processToolCallDeltas(delta.tool_calls)) { + yield toolCallChunk + } + } + if ("reasoning" in delta && delta.reasoning && typeof delta.reasoning === "string") { yield { type: "reasoning", text: delta.reasoning } } diff --git a/src/api/providers/roo.ts b/src/api/providers/roo.ts index 3bd1bb65dc36..c7cc55cf3fc3 100644 --- a/src/api/providers/roo.ts +++ b/src/api/providers/roo.ts @@ -1,3 +1,4 @@ +import * as vscode from "vscode" import { Anthropic } from "@anthropic-ai/sdk" import OpenAI from "openai" @@ -10,6 +11,8 @@ import { getModelParams } from "../transform/model-params" import { convertToOpenAiMessages } from "../transform/openai-format" import type { RooReasoningParams } from 
"../transform/reasoning" import { getRooReasoning } from "../transform/reasoning" +import { ToolCallProcessor } from "../transform/tool-call-processor" +import { toolSpecToOpenAITool, type ToolSpec } from "../transform/tool-converters" import type { ApiHandlerCreateMessageMetadata } from "../index" import { DEFAULT_HEADERS } from "./constants" @@ -31,6 +34,7 @@ type RooChatCompletionParams = OpenAI.Chat.ChatCompletionCreateParamsStreaming & export class RooHandler extends BaseOpenAiCompatibleProvider { private authStateListener?: (state: { state: AuthState }) => void private fetcherBaseURL: string + private toolCallProcessor: ToolCallProcessor constructor(options: ApiHandlerOptions) { let sessionToken: string | undefined = undefined @@ -58,6 +62,8 @@ export class RooHandler extends BaseOpenAiCompatibleProvider { defaultTemperature: 0.7, }) + this.toolCallProcessor = new ToolCallProcessor() + // Load dynamic models asynchronously - strip /v1 from baseURL for fetcher this.fetcherBaseURL = baseURL.endsWith("/v1") ? baseURL.slice(0, -3) : baseURL this.loadDynamicModels(this.fetcherBaseURL, sessionToken).catch((error) => { @@ -93,6 +99,7 @@ export class RooHandler extends BaseOpenAiCompatibleProvider { messages: Anthropic.Messages.MessageParam[], metadata?: ApiHandlerCreateMessageMetadata, requestOptions?: OpenAI.RequestOptions, + tools?: ToolSpec[], ) { const { id: model, info } = this.getModel() @@ -116,6 +123,10 @@ export class RooHandler extends BaseOpenAiCompatibleProvider { const max_tokens = params.maxTokens ?? undefined const temperature = params.temperature ?? 
this.defaultTemperature + // Convert tools to OpenAI format if provided + const openAITools = tools?.map(toolSpecToOpenAITool) + const nativeToolsOn = openAITools && openAITools.length > 0 + const rooParams: RooChatCompletionParams = { model, max_tokens, @@ -124,6 +135,10 @@ export class RooHandler extends BaseOpenAiCompatibleProvider { stream: true, stream_options: { include_usage: true }, ...(reasoning && { reasoning }), + ...(nativeToolsOn && { + tools: openAITools, + tool_choice: "auto" as const, + }), } try { @@ -137,15 +152,21 @@ export class RooHandler extends BaseOpenAiCompatibleProvider { systemPrompt: string, messages: Anthropic.Messages.MessageParam[], metadata?: ApiHandlerCreateMessageMetadata, + tools?: ToolSpec[], ): ApiStream { + // Reset tool call processor for new message + this.toolCallProcessor.reset() + const stream = await this.createStream( systemPrompt, messages, metadata, metadata?.taskId ? { headers: { "X-Roo-Task-ID": metadata.taskId } } : undefined, + tools, ) let lastUsage: RooUsage | undefined = undefined + const nativeToolsOn = tools && tools.length > 0 for await (const chunk of stream) { const delta = chunk.choices[0]?.delta @@ -167,6 +188,13 @@ export class RooHandler extends BaseOpenAiCompatibleProvider { } } + // Handle native tool calls + if (nativeToolsOn && delta.tool_calls) { + for (const toolCallChunk of this.toolCallProcessor.processToolCallDeltas(delta.tool_calls)) { + yield toolCallChunk + } + } + if (delta.content) { yield { type: "text", diff --git a/src/api/providers/router-provider.ts b/src/api/providers/router-provider.ts index 25e9a11e1b2c..6500413f84a0 100644 --- a/src/api/providers/router-provider.ts +++ b/src/api/providers/router-provider.ts @@ -1,3 +1,4 @@ +import * as vscode from "vscode" import OpenAI from "openai" import type { ModelInfo } from "@roo-code/types" diff --git a/src/api/providers/vercel-ai-gateway.ts b/src/api/providers/vercel-ai-gateway.ts index be77d35986b4..57b8eef25c86 100644 --- 
a/src/api/providers/vercel-ai-gateway.ts +++ b/src/api/providers/vercel-ai-gateway.ts @@ -13,6 +13,8 @@ import { ApiHandlerOptions } from "../../shared/api" import { ApiStream } from "../transform/stream" import { convertToOpenAiMessages } from "../transform/openai-format" import { addCacheBreakpoints } from "../transform/caching/vercel-ai-gateway" +import { ToolCallProcessor } from "../transform/tool-call-processor" +import { toolSpecToOpenAITool, type ToolSpec } from "../transform/tool-converters" import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" import { RouterProvider } from "./router-provider" @@ -24,6 +26,8 @@ interface VercelAiGatewayUsage extends OpenAI.CompletionUsage { } export class VercelAiGatewayHandler extends RouterProvider implements SingleCompletionHandler { + private toolCallProcessor: ToolCallProcessor + constructor(options: ApiHandlerOptions) { super({ options, @@ -34,13 +38,22 @@ export class VercelAiGatewayHandler extends RouterProvider implements SingleComp defaultModelId: vercelAiGatewayDefaultModelId, defaultModelInfo: vercelAiGatewayDefaultModelInfo, }) + this.toolCallProcessor = new ToolCallProcessor() } override async *createMessage( systemPrompt: string, messages: Anthropic.Messages.MessageParam[], metadata?: ApiHandlerCreateMessageMetadata, + tools?: ToolSpec[], ): ApiStream { + // Reset tool call processor for new message + this.toolCallProcessor.reset() + + // Convert tools to OpenAI format if provided + const openAITools = tools?.map(toolSpecToOpenAITool) + const nativeToolsOn = openAITools && openAITools.length > 0 + const { id: modelId, info } = await this.fetchModel() const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [ @@ -60,12 +73,26 @@ export class VercelAiGatewayHandler extends RouterProvider implements SingleComp : undefined, max_completion_tokens: info.maxTokens, stream: true, + // Add native tool calling support + ...(nativeToolsOn && { + tools: openAITools, + 
tool_choice: "auto" as const, + parallel_tool_calls: false, + }), } const completion = await this.client.chat.completions.create(body) for await (const chunk of completion) { const delta = chunk.choices[0]?.delta + + // Handle native tool calls + if (nativeToolsOn && delta?.tool_calls) { + for (const toolCallChunk of this.toolCallProcessor.processToolCallDeltas(delta.tool_calls)) { + yield toolCallChunk + } + } + if (delta?.content) { yield { type: "text", diff --git a/src/api/transform/__tests__/image-cleaning.spec.ts b/src/api/transform/__tests__/image-cleaning.spec.ts index e32a4b8770f3..8d3c47e24a29 100644 --- a/src/api/transform/__tests__/image-cleaning.spec.ts +++ b/src/api/transform/__tests__/image-cleaning.spec.ts @@ -18,6 +18,7 @@ describe("maybeRemoveImageBlocks", () => { }), createMessage: vitest.fn(), countTokens: vitest.fn(), + supportsNativeTools: () => false, } } diff --git a/src/api/transform/stream.ts b/src/api/transform/stream.ts index 8484e6259580..985d2e23feb8 100644 --- a/src/api/transform/stream.ts +++ b/src/api/transform/stream.ts @@ -5,6 +5,7 @@ export type ApiStreamChunk = | ApiStreamUsageChunk | ApiStreamReasoningChunk | ApiStreamGroundingChunk + | ApiStreamToolCallsChunk | ApiStreamError export interface ApiStreamError { @@ -43,3 +44,14 @@ export interface GroundingSource { url: string snippet?: string } + +export interface ApiStreamToolCallsChunk { + type: "tool_calls" + tool_call: { + function: { + id: string + name: string + arguments: string + } + } +} diff --git a/src/api/transform/tool-call-processor.ts b/src/api/transform/tool-call-processor.ts new file mode 100644 index 000000000000..fbceea90fcfd --- /dev/null +++ b/src/api/transform/tool-call-processor.ts @@ -0,0 +1,65 @@ +import type { ApiStreamToolCallsChunk } from "./stream" + +/** + * Helper class to process tool call deltas from OpenAI-compatible streaming responses. 
+ * Handles accumulating tool call ID and name across multiple delta chunks, + * and yields properly formatted tool call chunks when arguments are received. + */ +export class ToolCallProcessor { + private lastToolCall: { id: string; name: string } + + constructor() { + this.lastToolCall = { id: "", name: "" } + } + + /** + * Process tool call deltas from a chunk and yield formatted tool call chunks. + * @param toolCallDeltas - Array of tool call deltas from the chunk + * @yields Formatted tool call chunks ready to be yielded in the API stream + */ + *processToolCallDeltas(toolCallDeltas: any[] | undefined): Generator { + if (!toolCallDeltas) { + return + } + + for (const toolCallDelta of toolCallDeltas) { + // Accumulate the tool call ID if present + if (toolCallDelta.id) { + this.lastToolCall.id = toolCallDelta.id + } + + // Accumulate the function name if present + if (toolCallDelta.function?.name) { + this.lastToolCall.name = toolCallDelta.function.name + } + + // Only yield when we have all required fields: id, name, and arguments + if (this.lastToolCall.id && this.lastToolCall.name && toolCallDelta.function?.arguments) { + yield { + type: "tool_calls", + tool_call: { + function: { + id: this.lastToolCall.id, + name: this.lastToolCall.name, + arguments: toolCallDelta.function.arguments, + }, + }, + } + } + } + } + + /** + * Reset the internal state. Call this when starting a new message. + */ + reset(): void { + this.lastToolCall = { id: "", name: "" } + } + + /** + * Get the current accumulated tool call state (useful for debugging). 
+ */ + getState(): { id: string; name: string } { + return { ...this.lastToolCall } + } +} diff --git a/src/api/transform/tool-converters.ts b/src/api/transform/tool-converters.ts new file mode 100644 index 000000000000..7012fcae3379 --- /dev/null +++ b/src/api/transform/tool-converters.ts @@ -0,0 +1,143 @@ +import { Anthropic } from "@anthropic-ai/sdk" +import type { ChatCompletionTool as OpenAITool } from "openai/resources/chat/completions" + +export interface ToolSpec { + name: string + description: string + parameters: ToolSpecParameter[] +} + +export interface ToolSpecParameter { + name: string + type?: "string" | "boolean" | "integer" | "array" | "object" + required: boolean + description: string + items?: any + properties?: Record + [key: string]: any // For additional JSON Schema fields +} + +/** + * Converts a ToolSpec into an OpenAI ChatCompletionTool definition + */ +export function toolSpecToOpenAITool(tool: ToolSpec): OpenAITool { + const properties: Record = {} + const required: string[] = [] + + if (tool.parameters) { + for (const param of tool.parameters) { + if (param.required) { + required.push(param.name) + } + + const paramType: string = param.type || "string" + + const paramSchema: any = { + type: paramType, + description: param.description, + } + + if (paramType === "array" && param.items) { + paramSchema.items = param.items + } + + if (paramType === "object" && param.properties) { + paramSchema.properties = param.properties + } + + // Preserve any additional JSON Schema fields + const reservedKeys = new Set(["name", "required", "description", "type", "items", "properties"]) + for (const key in param) { + if (!reservedKeys.has(key) && param[key] !== undefined) { + paramSchema[key] = param[key] + } + } + + properties[param.name] = paramSchema + } + } + + return { + type: "function", + function: { + name: tool.name, + description: tool.description, + strict: false, + parameters: { + type: "object", + properties, + required, + 
additionalProperties: false, + }, + }, + } +} + +/** + * Converts a ToolSpec into an Anthropic Tool definition + */ +export function toolSpecToAnthropicTool(tool: ToolSpec): Anthropic.Tool { + const properties: Record = {} + const required: string[] = [] + + if (tool.parameters) { + for (const param of tool.parameters) { + if (param.required) { + required.push(param.name) + } + + const paramType: string = param.type || "string" + + const paramSchema: any = { + type: paramType, + description: param.description, + } + + if (paramType === "array" && param.items) { + paramSchema.items = param.items + } + + if (paramType === "object" && param.properties) { + paramSchema.properties = param.properties + } + + // Preserve any additional JSON Schema fields + const reservedKeys = new Set(["name", "required", "description", "type", "items", "properties"]) + for (const key in param) { + if (!reservedKeys.has(key) && param[key] !== undefined) { + paramSchema[key] = param[key] + } + } + + properties[param.name] = paramSchema + } + } + + return { + name: tool.name, + description: tool.description, + input_schema: { + type: "object", + properties, + required, + }, + } +} + +/** + * Converts an OpenAI ChatCompletionTool into an Anthropic Tool definition + */ +export function openAIToolToAnthropic(openAITool: OpenAITool): Anthropic.Tool { + // Handle both function and custom tool types + const func = "function" in openAITool ? 
openAITool.function : (openAITool as any).function + + return { + name: func.name, + description: func.description || "", + input_schema: { + type: "object", + properties: func.parameters?.properties || {}, + required: (func.parameters?.required as string[]) || [], + }, + } +} diff --git a/src/api/transform/tool-use-handler.ts b/src/api/transform/tool-use-handler.ts new file mode 100644 index 000000000000..4ccca08b6f57 --- /dev/null +++ b/src/api/transform/tool-use-handler.ts @@ -0,0 +1,174 @@ +import { Anthropic } from "@anthropic-ai/sdk" +import { JSONParser } from "@streamparser/json" + +import type { ToolUse } from "../../shared/tools" + +export interface PendingToolUse { + id: string + name: string + input: string + parsedInput?: unknown + jsonParser?: JSONParser +} + +interface ToolUseDeltaBlock { + id?: string + type?: string + name?: string + input?: string +} + +const ESCAPE_MAP: Record = { + "\\n": "\n", + "\\t": "\t", + "\\r": "\r", + '\\"': '"', + "\\\\": "\\", +} + +const ESCAPE_PATTERN = /\\[ntr"\\]/g + +/** + * Handles streaming tool use blocks and converts them to Anthropic.ToolUseBlockParam format + */ +export class ToolUseHandler { + private pendingToolUses = new Map() + + processToolUseDelta(delta: ToolUseDeltaBlock): void { + if (delta.type !== "tool_use" || !delta.id) { + return + } + + let pending = this.pendingToolUses.get(delta.id) + if (!pending) { + pending = this.createPendingToolUse(delta.id, delta.name || "") + } + + if (delta.name) { + pending.name = delta.name + } + if (delta.input) { + pending.input += delta.input + try { + pending.jsonParser?.write(delta.input) + } catch { + // Expected during streaming + } + } + } + + getFinalizedToolUse(id: string): Anthropic.ToolUseBlockParam | undefined { + const pending = this.pendingToolUses.get(id) + if (!pending?.name) { + return undefined + } + + let input: unknown = {} + if (pending.parsedInput != null) { + input = pending.parsedInput + } else if (pending.input) { + try { + input = 
JSON.parse(pending.input) + } catch { + input = this.extractPartialJsonFields(pending.input) + } + } + + return { + type: "tool_use", + id: pending.id, + name: pending.name, + input, + } + } + + getAllFinalizedToolUses(): Anthropic.ToolUseBlockParam[] { + const results: Anthropic.ToolUseBlockParam[] = [] + for (const id of this.pendingToolUses.keys()) { + const toolUse = this.getFinalizedToolUse(id) + if (toolUse) { + results.push(toolUse) + } + } + return results + } + + hasToolUse(id: string): boolean { + return this.pendingToolUses.has(id) + } + + getPartialToolUsesAsContent(): ToolUse[] { + const results: ToolUse[] = [] + + for (const pending of this.pendingToolUses.values()) { + if (!pending.name) { + continue + } + + let input: any = {} + if (pending.parsedInput != null) { + input = pending.parsedInput + } else if (pending.input) { + try { + input = JSON.parse(pending.input) + } catch { + input = this.extractPartialJsonFields(pending.input) + } + } + + // Convert input object to string params as expected by ToolUse + const params: Record = {} + if (typeof input === "object") { + for (const [key, value] of Object.entries(input)) { + params[key] = typeof value === "string" ? 
value : JSON.stringify(value) + } + } + + results.push({ + type: "tool_use", + name: pending.name as any, // Will be validated later + params: params as any, + partial: true, + }) + } + + return results + } + + reset(): void { + this.pendingToolUses.clear() + } + + private createPendingToolUse(id: string, name: string): PendingToolUse { + const jsonParser = new JSONParser() + const pending: PendingToolUse = { + id, + name, + input: "", + parsedInput: undefined, + jsonParser, + } + + jsonParser.onValue = (info: any) => { + if (info.stack.length === 0 && info.value && typeof info.value === "object") { + pending.parsedInput = info.value + } + } + + jsonParser.onError = () => {} + + this.pendingToolUses.set(id, pending) + return pending + } + + private extractPartialJsonFields(partialJson: string): Record { + const result: Record = {} + const pattern = /"(\w+)":\s*"((?:[^"\\]|\\.)*)(?:")?/g + + for (const match of partialJson.matchAll(pattern)) { + result[match[1]] = match[2].replace(ESCAPE_PATTERN, (m) => ESCAPE_MAP[m]) + } + + return result + } +} diff --git a/src/core/context/ensureToolResultsFollowToolUse.ts b/src/core/context/ensureToolResultsFollowToolUse.ts new file mode 100644 index 000000000000..3488a9e992e6 --- /dev/null +++ b/src/core/context/ensureToolResultsFollowToolUse.ts @@ -0,0 +1,122 @@ +import { Anthropic } from "@anthropic-ai/sdk" +import cloneDeep from "clone-deep" + +/** + * Ensures that every tool_use block in assistant messages has a corresponding tool_result in the next user message, + * and that tool_result blocks immediately follow their corresponding tool_use blocks in the correct order. + * + * This is required by the Anthropic API to maintain proper message pairing. + * When tool_result blocks are missing, they are automatically added with "result missing" content. 
+ * + * @param messages - The conversation messages to validate and fix + */ +export function ensureToolResultsFollowToolUse(messages: Anthropic.Messages.MessageParam[]): void { + for (let i = 0; i < messages.length - 1; i++) { + const message = messages[i] + + // Only process assistant messages with content + if (message.role !== "assistant" || !Array.isArray(message.content)) { + continue + } + + // Extract tool_use IDs in order + const toolUseIds: string[] = [] + for (const block of message.content) { + if (block.type === "tool_use" && block.id) { + toolUseIds.push(block.id) + } + } + + // Skip if no tool_use blocks found + if (toolUseIds.length === 0) { + continue + } + + const nextMessage = messages[i + 1] + + // Skip if next message is not a user message + if (nextMessage.role !== "user") { + continue + } + + // Ensure content is an array + if (!Array.isArray(nextMessage.content)) { + nextMessage.content = [] + } + + // Separate tool_results from other blocks in a single pass + const toolResultMap = new Map() + const otherBlocks: Anthropic.Messages.ContentBlockParam[] = [] + let needsUpdate = false + + for (const block of nextMessage.content) { + if (block.type === "tool_result" && block.tool_use_id) { + toolResultMap.set(block.tool_use_id, block) + } else { + otherBlocks.push(block) + } + } + + // Check if reordering is needed (tool_results not at start in correct order) + if (toolResultMap.size > 0) { + let expectedIndex = 0 + for (let j = 0; j < nextMessage.content.length && expectedIndex < toolUseIds.length; j++) { + const block = nextMessage.content[j] + if (block.type === "tool_result" && block.tool_use_id === toolUseIds[expectedIndex]) { + expectedIndex++ + } else if (block.type === "tool_result" || expectedIndex < toolUseIds.length) { + needsUpdate = true + break + } + } + if (!needsUpdate && expectedIndex < toolResultMap.size) { + needsUpdate = true + } + } + + // Add missing tool_results + for (const toolUseId of toolUseIds) { + if 
(!toolResultMap.has(toolUseId)) { + toolResultMap.set(toolUseId, { + type: "tool_result", + tool_use_id: toolUseId, + content: "result missing", + }) + needsUpdate = true + } + } + + // Only modify if changes are needed + if (!needsUpdate) { + continue + } + + // Build new content: tool_results first (in toolUseIds order), then other blocks + const newContent: Anthropic.Messages.ContentBlockParam[] = [] + + // Add tool_results in the order of toolUseIds + const processedToolResults = new Set() + for (const toolUseId of toolUseIds) { + const toolResult = toolResultMap.get(toolUseId) + if (toolResult) { + newContent.push(toolResult) + processedToolResults.add(toolUseId) + } + } + + // Add any orphaned tool_results not in toolUseIds (shouldn't happen, but be safe) + for (const [toolUseId, toolResult] of toolResultMap) { + if (!processedToolResults.has(toolUseId)) { + newContent.push(toolResult) + } + } + + // Add all other blocks + newContent.push(...otherBlocks) + + // Clone and update the message + const clonedMessage = cloneDeep(nextMessage) + clonedMessage.content = newContent + messages[i + 1] = clonedMessage + } +} diff --git a/src/core/prompts/__tests__/add-custom-instructions.spec.ts b/src/core/prompts/__tests__/add-custom-instructions.spec.ts index c49baf5dea48..d8daeadb63aa 100644 --- a/src/core/prompts/__tests__/add-custom-instructions.spec.ts +++ b/src/core/prompts/__tests__/add-custom-instructions.spec.ts @@ -190,7 +190,7 @@ describe("addCustomInstructions", () => { }) it("should generate correct prompt for architect mode", async () => { - const prompt = await SYSTEM_PROMPT( + const result = await SYSTEM_PROMPT( mockContext, "/test/path", false, // supportsImages @@ -209,11 +209,13 @@ describe("addCustomInstructions", () => { undefined, // partialReadsEnabled ) - expect(prompt).toMatchFileSnapshot("./__snapshots__/add-custom-instructions/architect-mode-prompt.snap") + expect(result.systemPrompt).toMatchFileSnapshot( + 
"./__snapshots__/add-custom-instructions/architect-mode-prompt.snap", + ) }) it("should generate correct prompt for ask mode", async () => { - const prompt = await SYSTEM_PROMPT( + const result = await SYSTEM_PROMPT( mockContext, "/test/path", false, // supportsImages @@ -232,13 +234,13 @@ describe("addCustomInstructions", () => { undefined, // partialReadsEnabled ) - expect(prompt).toMatchFileSnapshot("./__snapshots__/add-custom-instructions/ask-mode-prompt.snap") + expect(result.systemPrompt).toMatchFileSnapshot("./__snapshots__/add-custom-instructions/ask-mode-prompt.snap") }) it("should include MCP server creation info when enabled", async () => { const mockMcpHub = createMockMcpHub(true) - const prompt = await SYSTEM_PROMPT( + const result = await SYSTEM_PROMPT( mockContext, "/test/path", false, // supportsImages @@ -257,14 +259,16 @@ describe("addCustomInstructions", () => { undefined, // partialReadsEnabled ) - expect(prompt).toContain("Creating an MCP Server") - expect(prompt).toMatchFileSnapshot("./__snapshots__/add-custom-instructions/mcp-server-creation-enabled.snap") + expect(result.systemPrompt).toContain("Creating an MCP Server") + expect(result.systemPrompt).toMatchFileSnapshot( + "./__snapshots__/add-custom-instructions/mcp-server-creation-enabled.snap", + ) }) it("should exclude MCP server creation info when disabled", async () => { const mockMcpHub = createMockMcpHub(false) - const prompt = await SYSTEM_PROMPT( + const result = await SYSTEM_PROMPT( mockContext, "/test/path", false, // supportsImages @@ -283,12 +287,14 @@ describe("addCustomInstructions", () => { undefined, // partialReadsEnabled ) - expect(prompt).not.toContain("Creating an MCP Server") - expect(prompt).toMatchFileSnapshot("./__snapshots__/add-custom-instructions/mcp-server-creation-disabled.snap") + expect(result.systemPrompt).not.toContain("Creating an MCP Server") + expect(result.systemPrompt).toMatchFileSnapshot( + 
"./__snapshots__/add-custom-instructions/mcp-server-creation-disabled.snap", + ) }) it("should include partial read instructions when partialReadsEnabled is true", async () => { - const prompt = await SYSTEM_PROMPT( + const result = await SYSTEM_PROMPT( mockContext, "/test/path", false, // supportsImages @@ -307,7 +313,9 @@ describe("addCustomInstructions", () => { true, // partialReadsEnabled ) - expect(prompt).toMatchFileSnapshot("./__snapshots__/add-custom-instructions/partial-reads-enabled.snap") + expect(result.systemPrompt).toMatchFileSnapshot( + "./__snapshots__/add-custom-instructions/partial-reads-enabled.snap", + ) }) it("should prioritize mode-specific rules for code mode", async () => { diff --git a/src/core/prompts/__tests__/custom-system-prompt.spec.ts b/src/core/prompts/__tests__/custom-system-prompt.spec.ts index 6106e1617431..749a7c49f9a3 100644 --- a/src/core/prompts/__tests__/custom-system-prompt.spec.ts +++ b/src/core/prompts/__tests__/custom-system-prompt.spec.ts @@ -93,7 +93,7 @@ describe("File-Based Custom System Prompt", () => { }, } - const prompt = await SYSTEM_PROMPT( + const result = await SYSTEM_PROMPT( mockContext, "test/path", // Using a relative path without leading slash false, // supportsImages @@ -113,10 +113,10 @@ describe("File-Based Custom System Prompt", () => { ) // Should contain default sections - expect(prompt).toContain("TOOL USE") - expect(prompt).toContain("CAPABILITIES") - expect(prompt).toContain("MODES") - expect(prompt).toContain("Test role definition") + expect(result.systemPrompt).toContain("TOOL USE") + expect(result.systemPrompt).toContain("CAPABILITIES") + expect(result.systemPrompt).toContain("MODES") + expect(result.systemPrompt).toContain("Test role definition") }, ) @@ -131,7 +131,7 @@ describe("File-Based Custom System Prompt", () => { return Promise.reject({ code: "ENOENT" }) }) - const prompt = await SYSTEM_PROMPT( + const result = await SYSTEM_PROMPT( mockContext, "test/path", // Using a relative path 
without leading slash false, // supportsImages @@ -151,12 +151,12 @@ describe("File-Based Custom System Prompt", () => { ) // Should contain role definition and file-based system prompt - expect(prompt).toContain(modes[0].roleDefinition) - expect(prompt).toContain(fileCustomSystemPrompt) + expect(result.systemPrompt).toContain(modes[0].roleDefinition) + expect(result.systemPrompt).toContain(fileCustomSystemPrompt) // Should not contain any of the default sections - expect(prompt).not.toContain("CAPABILITIES") - expect(prompt).not.toContain("MODES") + expect(result.systemPrompt).not.toContain("CAPABILITIES") + expect(result.systemPrompt).not.toContain("MODES") }) it("should combine file-based system prompt with role definition and custom instructions", async () => { @@ -177,7 +177,7 @@ describe("File-Based Custom System Prompt", () => { }, } - const prompt = await SYSTEM_PROMPT( + const result = await SYSTEM_PROMPT( mockContext, "test/path", // Using a relative path without leading slash false, // supportsImages @@ -197,11 +197,11 @@ describe("File-Based Custom System Prompt", () => { ) // Should contain custom role definition and file-based system prompt - expect(prompt).toContain(customRoleDefinition) - expect(prompt).toContain(fileCustomSystemPrompt) + expect(result.systemPrompt).toContain(customRoleDefinition) + expect(result.systemPrompt).toContain(fileCustomSystemPrompt) // Should not contain any of the default sections - expect(prompt).not.toContain("CAPABILITIES") - expect(prompt).not.toContain("MODES") + expect(result.systemPrompt).not.toContain("CAPABILITIES") + expect(result.systemPrompt).not.toContain("MODES") }) }) diff --git a/src/core/prompts/__tests__/system-native-tools.spec.ts b/src/core/prompts/__tests__/system-native-tools.spec.ts new file mode 100644 index 000000000000..1ceca5ee2e27 --- /dev/null +++ b/src/core/prompts/__tests__/system-native-tools.spec.ts @@ -0,0 +1,213 @@ +import { describe, it, expect, vi, beforeEach } from "vitest" +import { 
SYSTEM_PROMPT } from "../system" +import { CodeIndexManager } from "../../../services/code-index/manager" +import type { SystemPromptSettings } from "../types" + +vi.mock("../../../services/code-index/manager") +vi.mock("../../../utils/storage", () => ({ + getSettingsDirectoryPath: vi.fn().mockResolvedValue("/test/settings"), +})) +vi.mock("../../../utils/globalContext", () => ({ + ensureSettingsDirectoryExists: vi.fn().mockResolvedValue("/test/settings"), +})) + +describe("SYSTEM_PROMPT with native tools", () => { + const mockContext = { + extensionUri: { fsPath: "/test/path" }, + globalStorageUri: { fsPath: "/test/global-storage" }, + globalState: { + get: vi.fn(), + update: vi.fn(), + }, + workspaceState: { + get: vi.fn(), + update: vi.fn(), + }, + } as any + + const defaultSettings: SystemPromptSettings = { + maxConcurrentFileReads: 5, + todoListEnabled: true, + useAgentRules: true, + newTaskRequireTodos: true, + } + + beforeEach(() => { + vi.clearAllMocks() + }) + + it("should filter out update_todo_list when todoListEnabled is false", async () => { + const mockCodeIndexManager = { + isFeatureEnabled: true, + isFeatureConfigured: true, + isInitialized: true, + } + vi.mocked(CodeIndexManager.getInstance).mockReturnValue(mockCodeIndexManager as any) + + const result = await SYSTEM_PROMPT( + mockContext, + "/test/cwd", + false, + undefined, + undefined, + undefined, + "code", + undefined, + undefined, + undefined, + true, + undefined, + false, + undefined, + undefined, + false, + { ...defaultSettings, todoListEnabled: false }, + undefined, + true, // useNativeTools + ) + + expect(result.tools).toBeDefined() + const toolNames = result.tools?.map((t) => t.name) || [] + expect(toolNames).not.toContain("update_todo_list") + }) + + it("should include update_todo_list when todoListEnabled is true", async () => { + const mockCodeIndexManager = { + isFeatureEnabled: true, + isFeatureConfigured: true, + isInitialized: true, + } + 
vi.mocked(CodeIndexManager.getInstance).mockReturnValue(mockCodeIndexManager as any) + + const result = await SYSTEM_PROMPT( + mockContext, + "/test/cwd", + false, + undefined, + undefined, + undefined, + "code", + undefined, + undefined, + undefined, + true, + undefined, + false, + undefined, + undefined, + false, + { ...defaultSettings, todoListEnabled: true }, + undefined, + true, // useNativeTools + ) + + expect(result.tools).toBeDefined() + const toolNames = result.tools?.map((t) => t.name) || [] + expect(toolNames).toContain("update_todo_list") + }) + + it("should filter out codebase_search when feature is not configured", async () => { + const mockCodeIndexManager = { + isFeatureEnabled: false, + isFeatureConfigured: false, + isInitialized: false, + } + vi.mocked(CodeIndexManager.getInstance).mockReturnValue(mockCodeIndexManager as any) + + const result = await SYSTEM_PROMPT( + mockContext, + "/test/cwd", + false, + undefined, + undefined, + undefined, + "code", + undefined, + undefined, + undefined, + true, + undefined, + false, + undefined, + undefined, + false, + undefined, + undefined, + true, // useNativeTools + ) + + expect(result.tools).toBeDefined() + const toolNames = result.tools?.map((t) => t.name) || [] + expect(toolNames).not.toContain("codebase_search") + }) + + it("should filter out generate_image when experiment is disabled", async () => { + const mockCodeIndexManager = { + isFeatureEnabled: true, + isFeatureConfigured: true, + isInitialized: true, + } + vi.mocked(CodeIndexManager.getInstance).mockReturnValue(mockCodeIndexManager as any) + + const result = await SYSTEM_PROMPT( + mockContext, + "/test/cwd", + false, + undefined, + undefined, + undefined, + "code", + undefined, + undefined, + undefined, + true, + { imageGeneration: false }, + false, + undefined, + undefined, + false, + undefined, + undefined, + true, // useNativeTools + ) + + expect(result.tools).toBeDefined() + const toolNames = result.tools?.map((t) => t.name) || [] + 
expect(toolNames).not.toContain("generate_image") + }) + + it("should filter out run_slash_command when experiment is disabled", async () => { + const mockCodeIndexManager = { + isFeatureEnabled: true, + isFeatureConfigured: true, + isInitialized: true, + } + vi.mocked(CodeIndexManager.getInstance).mockReturnValue(mockCodeIndexManager as any) + + const result = await SYSTEM_PROMPT( + mockContext, + "/test/cwd", + false, + undefined, + undefined, + undefined, + "code", + undefined, + undefined, + undefined, + true, + { runSlashCommand: false }, + false, + undefined, + undefined, + false, + undefined, + undefined, + true, // useNativeTools + ) + + expect(result.tools).toBeDefined() + const toolNames = result.tools?.map((t) => t.name) || [] + expect(toolNames).not.toContain("run_slash_command") + }) +}) diff --git a/src/core/prompts/__tests__/system-prompt.spec.ts b/src/core/prompts/__tests__/system-prompt.spec.ts index 477a68261d2c..f4f321bedf84 100644 --- a/src/core/prompts/__tests__/system-prompt.spec.ts +++ b/src/core/prompts/__tests__/system-prompt.spec.ts @@ -204,7 +204,7 @@ describe("SYSTEM_PROMPT", () => { }) it("should maintain consistent system prompt", async () => { - const prompt = await SYSTEM_PROMPT( + const result = await SYSTEM_PROMPT( mockContext, "/test/path", false, // supportsImages @@ -223,11 +223,11 @@ describe("SYSTEM_PROMPT", () => { undefined, // partialReadsEnabled ) - expect(prompt).toMatchFileSnapshot("./__snapshots__/system-prompt/consistent-system-prompt.snap") + expect(result.systemPrompt).toMatchFileSnapshot("./__snapshots__/system-prompt/consistent-system-prompt.snap") }) it("should include browser actions when supportsImages is true", async () => { - const prompt = await SYSTEM_PROMPT( + const result = await SYSTEM_PROMPT( mockContext, "/test/path", true, // supportsImages @@ -246,13 +246,13 @@ describe("SYSTEM_PROMPT", () => { undefined, // partialReadsEnabled ) - 
expect(prompt).toMatchFileSnapshot("./__snapshots__/system-prompt/with-computer-use-support.snap") + expect(result.systemPrompt).toMatchFileSnapshot("./__snapshots__/system-prompt/with-computer-use-support.snap") }) it("should include MCP server info when mcpHub is provided", async () => { mockMcpHub = createMockMcpHub(true) - const prompt = await SYSTEM_PROMPT( + const result = await SYSTEM_PROMPT( mockContext, "/test/path", false, @@ -271,11 +271,11 @@ describe("SYSTEM_PROMPT", () => { undefined, // partialReadsEnabled ) - expect(prompt).toMatchFileSnapshot("./__snapshots__/system-prompt/with-mcp-hub-provided.snap") + expect(result.systemPrompt).toMatchFileSnapshot("./__snapshots__/system-prompt/with-mcp-hub-provided.snap") }) it("should explicitly handle undefined mcpHub", async () => { - const prompt = await SYSTEM_PROMPT( + const result = await SYSTEM_PROMPT( mockContext, "/test/path", false, @@ -294,11 +294,11 @@ describe("SYSTEM_PROMPT", () => { undefined, // partialReadsEnabled ) - expect(prompt).toMatchFileSnapshot("./__snapshots__/system-prompt/with-undefined-mcp-hub.snap") + expect(result.systemPrompt).toMatchFileSnapshot("./__snapshots__/system-prompt/with-undefined-mcp-hub.snap") }) it("should handle different browser viewport sizes", async () => { - const prompt = await SYSTEM_PROMPT( + const result = await SYSTEM_PROMPT( mockContext, "/test/path", false, @@ -317,11 +317,13 @@ describe("SYSTEM_PROMPT", () => { undefined, // partialReadsEnabled ) - expect(prompt).toMatchFileSnapshot("./__snapshots__/system-prompt/with-different-viewport-size.snap") + expect(result.systemPrompt).toMatchFileSnapshot( + "./__snapshots__/system-prompt/with-different-viewport-size.snap", + ) }) it("should include diff strategy tool description when diffEnabled is true", async () => { - const prompt = await SYSTEM_PROMPT( + const result = await SYSTEM_PROMPT( mockContext, "/test/path", false, @@ -340,12 +342,12 @@ describe("SYSTEM_PROMPT", () => { undefined, // 
partialReadsEnabled ) - expect(prompt).toContain("apply_diff") - expect(prompt).toMatchFileSnapshot("./__snapshots__/system-prompt/with-diff-enabled-true.snap") + expect(result.systemPrompt).toContain("apply_diff") + expect(result.systemPrompt).toMatchFileSnapshot("./__snapshots__/system-prompt/with-diff-enabled-true.snap") }) it("should exclude diff strategy tool description when diffEnabled is false", async () => { - const prompt = await SYSTEM_PROMPT( + const result = await SYSTEM_PROMPT( mockContext, "/test/path", false, // supportsImages @@ -364,12 +366,12 @@ describe("SYSTEM_PROMPT", () => { undefined, // partialReadsEnabled ) - expect(prompt).not.toContain("apply_diff") - expect(prompt).toMatchFileSnapshot("./__snapshots__/system-prompt/with-diff-enabled-false.snap") + expect(result.systemPrompt).not.toContain("apply_diff") + expect(result.systemPrompt).toMatchFileSnapshot("./__snapshots__/system-prompt/with-diff-enabled-false.snap") }) it("should exclude diff strategy tool description when diffEnabled is undefined", async () => { - const prompt = await SYSTEM_PROMPT( + const result = await SYSTEM_PROMPT( mockContext, "/test/path", false, @@ -388,8 +390,10 @@ describe("SYSTEM_PROMPT", () => { undefined, // partialReadsEnabled ) - expect(prompt).not.toContain("apply_diff") - expect(prompt).toMatchFileSnapshot("./__snapshots__/system-prompt/with-diff-enabled-undefined.snap") + expect(result.systemPrompt).not.toContain("apply_diff") + expect(result.systemPrompt).toMatchFileSnapshot( + "./__snapshots__/system-prompt/with-diff-enabled-undefined.snap", + ) }) it("should include vscode language in custom instructions", async () => { @@ -420,7 +424,7 @@ describe("SYSTEM_PROMPT", () => { dispose: vi.fn(), })) - const prompt = await SYSTEM_PROMPT( + const result = await SYSTEM_PROMPT( mockContext, "/test/path", false, @@ -439,8 +443,8 @@ describe("SYSTEM_PROMPT", () => { undefined, // partialReadsEnabled ) - expect(prompt).toContain("Language Preference:") - 
expect(prompt).toContain('You should always speak and think in the "es" language') + expect(result.systemPrompt).toContain("Language Preference:") + expect(result.systemPrompt).toContain('You should always speak and think in the "es" language') // Reset mock vscode.env = { language: "en" } @@ -481,7 +485,7 @@ describe("SYSTEM_PROMPT", () => { }, ] - const prompt = await SYSTEM_PROMPT( + const result = await SYSTEM_PROMPT( mockContext, "/test/path", false, @@ -501,11 +505,13 @@ describe("SYSTEM_PROMPT", () => { ) // Role definition should be at the top - expect(prompt.indexOf("Custom role definition")).toBeLessThan(prompt.indexOf("TOOL USE")) + expect(result.systemPrompt.indexOf("Custom role definition")).toBeLessThan( + result.systemPrompt.indexOf("TOOL USE"), + ) // Custom instructions should be at the bottom - const customInstructionsIndex = prompt.indexOf("Custom mode instructions") - const userInstructionsHeader = prompt.indexOf("USER'S CUSTOM INSTRUCTIONS") + const customInstructionsIndex = result.systemPrompt.indexOf("Custom mode instructions") + const userInstructionsHeader = result.systemPrompt.indexOf("USER'S CUSTOM INSTRUCTIONS") expect(customInstructionsIndex).toBeGreaterThan(-1) expect(userInstructionsHeader).toBeGreaterThan(-1) expect(customInstructionsIndex).toBeGreaterThan(userInstructionsHeader) @@ -519,7 +525,7 @@ describe("SYSTEM_PROMPT", () => { }, } - const prompt = await SYSTEM_PROMPT( + const result = await SYSTEM_PROMPT( mockContext, "/test/path", false, @@ -539,9 +545,11 @@ describe("SYSTEM_PROMPT", () => { ) // Role definition from promptComponent should be at the top - expect(prompt.indexOf("Custom prompt role definition")).toBeLessThan(prompt.indexOf("TOOL USE")) + expect(result.systemPrompt.indexOf("Custom prompt role definition")).toBeLessThan( + result.systemPrompt.indexOf("TOOL USE"), + ) // Should not contain the default mode's role definition - expect(prompt).not.toContain(modes[0].roleDefinition) + 
expect(result.systemPrompt).not.toContain(modes[0].roleDefinition) }) it("should fallback to modeConfig roleDefinition when promptComponent has no roleDefinition", async () => { @@ -552,7 +560,7 @@ describe("SYSTEM_PROMPT", () => { }, } - const prompt = await SYSTEM_PROMPT( + const result = await SYSTEM_PROMPT( mockContext, "/test/path", false, @@ -572,7 +580,9 @@ describe("SYSTEM_PROMPT", () => { ) // Should use the default mode's role definition - expect(prompt.indexOf(modes[0].roleDefinition)).toBeLessThan(prompt.indexOf("TOOL USE")) + expect(result.systemPrompt.indexOf(modes[0].roleDefinition)).toBeLessThan( + result.systemPrompt.indexOf("TOOL USE"), + ) }) it("should exclude update_todo_list tool when todoListEnabled is false", async () => { @@ -583,7 +593,7 @@ describe("SYSTEM_PROMPT", () => { newTaskRequireTodos: false, } - const prompt = await SYSTEM_PROMPT( + const result = await SYSTEM_PROMPT( mockContext, "/test/path", false, @@ -604,7 +614,7 @@ describe("SYSTEM_PROMPT", () => { ) // Should not contain the tool description - expect(prompt).not.toContain("## update_todo_list") + expect(result.systemPrompt).not.toContain("## update_todo_list") // Mode instructions will still reference the tool with a fallback to markdown }) @@ -616,7 +626,7 @@ describe("SYSTEM_PROMPT", () => { newTaskRequireTodos: false, } - const prompt = await SYSTEM_PROMPT( + const result = await SYSTEM_PROMPT( mockContext, "/test/path", false, @@ -636,8 +646,8 @@ describe("SYSTEM_PROMPT", () => { settings, // settings ) - expect(prompt).toContain("update_todo_list") - expect(prompt).toContain("## update_todo_list") + expect(result.systemPrompt).toContain("update_todo_list") + expect(result.systemPrompt).toContain("## update_todo_list") }) it("should include update_todo_list tool when todoListEnabled is undefined", async () => { @@ -648,7 +658,7 @@ describe("SYSTEM_PROMPT", () => { newTaskRequireTodos: false, } - const prompt = await SYSTEM_PROMPT( + const result = await SYSTEM_PROMPT( 
mockContext, "/test/path", false, @@ -668,8 +678,8 @@ describe("SYSTEM_PROMPT", () => { settings, // settings ) - expect(prompt).toContain("update_todo_list") - expect(prompt).toContain("## update_todo_list") + expect(result.systemPrompt).toContain("update_todo_list") + expect(result.systemPrompt).toContain("## update_todo_list") }) afterAll(() => { diff --git a/src/core/prompts/responses.ts b/src/core/prompts/responses.ts index fd51b18feda4..0aee7fd06e96 100644 --- a/src/core/prompts/responses.ts +++ b/src/core/prompts/responses.ts @@ -18,25 +18,30 @@ export const formatResponse = { rooIgnoreError: (path: string) => `Access to ${path} is blocked by the .rooignore file settings. You must try to continue in the task without using this file, or ask the user to update the .rooignore file.`, - noToolsUsed: () => + noToolsUsed: (useNativeTools: boolean = false) => `[ERROR] You did not use a tool in your previous response! Please retry with a tool use. -${toolUseInstructionsReminder} +${getToolUseInstructionsReminder(useNativeTools)} # Next Steps -If you have completed the user's task, use the attempt_completion tool. -If you require additional information from the user, use the ask_followup_question tool. -Otherwise, if you have not completed the task and do not need additional information, then proceed with the next step of the task. +If you have completed the user's task, use the attempt_completion tool. +If you require additional information from the user, use the ask_followup_question tool. +Otherwise, if you have not completed the task and do not need additional information, then proceed with the next step of the task. (This is an automated message, so do not respond to it conversationally.)`, tooManyMistakes: (feedback?: string) => `You seem to be having trouble proceeding. The user has provided the following feedback to help guide you:\n\n${feedback}\n`, - missingToolParameterError: (paramName: string) => - `Missing value for required parameter '${paramName}'. 
Please retry with complete response.\n\n${toolUseInstructionsReminder}`, + missingToolParameterError: (paramName: string, useNativeTools: boolean = false) => + `Missing value for required parameter '${paramName}'. Please retry with complete response.\n\n${getToolUseInstructionsReminder(useNativeTools)}`, - lineCountTruncationError: (actualLineCount: number, isNewFile: boolean, diffStrategyEnabled: boolean = false) => { + lineCountTruncationError: ( + actualLineCount: number, + isNewFile: boolean, + diffStrategyEnabled: boolean = false, + useNativeTools: boolean = false, + ) => { const truncationMessage = `Note: Your response may have been truncated because it exceeded your output limit. You wrote ${actualLineCount} lines of content, but the line_count parameter was either missing or not included in your response.` const newFileGuidance = @@ -66,7 +71,7 @@ Otherwise, if you have not completed the task and do not need additional informa `RECOMMENDED APPROACH:\n` + `${existingFileApproaches.join("\n")}\n` - return `${isNewFile ? newFileGuidance : existingFileGuidance}\n${toolUseInstructionsReminder}` + return `${isNewFile ? newFileGuidance : existingFileGuidance}\n${getToolUseInstructionsReminder(useNativeTools)}` }, invalidMcpToolArgumentError: (serverName: string, toolName: string) => @@ -200,7 +205,14 @@ const formatImagesIntoBlocks = (images?: string[]): Anthropic.ImageBlockParam[] : [] } -const toolUseInstructionsReminder = `# Reminder: Instructions for Tool Use +export function getToolUseInstructionsReminder(useNativeTools: boolean = false): string { + if (useNativeTools) { + return `# Reminder: Instructions for Tool Use + +When using tools, ensure you provide all required parameters as specified in the tool's schema. The system will validate your tool calls and return errors if required parameters are missing or incorrectly formatted.` + } + + return `# Reminder: Instructions for Tool Use Tool uses are formatted using XML-style tags. 
The tool name itself becomes the XML tag name. Each parameter is enclosed within its own set of tags. Here's the structure: @@ -219,3 +231,7 @@ I have completed the task... Always use the actual tool name as the XML tag name for proper parsing and execution.` +} + +// Default reminder for backward compatibility (XML mode) +const toolUseInstructionsReminder = getToolUseInstructionsReminder() diff --git a/src/core/prompts/sections/tool-use-guidelines.ts b/src/core/prompts/sections/tool-use-guidelines.ts index f6843cf84238..a054969cf446 100644 --- a/src/core/prompts/sections/tool-use-guidelines.ts +++ b/src/core/prompts/sections/tool-use-guidelines.ts @@ -1,6 +1,9 @@ import { CodeIndexManager } from "../../../services/code-index/manager" -export function getToolUseGuidelinesSection(codeIndexManager?: CodeIndexManager): string { +export function getToolUseGuidelinesSection( + codeIndexManager?: CodeIndexManager, + useNativeTools: boolean = false, +): string { const isCodebaseSearchAvailable = codeIndexManager && codeIndexManager.isFeatureEnabled && @@ -34,7 +37,12 @@ export function getToolUseGuidelinesSection(codeIndexManager?: CodeIndexManager) guidelinesList.push( `${itemNumber++}. If multiple actions are needed, use one tool at a time per message to accomplish the task iteratively, with each tool use being informed by the result of the previous tool use. Do not assume the outcome of any tool use. Each step must be informed by the previous step's result.`, ) - guidelinesList.push(`${itemNumber++}. Formulate your tool use using the XML format specified for each tool.`) + + // Only include XML format guideline for XML-based tools + if (!useNativeTools) { + guidelinesList.push(`${itemNumber++}. Formulate your tool use using the XML format specified for each tool.`) + } + guidelinesList.push(`${itemNumber++}. After each tool use, the user will respond with the result of that tool use. 
This result will provide you with the necessary information to continue your task or make further decisions. This response may include: - Information about whether the tool succeeded or failed, along with any reasons for failure. - Linter errors that may have arisen due to the changes you made, which you'll need to address. diff --git a/src/core/prompts/sections/tool-use.ts b/src/core/prompts/sections/tool-use.ts index 28d47d09858e..d192347fbe2d 100644 --- a/src/core/prompts/sections/tool-use.ts +++ b/src/core/prompts/sections/tool-use.ts @@ -1,9 +1,7 @@ -export function getSharedToolUseSection(): string { - return `==== - -TOOL USE - -You have access to a set of tools that are executed upon the user's approval. You must use exactly one tool per message, and every assistant message must include a tool call. You use tools step-by-step to accomplish a given task, with each tool use informed by the result of the previous tool use. +export function getSharedToolUseSection(useNativeTools: boolean = false): string { + const xmlFormatting = useNativeTools + ? "" + : ` # Tool Use Formatting @@ -16,4 +14,10 @@ Tool uses are formatted using XML-style tags. The tool name itself becomes the X Always use the actual tool name as the XML tag name for proper parsing and execution.` + + return `==== + +TOOL USE + +You have access to a set of tools that are executed upon the user's approval. You must use exactly one tool per message, and every assistant message must include a tool call. 
You use tools step-by-step to accomplish a given task, with each tool use informed by the result of the previous tool use.${xmlFormatting}` } diff --git a/src/core/prompts/system.ts b/src/core/prompts/system.ts index 3cc327c8151c..49a4b396bbf9 100644 --- a/src/core/prompts/system.ts +++ b/src/core/prompts/system.ts @@ -1,21 +1,23 @@ import * as vscode from "vscode" import * as os from "os" -import type { ModeConfig, PromptComponent, CustomModePrompts, TodoItem } from "@roo-code/types" +import type { ModeConfig, PromptComponent, CustomModePrompts, TodoItem, ToolName } from "@roo-code/types" import type { SystemPromptSettings } from "./types" import { Mode, modes, defaultModeSlug, getModeBySlug, getGroupName, getModeSelection } from "../../shared/modes" -import { DiffStrategy } from "../../shared/tools" +import { DiffStrategy, TOOL_GROUPS, ALWAYS_AVAILABLE_TOOLS } from "../../shared/tools" import { formatLanguage } from "../../shared/language" import { isEmpty } from "../../utils/object" import { McpHub } from "../../services/mcp/McpHub" import { CodeIndexManager } from "../../services/code-index/manager" +import type { ToolSpec } from "../../api/transform/tool-converters" +import { getToolSpecs } from "./tool-specs" import { PromptVariables, loadSystemPromptFile } from "./sections/custom-system-prompt" -import { getToolDescriptionsForMode } from "./tools" +import { getToolDescriptionsForMode, filterToolsByAvailability } from "./tools" import { getRulesSection, getSystemInfoSection, @@ -60,8 +62,8 @@ async function generatePrompt( rooIgnoreInstructions?: string, partialReadsEnabled?: boolean, settings?: SystemPromptSettings, - todoList?: TodoItem[], modelId?: string, + useNativeTools?: boolean, ): Promise { if (!context) { throw new Error("Extension context is required for generating system prompt") @@ -92,7 +94,7 @@ async function generatePrompt( ${markdownFormattingSection()} -${getSharedToolUseSection()} +${getSharedToolUseSection(useNativeTools)} 
${getToolDescriptionsForMode( mode, @@ -110,7 +112,7 @@ ${getToolDescriptionsForMode( modelId, )} -${getToolUseGuidelinesSection(codeIndexManager)} +${getToolUseGuidelinesSection(codeIndexManager, useNativeTools)} ${mcpServersSection} @@ -151,9 +153,9 @@ export const SYSTEM_PROMPT = async ( rooIgnoreInstructions?: string, partialReadsEnabled?: boolean, settings?: SystemPromptSettings, - todoList?: TodoItem[], modelId?: string, -): Promise => { + useNativeTools?: boolean, +): Promise<{ systemPrompt: string; tools?: ToolSpec[] }> => { if (!context) { throw new Error("Extension context is required for generating system prompt") } @@ -195,17 +197,19 @@ export const SYSTEM_PROMPT = async ( ) // For file-based prompts, don't include the tool sections - return `${roleDefinition} + return { + systemPrompt: `${roleDefinition} ${fileCustomSystemPrompt} -${customInstructions}` +${customInstructions}`, + } } // If diff is disabled, don't pass the diffStrategy const effectiveDiffStrategy = diffEnabled ? 
diffStrategy : undefined - return generatePrompt( + const basePrompt = await generatePrompt( context, cwd, supportsComputerUse, @@ -223,7 +227,43 @@ ${customInstructions}` rooIgnoreInstructions, partialReadsEnabled, settings, - todoList, modelId, + useNativeTools, ) + + // If native tools are enabled, build tool specifications and return them with the prompt + if (useNativeTools) { + // Get tool names for the current mode by iterating through its groups + const toolNames: ToolName[] = [] + + currentMode.groups.forEach((group) => { + const groupName = getGroupName(group) + // Get tools from the centralized TOOL_GROUPS registry + const groupConfig = TOOL_GROUPS[groupName] + if (groupConfig) { + toolNames.push(...(groupConfig.tools as ToolName[])) + } + }) + + // Add always-available tools + toolNames.push(...ALWAYS_AVAILABLE_TOOLS) + + // Remove duplicates + const uniqueToolNames = [...new Set(toolNames)] + + // Apply centralized filtering to ensure consistency with XML tool mode + const codeIndexManager = CodeIndexManager.getInstance(context, cwd) + const filteredTools = filterToolsByAvailability(uniqueToolNames, codeIndexManager, settings, experiments) + + // Get tool specifications with settings for dynamic specs + const tools = getToolSpecs(filteredTools, settings) + + return { + systemPrompt: basePrompt, + tools, + } + } + + // For XML-based tools, return without tool specs + return { systemPrompt: basePrompt } } diff --git a/src/core/prompts/tool-specs/access-mcp-resource.ts b/src/core/prompts/tool-specs/access-mcp-resource.ts new file mode 100644 index 000000000000..0878ddf05b4a --- /dev/null +++ b/src/core/prompts/tool-specs/access-mcp-resource.ts @@ -0,0 +1,25 @@ +import type { ToolSpec } from "../../../api/transform/tool-converters" + +/** + * Tool specification for access_mcp_resource + * This defines the schema for accessing MCP server resources + */ +export const accessMcpResourceToolSpec: ToolSpec = { + name: "access_mcp_resource", + description: + 
"Request to access a resource provided by an MCP (Model Context Protocol) server. MCP servers can provide various resources like file systems, databases, or APIs. This tool acts as a bridge to access MCP server resources.", + parameters: [ + { + name: "server_name", + type: "string", + required: true, + description: "The name of the MCP server that provides the resource", + }, + { + name: "uri", + type: "string", + required: true, + description: "The URI of the resource to access on the MCP server", + }, + ], +} diff --git a/src/core/prompts/tool-specs/apply-diff.ts b/src/core/prompts/tool-specs/apply-diff.ts new file mode 100644 index 000000000000..2e4cbea169cc --- /dev/null +++ b/src/core/prompts/tool-specs/apply-diff.ts @@ -0,0 +1,26 @@ +import type { ToolSpec } from "../../../api/transform/tool-converters" + +/** + * Tool specification for apply_diff + * This defines the schema for applying targeted changes to files + */ +export const applyDiffToolSpec: ToolSpec = { + name: "apply_diff", + description: + "Request to apply PRECISE, TARGETED modifications to an existing file by searching for specific sections of content and replacing them. This tool is for SURGICAL EDITS ONLY - specific changes to existing code. You can perform multiple distinct search and replace operations within a single `apply_diff` call by providing multiple SEARCH/REPLACE blocks in the `diff` parameter. This is the preferred way to make several targeted changes efficiently. The SEARCH section must exactly match existing content including whitespace and indentation. If you're not confident in the exact content to search for, use the read_file tool first to get the exact content. When applying the diffs, be extra careful to remember to change any closing brackets or other syntax that may be affected by the diff farther down in the file. 
ALWAYS make as many changes in a single 'apply_diff' request as possible using multiple SEARCH/REPLACE blocks", + parameters: [ + { + name: "path", + type: "string", + required: true, + description: "The path of the file to modify (relative to the current workspace directory)", + }, + { + name: "diff", + type: "string", + required: true, + description: + "The search/replace block defining the changes. Format:\n```\n<<<<<<< SEARCH\n:start_line: (required) The line number of original content where the search block starts.\n-------\n[exact content to find including whitespace]\n=======\n[new content to replace with]\n>>>>>>> REPLACE\n```\nYou can include multiple SEARCH/REPLACE blocks in a single diff.", + }, + ], +} diff --git a/src/core/prompts/tool-specs/ask-followup-question.ts b/src/core/prompts/tool-specs/ask-followup-question.ts new file mode 100644 index 000000000000..a821525ea27c --- /dev/null +++ b/src/core/prompts/tool-specs/ask-followup-question.ts @@ -0,0 +1,26 @@ +import type { ToolSpec } from "../../../api/transform/tool-converters" + +/** + * Tool specification for ask_followup_question + * This defines the schema for asking users follow-up questions + */ +export const askFollowupQuestionToolSpec: ToolSpec = { + name: "ask_followup_question", + description: + "Ask the user a question to gather additional information needed to complete the task. Use when you need clarification or more details to proceed effectively.", + parameters: [ + { + name: "question", + type: "string", + required: true, + description: "A clear, specific question addressing the information needed", + }, + { + name: "follow_up", + type: "string", + required: true, + description: + "A list of 2-4 suggested answers, each in its own tag. Suggestions must be complete, actionable answers without placeholders. 
Optionally include mode attribute to switch modes (code/architect/etc.)", + }, + ], +} diff --git a/src/core/prompts/tool-specs/attempt-completion.ts b/src/core/prompts/tool-specs/attempt-completion.ts new file mode 100644 index 000000000000..932dda1afd52 --- /dev/null +++ b/src/core/prompts/tool-specs/attempt-completion.ts @@ -0,0 +1,20 @@ +import type { ToolSpec } from "../../../api/transform/tool-converters" + +/** + * Tool specification for attempt_completion + * This defines the schema for completing tasks + */ +export const attemptCompletionToolSpec: ToolSpec = { + name: "attempt_completion", + description: + "After each tool use, the user will respond with the result of that tool use, i.e. if it succeeded or failed, along with any reasons for failure. Once you've received the results of tool uses and can confirm that the task is complete, use this tool to present the result of your work to the user. The user may respond with feedback if they are not satisfied with the result, which you can use to make improvements and try again.\n\nIMPORTANT NOTE: This tool CANNOT be used until you've confirmed from the user that any previous tool uses were successful. Failure to do so will result in code corruption and system failure. Before using this tool, you must confirm that you've received successful results from the user for any previous tool uses. If not, then DO NOT use this tool.", + parameters: [ + { + name: "result", + type: "string", + required: true, + description: + "The result of the task. Formulate this result in a way that is final and does not require further input from the user. 
Don't end your result with questions or offers for further assistance.", + }, + ], +} diff --git a/src/core/prompts/tool-specs/browser-action.ts b/src/core/prompts/tool-specs/browser-action.ts new file mode 100644 index 000000000000..7287364a2ad0 --- /dev/null +++ b/src/core/prompts/tool-specs/browser-action.ts @@ -0,0 +1,44 @@ +import type { ToolSpec } from "../../../api/transform/tool-converters" + +/** + * Tool specification for browser_action + * This defines the schema for browser automation actions + */ +export const browserActionToolSpec: ToolSpec = { + name: "browser_action", + description: + "Request to interact with a browser session for web automation and testing. Supports actions like launching URLs, clicking elements, typing text, scrolling, and capturing screenshots.", + parameters: [ + { + name: "action", + type: "string", + required: true, + description: + "The browser action to perform: 'launch' (open URL), 'click' (click element at coordinate), 'type' (type text), 'scroll_down', 'scroll_up', 'screenshot', or 'close'", + }, + { + name: "url", + type: "string", + required: false, + description: "The URL to navigate to (required for 'launch' action)", + }, + { + name: "coordinate", + type: "string", + required: false, + description: "The x,y coordinate to click (required for 'click' action, format: 'x,y')", + }, + { + name: "text", + type: "string", + required: false, + description: "The text to type (required for 'type' action)", + }, + { + name: "size", + type: "string", + required: false, + description: "Browser viewport size (optional, format: 'widthxheight', e.g., '1024x768')", + }, + ], +} diff --git a/src/core/prompts/tool-specs/codebase-search.ts b/src/core/prompts/tool-specs/codebase-search.ts new file mode 100644 index 000000000000..923115de2400 --- /dev/null +++ b/src/core/prompts/tool-specs/codebase-search.ts @@ -0,0 +1,27 @@ +import type { ToolSpec } from "../../../api/transform/tool-converters" + +/** + * Tool specification for 
codebase_search + * This defines the schema for semantic codebase searching + */ +export const codebaseSearchToolSpec: ToolSpec = { + name: "codebase_search", + description: + "Find files most relevant to the search query using semantic search. Searches based on meaning rather than exact text matches. By default searches entire workspace. Reuse the user's exact wording unless there's a clear reason not to - their phrasing often helps semantic search. Queries MUST be in English (translate if needed).", + parameters: [ + { + name: "query", + type: "string", + required: true, + description: + "The search query. Reuse the user's exact wording/question format unless there's a clear reason not to.", + }, + { + name: "path", + type: "string", + required: false, + description: + "Limit search to specific subdirectory (relative to the current workspace directory). Leave empty for entire workspace.", + }, + ], +} diff --git a/src/core/prompts/tool-specs/execute-command.ts b/src/core/prompts/tool-specs/execute-command.ts new file mode 100644 index 000000000000..1b0633be5d35 --- /dev/null +++ b/src/core/prompts/tool-specs/execute-command.ts @@ -0,0 +1,26 @@ +import type { ToolSpec } from "../../../api/transform/tool-converters" + +/** + * Tool specification for execute_command + * This defines the schema for executing CLI commands + */ +export const executeCommandToolSpec: ToolSpec = { + name: "execute_command", + description: + "Request to execute a CLI command on the system. Use this when you need to perform system operations or run specific commands to accomplish any step in the user's task. You must tailor your command to the user's system and provide a clear explanation of what the command does. For command chaining, use the appropriate chaining syntax for the user's shell. Prefer to execute complex CLI commands over creating executable scripts, as they are more flexible and easier to run. 
Prefer relative commands and paths that avoid location sensitivity for terminal consistency, e.g: `touch ./testdata/example.file`, `dir ./examples/model1/data/yaml`, or `go test ./cmd/front --config ./cmd/front/config.yml`. If directed by the user, you may open a terminal in a different directory by using the `cwd` parameter.", + parameters: [ + { + name: "command", + type: "string", + required: true, + description: + "The CLI command to execute. This should be valid for the current operating system. Ensure the command is properly formatted and does not contain any harmful instructions.", + }, + { + name: "cwd", + type: "string", + required: false, + description: "The working directory to execute the command in (optional)", + }, + ], +} diff --git a/src/core/prompts/tool-specs/fetch-instructions.ts b/src/core/prompts/tool-specs/fetch-instructions.ts new file mode 100644 index 000000000000..4803e518678d --- /dev/null +++ b/src/core/prompts/tool-specs/fetch-instructions.ts @@ -0,0 +1,18 @@ +import type { ToolSpec } from "../../../api/transform/tool-converters" + +/** + * Tool specification for fetch_instructions + * This defines the schema for fetching task-specific instructions + */ +export const fetchInstructionsToolSpec: ToolSpec = { + name: "fetch_instructions", + description: "Request to fetch instructions to perform a task", + parameters: [ + { + name: "task", + type: "string", + required: true, + description: "The task to get instructions for. 
This can take the following values:\n create_mode", + }, + ], +} diff --git a/src/core/prompts/tool-specs/generate-image.ts b/src/core/prompts/tool-specs/generate-image.ts new file mode 100644 index 000000000000..a34794ff8bf5 --- /dev/null +++ b/src/core/prompts/tool-specs/generate-image.ts @@ -0,0 +1,33 @@ +import type { ToolSpec } from "../../../api/transform/tool-converters" + +/** + * Tool specification for generate_image + * This defines the schema for generating images using AI + */ +export const generateImageToolSpec: ToolSpec = { + name: "generate_image", + description: + "Request to generate or edit an image using AI models through OpenRouter API. This tool can create new images from text prompts or modify existing images based on your instructions. When an input image is provided, the AI will apply the requested edits, transformations, or enhancements to that image.", + parameters: [ + { + name: "prompt", + type: "string", + required: true, + description: "The text prompt describing what to generate or how to edit the image", + }, + { + name: "path", + type: "string", + required: true, + description: + "The file path where the generated/edited image should be saved (relative to the current workspace directory). The tool will automatically add the appropriate image extension if not provided.", + }, + { + name: "image", + type: "string", + required: false, + description: + "The file path to an input image to edit or transform (relative to the current workspace directory). 
Supported formats: PNG, JPG, JPEG, GIF, WEBP.", + }, + ], +} diff --git a/src/core/prompts/tool-specs/index.ts b/src/core/prompts/tool-specs/index.ts new file mode 100644 index 000000000000..f814c94acd19 --- /dev/null +++ b/src/core/prompts/tool-specs/index.ts @@ -0,0 +1,110 @@ +import type { ToolSpec } from "../../../api/transform/tool-converters" +import type { ToolName } from "@roo-code/types" +import type { SystemPromptSettings } from "../types" + +// Import tool specifications (static for now, but wrapped in factories) +import { getReadFileToolSpec } from "./read-file" +import { writeToFileToolSpec } from "./write-to-file" +import { applyDiffToolSpec } from "./apply-diff" +import { executeCommandToolSpec } from "./execute-command" +import { listFilesToolSpec } from "./list-files" +import { searchFilesToolSpec } from "./search-files" +import { listCodeDefinitionNamesToolSpec } from "./list-code-definition-names" +import { codebaseSearchToolSpec } from "./codebase-search" +import { insertContentToolSpec } from "./insert-content" +import { searchAndReplaceToolSpec } from "./search-and-replace" +import { browserActionToolSpec } from "./browser-action" +import { useMcpToolToolSpec } from "./use-mcp-tool" +import { accessMcpResourceToolSpec } from "./access-mcp-resource" +import { askFollowupQuestionToolSpec } from "./ask-followup-question" +import { attemptCompletionToolSpec } from "./attempt-completion" +import { switchModeToolSpec } from "./switch-mode" +import { newTaskToolSpec } from "./new-task" +import { updateTodoListToolSpec } from "./update-todo-list" +import { runSlashCommandToolSpec } from "./run-slash-command" +import { generateImageToolSpec } from "./generate-image" +import { fetchInstructionsToolSpec } from "./fetch-instructions" + +/** + * Type for tool spec factories that can generate specs based on settings + */ +type ToolSpecFactory = (settings?: SystemPromptSettings) => ToolSpec + +/** + * Registry of all tool specification factories + * All tool 
specs are now generated dynamically to support future configuration needs + */ +const TOOL_SPEC_FACTORIES: Record<ToolName, ToolSpecFactory> = { + read_file: getReadFileToolSpec, + write_to_file: () => writeToFileToolSpec, + apply_diff: () => applyDiffToolSpec, + execute_command: () => executeCommandToolSpec, + list_files: () => listFilesToolSpec, + search_files: () => searchFilesToolSpec, + list_code_definition_names: () => listCodeDefinitionNamesToolSpec, + codebase_search: () => codebaseSearchToolSpec, + insert_content: () => insertContentToolSpec, + search_and_replace: () => searchAndReplaceToolSpec, + browser_action: () => browserActionToolSpec, + use_mcp_tool: () => useMcpToolToolSpec, + access_mcp_resource: () => accessMcpResourceToolSpec, + ask_followup_question: () => askFollowupQuestionToolSpec, + attempt_completion: () => attemptCompletionToolSpec, + switch_mode: () => switchModeToolSpec, + new_task: () => newTaskToolSpec, + update_todo_list: () => updateTodoListToolSpec, + run_slash_command: () => runSlashCommandToolSpec, + generate_image: () => generateImageToolSpec, + fetch_instructions: () => fetchInstructionsToolSpec, +} + +/** + * Static registry for backward compatibility + * Maps tool names to their default specifications (generated without settings) + */ +export const TOOL_SPECS: Record<ToolName, ToolSpec> = { + read_file: getReadFileToolSpec(), + write_to_file: writeToFileToolSpec, + apply_diff: applyDiffToolSpec, + execute_command: executeCommandToolSpec, + list_files: listFilesToolSpec, + search_files: searchFilesToolSpec, + list_code_definition_names: listCodeDefinitionNamesToolSpec, + codebase_search: codebaseSearchToolSpec, + insert_content: insertContentToolSpec, + search_and_replace: searchAndReplaceToolSpec, + browser_action: browserActionToolSpec, + use_mcp_tool: useMcpToolToolSpec, + access_mcp_resource: accessMcpResourceToolSpec, + ask_followup_question: askFollowupQuestionToolSpec, + attempt_completion: attemptCompletionToolSpec, + switch_mode: switchModeToolSpec, + new_task: 
newTaskToolSpec, + update_todo_list: updateTodoListToolSpec, + run_slash_command: runSlashCommandToolSpec, + generate_image: generateImageToolSpec, + fetch_instructions: fetchInstructionsToolSpec, +} + +/** + * Get tool specifications for a specific set of tool names + * @param toolNames - Array of tool names to get specs for + * @param settings - Optional settings to customize tool specs + * @returns Array of tool specifications + */ +export function getToolSpecs(toolNames: ToolName[], settings?: SystemPromptSettings): ToolSpec[] { + return toolNames + .map((name) => { + const factory = TOOL_SPEC_FACTORIES[name] + return factory ? factory(settings) : undefined + }) + .filter((spec): spec is ToolSpec => spec !== undefined) +} + +/** + * Get all available tool specifications with default settings + * @returns Array of all tool specifications + */ +export function getAllToolSpecs(): ToolSpec[] { + return Object.values(TOOL_SPECS) +} diff --git a/src/core/prompts/tool-specs/insert-content.ts b/src/core/prompts/tool-specs/insert-content.ts new file mode 100644 index 000000000000..4854a77fd623 --- /dev/null +++ b/src/core/prompts/tool-specs/insert-content.ts @@ -0,0 +1,32 @@ +import type { ToolSpec } from "../../../api/transform/tool-converters" + +/** + * Tool specification for insert_content + * This defines the schema for inserting content into files + */ +export const insertContentToolSpec: ToolSpec = { + name: "insert_content", + description: + "Use this tool specifically for adding new lines of content into a file without modifying existing content. Specify the line number to insert before, or use line 0 to append to the end. 
Ideal for adding imports, functions, configuration blocks, log entries, or any multi-line text block.", + parameters: [ + { + name: "path", + type: "string", + required: true, + description: "File path relative to workspace directory", + }, + { + name: "line", + type: "integer", + required: true, + description: + "Line number where content will be inserted (1-based). Use 0 to append at end of file. Use any positive number to insert before that line", + }, + { + name: "content", + type: "string", + required: true, + description: "The content to insert at the specified line", + }, + ], +} diff --git a/src/core/prompts/tool-specs/list-code-definition-names.ts b/src/core/prompts/tool-specs/list-code-definition-names.ts new file mode 100644 index 000000000000..1366d33e8e3f --- /dev/null +++ b/src/core/prompts/tool-specs/list-code-definition-names.ts @@ -0,0 +1,20 @@ +import type { ToolSpec } from "../../../api/transform/tool-converters" + +/** + * Tool specification for list_code_definition_names + * This defines the schema for listing code definitions + */ +export const listCodeDefinitionNamesToolSpec: ToolSpec = { + name: "list_code_definition_names", + description: + "Request to list definition names (classes, functions, methods, etc.) from source code. This tool can analyze either a single file or all files at the top level of a specified directory. It provides insights into the codebase structure and important constructs, encapsulating high-level concepts and relationships that are crucial for understanding the overall architecture.", + parameters: [ + { + name: "path", + type: "string", + required: true, + description: + "The path of the file or directory (relative to the current working directory) to analyze. 
When given a directory, it lists definitions from all top-level source files.", + }, + ], +} diff --git a/src/core/prompts/tool-specs/list-files.ts b/src/core/prompts/tool-specs/list-files.ts new file mode 100644 index 000000000000..73962630f56e --- /dev/null +++ b/src/core/prompts/tool-specs/list-files.ts @@ -0,0 +1,26 @@ +import type { ToolSpec } from "../../../api/transform/tool-converters" + +/** + * Tool specification for list_files + * This defines the schema for listing directory contents + */ +export const listFilesToolSpec: ToolSpec = { + name: "list_files", + description: + "Request to list files and directories within the specified directory. If recursive is true, it will list all files and directories recursively. If recursive is false or not provided, it will only list the top-level contents. Do not use this tool to confirm the existence of files you may have created, as the user will let you know if the files were created successfully or not.", + parameters: [ + { + name: "path", + type: "string", + required: true, + description: "The path of the directory to list contents for (relative to the current workspace directory)", + }, + { + name: "recursive", + type: "boolean", + required: false, + description: + "Whether to list files recursively. 
Use true for recursive listing, false or omit for top-level only.", + }, + ], +} diff --git a/src/core/prompts/tool-specs/new-task.ts b/src/core/prompts/tool-specs/new-task.ts new file mode 100644 index 000000000000..fbac3796cb09 --- /dev/null +++ b/src/core/prompts/tool-specs/new-task.ts @@ -0,0 +1,24 @@ +import type { ToolSpec } from "../../../api/transform/tool-converters" + +/** + * Tool specification for new_task + * This defines the schema for creating new task instances + */ +export const newTaskToolSpec: ToolSpec = { + name: "new_task", + description: "This will let you create a new task instance in the chosen mode using your provided message.", + parameters: [ + { + name: "mode", + type: "string", + required: true, + description: 'The slug of the mode to start the new task in (e.g., "code", "debug", "architect").', + }, + { + name: "message", + type: "string", + required: true, + description: "The initial user message or instructions for this new task.", + }, + ], +} diff --git a/src/core/prompts/tool-specs/read-file.ts b/src/core/prompts/tool-specs/read-file.ts new file mode 100644 index 000000000000..d092023ced1d --- /dev/null +++ b/src/core/prompts/tool-specs/read-file.ts @@ -0,0 +1,41 @@ +import type { ToolSpec } from "../../../api/transform/tool-converters" +import type { SystemPromptSettings } from "../types" + +/** + * Tool specification for read_file + * This defines the schema for reading file contents + */ +export function getReadFileToolSpec(settings?: SystemPromptSettings): ToolSpec { + const maxConcurrentFileReads = settings?.maxConcurrentFileReads ?? 5 + + return { + name: "read_file", + description: `Request to read the contents of one or more files. The tool outputs line-numbered content (e.g. "1 | const x = 1") for easy reference when creating diffs or discussing code. 
Supports text extraction from PDF and DOCX files, but may not handle other binary files properly.\n\n**IMPORTANT: You can read a maximum of ${maxConcurrentFileReads} files in a single request.** If you need to read more files, use multiple sequential read_file requests.`, + parameters: [ + { + name: "args", + type: "object", + required: true, + description: "Contains one or more file elements, where each file contains a path", + properties: { + file: { + type: "array", + description: `Array of file objects to read (maximum ${maxConcurrentFileReads} files)`, + items: { + type: "object", + properties: { + path: { + type: "string", + description: "File path (relative to workspace directory)", + }, + }, + required: ["path"], + }, + }, + }, + }, + ], + } +} +// Export a default spec for backward compatibility +export const readFileToolSpec = getReadFileToolSpec() diff --git a/src/core/prompts/tool-specs/run-slash-command.ts b/src/core/prompts/tool-specs/run-slash-command.ts new file mode 100644 index 000000000000..6bc81a10ad3f --- /dev/null +++ b/src/core/prompts/tool-specs/run-slash-command.ts @@ -0,0 +1,25 @@ +import type { ToolSpec } from "../../../api/transform/tool-converters" + +/** + * Tool specification for run_slash_command + * This defines the schema for executing slash commands + */ +export const runSlashCommandToolSpec: ToolSpec = { + name: "run_slash_command", + description: + "Execute a slash command to get specific instructions or content. 
Slash commands are predefined templates that provide detailed guidance for common tasks.", + parameters: [ + { + name: "command", + type: "string", + required: true, + description: 'The name of the slash command to execute (e.g., "init", "test", "deploy")', + }, + { + name: "args", + type: "string", + required: false, + description: "Additional arguments or context to pass to the command", + }, + ], +} diff --git a/src/core/prompts/tool-specs/search-and-replace.ts b/src/core/prompts/tool-specs/search-and-replace.ts new file mode 100644 index 000000000000..bebf84027702 --- /dev/null +++ b/src/core/prompts/tool-specs/search-and-replace.ts @@ -0,0 +1,55 @@ +import type { ToolSpec } from "../../../api/transform/tool-converters" + +/** + * Tool specification for search_and_replace + * This defines the schema for find and replace operations + */ +export const searchAndReplaceToolSpec: ToolSpec = { + name: "search_and_replace", + description: + "Use this tool to find and replace specific text strings or patterns (using regex) within a file. It's suitable for targeted replacements across multiple locations within the file. Supports literal text and regex patterns, case sensitivity options, and optional line ranges. 
Shows a diff preview before applying changes.", + parameters: [ + { + name: "path", + type: "string", + required: true, + description: "The path of the file to modify (relative to the current workspace directory)", + }, + { + name: "search", + type: "string", + required: true, + description: "The text or pattern to search for", + }, + { + name: "replace", + type: "string", + required: true, + description: "The text to replace matches with", + }, + { + name: "start_line", + type: "integer", + required: false, + description: "Starting line number for restricted replacement (1-based)", + }, + { + name: "end_line", + type: "integer", + required: false, + description: "Ending line number for restricted replacement (1-based)", + }, + { + name: "use_regex", + type: "boolean", + required: false, + description: 'Set to "true" to treat search as a regex pattern (default: false)', + }, + { + name: "ignore_case", + type: "boolean", + required: false, + description: 'Set to "true" to ignore case when matching (default: false)', + }, + ], +} diff --git a/src/core/prompts/tool-specs/search-files.ts b/src/core/prompts/tool-specs/search-files.ts new file mode 100644 index 000000000000..3e02048b50f5 --- /dev/null +++ b/src/core/prompts/tool-specs/search-files.ts @@ -0,0 +1,33 @@ +import type { ToolSpec } from "../../../api/transform/tool-converters" + +/** + * Tool specification for search_files + * This defines the schema for regex searching across files + */ +export const searchFilesToolSpec: ToolSpec = { + name: "search_files", + description: + "Request to perform a regex search across files in a specified directory, providing context-rich results. This tool searches for patterns or specific content across multiple files, displaying each match with encapsulating context.", + parameters: [ + { + name: "path", + type: "string", + required: true, + description: + "The path of the directory to search in (relative to the current workspace directory). 
This directory will be recursively searched.", + }, + { + name: "regex", + type: "string", + required: true, + description: "The regular expression pattern to search for. Uses Rust regex syntax.", + }, + { + name: "file_pattern", + type: "string", + required: false, + description: + "Glob pattern to filter files (e.g., '*.ts' for TypeScript files). If not provided, it will search all files (*).", + }, + ], +} diff --git a/src/core/prompts/tool-specs/switch-mode.ts b/src/core/prompts/tool-specs/switch-mode.ts new file mode 100644 index 000000000000..c83995f83185 --- /dev/null +++ b/src/core/prompts/tool-specs/switch-mode.ts @@ -0,0 +1,25 @@ +import type { ToolSpec } from "../../../api/transform/tool-converters" + +/** + * Tool specification for switch_mode + * This defines the schema for switching between modes + */ +export const switchModeToolSpec: ToolSpec = { + name: "switch_mode", + description: + "Request to switch to a different mode. This tool allows modes to request switching to another mode when needed, such as switching to Code mode to make code changes. 
The user must approve the mode switch.", + parameters: [ + { + name: "mode_slug", + type: "string", + required: true, + description: 'The slug of the mode to switch to (e.g., "code", "ask", "architect")', + }, + { + name: "reason", + type: "string", + required: false, + description: "The reason for switching modes", + }, + ], +} diff --git a/src/core/prompts/tool-specs/update-todo-list.ts b/src/core/prompts/tool-specs/update-todo-list.ts new file mode 100644 index 000000000000..81d96629ab07 --- /dev/null +++ b/src/core/prompts/tool-specs/update-todo-list.ts @@ -0,0 +1,20 @@ +import type { ToolSpec } from "../../../api/transform/tool-converters" + +/** + * Tool specification for update_todo_list + * This defines the schema for updating the todo list + */ +export const updateTodoListToolSpec: ToolSpec = { + name: "update_todo_list", + description: + "Replace the entire TODO list with an updated checklist reflecting the current state. Always provide the full list; the system will overwrite the previous one. This tool is designed for step-by-step task tracking, allowing you to confirm completion of each step before updating, update multiple task statuses at once (e.g., mark one as completed and start the next), and dynamically add new todos discovered during long or complex tasks.\n\nChecklist Format:\n- Use a single-level markdown checklist (no nesting or subtasks).\n- List todos in the intended execution order.\n- Status options:\n - [ ] Task description (pending)\n - [x] Task description (completed)\n - [-] Task description (in progress)\n\nStatus Rules:\n- [ ] = pending (not started)\n- [x] = completed (fully finished, no unresolved issues)\n- [-] = in_progress (currently being worked on)", + parameters: [ + { + name: "todos", + type: "string", + required: true, + description: + "The complete markdown checklist with status indicators. 
Include all todos, both completed and pending.", + }, + ], +} diff --git a/src/core/prompts/tool-specs/use-mcp-tool.ts b/src/core/prompts/tool-specs/use-mcp-tool.ts new file mode 100644 index 000000000000..a9f5d8d9f0c6 --- /dev/null +++ b/src/core/prompts/tool-specs/use-mcp-tool.ts @@ -0,0 +1,31 @@ +import type { ToolSpec } from "../../../api/transform/tool-converters" + +/** + * Tool specification for use_mcp_tool + * This defines the schema for using MCP server tools + */ +export const useMcpToolToolSpec: ToolSpec = { + name: "use_mcp_tool", + description: + "Request to use a tool provided by an MCP (Model Context Protocol) server. MCP servers extend Roo's capabilities with custom tools. This tool acts as a bridge to execute MCP server tools.", + parameters: [ + { + name: "server_name", + type: "string", + required: true, + description: "The name of the MCP server that provides the tool", + }, + { + name: "tool_name", + type: "string", + required: true, + description: "The name of the tool to execute on the MCP server", + }, + { + name: "arguments", + type: "string", + required: true, + description: "JSON string of arguments to pass to the MCP tool", + }, + ], +} diff --git a/src/core/prompts/tool-specs/write-to-file.ts b/src/core/prompts/tool-specs/write-to-file.ts new file mode 100644 index 000000000000..c670be7df2f6 --- /dev/null +++ b/src/core/prompts/tool-specs/write-to-file.ts @@ -0,0 +1,33 @@ +import type { ToolSpec } from "../../../api/transform/tool-converters" + +/** + * Tool specification for write_to_file + * This defines the schema for creating or overwriting files + */ +export const writeToFileToolSpec: ToolSpec = { + name: "write_to_file", + description: + "Request to write content to a file. This tool is primarily used for **creating new files** or for scenarios where a **complete rewrite of an existing file is intentionally required**. If the file exists, it will be overwritten. If it doesn't exist, it will be created. 
This tool will automatically create any directories needed to write the file.", + parameters: [ + { + name: "path", + type: "string", + required: true, + description: "The path of the file to write to (relative to the current workspace directory)", + }, + { + name: "content", + type: "string", + required: true, + description: + "The content to write to the file. When performing a full rewrite of an existing file or creating a new one, ALWAYS provide the COMPLETE intended content of the file, without any truncation or omissions. You MUST include ALL parts of the file, even if they haven't been modified. Do NOT include the line numbers in the content though, just the actual content of the file.", + }, + { + name: "line_count", + type: "integer", + required: true, + description: + "The number of lines in the file. Make sure to compute this based on the actual content of the file, not the number of lines in the content you're providing.", + }, + ], +} diff --git a/src/core/prompts/tools/index.ts b/src/core/prompts/tools/index.ts index c212b18a3de4..ae4b57580bb6 100644 --- a/src/core/prompts/tools/index.ts +++ b/src/core/prompts/tools/index.ts @@ -3,6 +3,7 @@ import type { ToolName, ModeConfig } from "@roo-code/types" import { TOOL_GROUPS, ALWAYS_AVAILABLE_TOOLS, DiffStrategy } from "../../../shared/tools" import { McpHub } from "../../../services/mcp/McpHub" import { Mode, getModeConfig, isToolAllowedForMode, getGroupName } from "../../../shared/modes" +import { CodeIndexManager } from "../../../services/code-index/manager" import { ToolArgs } from "./types" import { getExecuteCommandDescription } from "./execute-command" @@ -27,7 +28,50 @@ import { getCodebaseSearchDescription } from "./codebase-search" import { getUpdateTodoListDescription } from "./update-todo-list" import { getRunSlashCommandDescription } from "./run-slash-command" import { getGenerateImageDescription } from "./generate-image" -import { CodeIndexManager } from "../../../services/code-index/manager" 
+ +/** + * Filters a list of tool names based on feature flags, settings, and experiments. + * This ensures consistent tool availability across XML and native tool modes. + */ +export function filterToolsByAvailability( + tools: ToolName[], + codeIndexManager: CodeIndexManager | undefined, + settings?: Record<string, any>, + experiments?: Record<string, boolean>, +): ToolName[] { + return tools.filter((tool) => { + // Conditionally exclude codebase_search if feature is disabled or not configured + if (tool === "codebase_search") { + if ( + !codeIndexManager || + !( + codeIndexManager.isFeatureEnabled && + codeIndexManager.isFeatureConfigured && + codeIndexManager.isInitialized + ) + ) { + return false + } + } + + // Conditionally exclude update_todo_list if disabled in settings + if (tool === "update_todo_list" && settings?.todoListEnabled === false) { + return false + } + + // Conditionally exclude generate_image if experiment is not enabled + if (tool === "generate_image" && !experiments?.imageGeneration) { + return false + } + + // Conditionally exclude run_slash_command if experiment is not enabled + if (tool === "run_slash_command" && !experiments?.runSlashCommand) { + return false + } + + return true + }) +} // Map of tool names to their description functions const toolDescriptionMap: Record<string, (args: ToolArgs) => string | undefined> = { @@ -120,31 +164,17 @@ export function getToolDescriptionsForMode( // Add always available tools ALWAYS_AVAILABLE_TOOLS.forEach((tool) => tools.add(tool)) - // Conditionally exclude codebase_search if feature is disabled or not configured - if ( - !codeIndexManager || - !(codeIndexManager.isFeatureEnabled && codeIndexManager.isFeatureConfigured && codeIndexManager.isInitialized) - ) { - tools.delete("codebase_search") - } - - // Conditionally exclude update_todo_list if disabled in settings - if (settings?.todoListEnabled === false) { - tools.delete("update_todo_list") - } - - // Conditionally exclude generate_image if experiment is not enabled - if 
(!experiments?.imageGeneration) { - tools.delete("generate_image") - } - - // Conditionally exclude run_slash_command if experiment is not enabled - if (!experiments?.runSlashCommand) { - tools.delete("run_slash_command") - } + // Apply consistent filtering across all tool modes + const filteredTools = filterToolsByAvailability( + Array.from(tools) as ToolName[], + codeIndexManager, + settings, + experiments, + ) + const filteredToolsSet = new Set(filteredTools) // Map tool descriptions for allowed tools - const descriptions = Array.from(tools).map((toolName) => { + const descriptions = Array.from(filteredToolsSet).map((toolName) => { const descriptionFn = toolDescriptionMap[toolName] if (!descriptionFn) { return undefined diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts index 51799b742ece..b30e9651b5d5 100644 --- a/src/core/task/Task.ts +++ b/src/core/task/Task.ts @@ -91,6 +91,9 @@ import { type AssistantMessageContent, presentAssistantMessage } from "../assist import { AssistantMessageParser } from "../assistant-message/AssistantMessageParser" import { truncateConversationIfNeeded } from "../sliding-window" import { ClineProvider } from "../webview/ClineProvider" +import { ToolUseHandler } from "../../api/transform/tool-use-handler" +import type { ToolSpec } from "../../api/transform/tool-converters" +import { ensureToolResultsFollowToolUse } from "../context/ensureToolResultsFollowToolUse" import { MultiSearchReplaceDiffStrategy } from "../diff/strategies/multi-search-replace" import { MultiFileSearchReplaceDiffStrategy } from "../diff/strategies/multi-file-search-replace" import { @@ -299,6 +302,10 @@ export class Task extends EventEmitter implements TaskLike { private lastUsedInstructions?: string private skipPrevResponseIdOnce: boolean = false + // Native Tool Calling + private useNativeToolCalls: boolean = false + private toolUseHandler: ToolUseHandler + // Token Usage Cache private tokenUsageSnapshot?: TokenUsage private tokenUsageSnapshotAt?: 
number @@ -405,6 +412,9 @@ export class Task extends EventEmitter implements TaskLike { // Initialize the assistant message parser. this.assistantMessageParser = new AssistantMessageParser() + // Initialize the tool use handler for native tool calling + this.toolUseHandler = new ToolUseHandler() + this.messageQueueService = new MessageQueueService() this.messageQueueStateChangedHandler = () => { @@ -576,6 +586,15 @@ export class Task extends EventEmitter implements TaskLike { return this._taskMode } + /** + * Returns whether native tool calling is enabled for this task + * @returns True if using native tool calls, false for XML-based tools + * @public + */ + public get isUsingNativeToolCalls(): boolean { + return this.useNativeToolCalls + } + static create(options: TaskOptions): [Task, Promise<void>] { const instance = new Task({ ...options, startTask: false }) const { images, task, historyItem } = options @@ -1208,7 +1227,9 @@ export class Task extends EventEmitter implements TaskLike { relPath ? ` for '${relPath.toPosix()}'` : "" } without value for required parameter '${paramName}'. Retrying...`, ) - return formatResponse.toolError(formatResponse.missingToolParameterError(paramName)) + return formatResponse.toolError( + formatResponse.missingToolParameterError(paramName, this.isUsingNativeToolCalls), + ) } // Lifecycle @@ -1736,7 +1757,7 @@ export class Task extends EventEmitter implements TaskLike { // the user hits max requests and denies resetting the count. 
break } else { - nextUserContent = [{ type: "text", text: formatResponse.noToolsUsed() }] + nextUserContent = [{ type: "text", text: formatResponse.noToolsUsed(this.isUsingNativeToolCalls) }] this.consecutiveMistakeCount++ } } @@ -1943,6 +1964,7 @@ export class Task extends EventEmitter implements TaskLike { this.presentAssistantMessageLocked = false this.presentAssistantMessageHasPendingUpdates = false this.assistantMessageParser.reset() + this.toolUseHandler.reset() await this.diffViewProvider.reset() @@ -1998,6 +2020,35 @@ export class Task extends EventEmitter implements TaskLike { pendingGroundingSources.push(...chunk.sources) } break + case "tool_calls": { + if (!chunk.tool_call) { + break + } + + // Accumulate tool use blocks in proper Anthropic format + this.toolUseHandler.processToolUseDelta({ + id: chunk.tool_call.function?.id, + type: "tool_use", + name: chunk.tool_call.function?.name, + input: chunk.tool_call.function?.arguments, + }) + + const prevLength = this.assistantMessageContent.length + + // Combine any text content with tool uses + const textContent = assistantMessage.trim() + const textBlocks: AssistantMessageContent[] = textContent + ? [{ type: "text", content: textContent, partial: false }] + : [] + const toolBlocks = this.toolUseHandler.getPartialToolUsesAsContent() + this.assistantMessageContent = [...textBlocks, ...toolBlocks] + + if (this.assistantMessageContent.length > prevLength) { + this.userMessageContentReady = false + } + presentAssistantMessage(this) + break + } case "text": { assistantMessage += chunk.text @@ -2279,9 +2330,33 @@ export class Task extends EventEmitter implements TaskLike { // Can't just do this b/c a tool could be in the middle of executing. 
// this.assistantMessageContent.forEach((e) => (e.partial = false)) - // Now that the stream is complete, finalize any remaining partial content blocks - this.assistantMessageParser.finalizeContentBlocks() - this.assistantMessageContent = this.assistantMessageParser.getContentBlocks() + // Finalize any remaining tool calls at the end of the stream + if (this.useNativeToolCalls) { + // For native tool calls, mark all pending tool uses as complete + const prevLength = this.assistantMessageContent.length + + // Get finalized tool uses and mark them as complete + const textContent = assistantMessage.trim() + const textBlocks: AssistantMessageContent[] = textContent + ? [{ type: "text", content: textContent, partial: false }] + : [] + + // Get all finalized tool uses and mark as complete + const toolBlocks = this.toolUseHandler + .getPartialToolUsesAsContent() + .map((block) => ({ ...block, partial: false })) + + this.assistantMessageContent = [...textBlocks, ...toolBlocks] + + if (this.assistantMessageContent.length > prevLength) { + this.userMessageContentReady = false + } + presentAssistantMessage(this) + } else { + // XML-based tools: use the existing parser + this.assistantMessageParser.finalizeContentBlocks() + this.assistantMessageContent = this.assistantMessageParser.getContentBlocks() + } if (partialBlocks.length > 0) { // If there is content to update then it will complete and @@ -2324,7 +2399,7 @@ export class Task extends EventEmitter implements TaskLike { // able to save the assistant's response. 
let didEndLoop = false - if (assistantMessage.length > 0) { + if (assistantMessage.length > 0 || this.useNativeToolCalls) { // Display grounding sources to the user if they exist if (pendingGroundingSources.length > 0) { const citationLinks = pendingGroundingSources.map((source, i) => `[${i + 1}](${source.url})`) @@ -2335,9 +2410,44 @@ export class Task extends EventEmitter implements TaskLike { }) } + // Build assistant message content + let assistantContent: Array + + if (this.useNativeToolCalls) { + // Get finalized tool use blocks from the handler + const toolUseBlocks = this.toolUseHandler.getAllFinalizedToolUses() + + // Log finalized native tool calls + if (toolUseBlocks.length > 0) { + console.log( + `[NATIVE_TOOL_CALL] Finalized ${toolUseBlocks.length} tool(s):`, + toolUseBlocks.map((t) => `${t.name}(id:${t.id})`).join(", "), + ) + } + + // Build content array with text (if any) and tool use blocks + assistantContent = [] + + // Only add text block if there's actual text + if (assistantMessage.trim().length > 0) { + assistantContent.push({ + type: "text", + text: assistantMessage, + }) + } + + // Append tool use blocks if any exist + if (toolUseBlocks.length > 0) { + assistantContent.push(...toolUseBlocks) + } + } else { + // XML-based tools: just text + assistantContent = [{ type: "text", text: assistantMessage }] + } + await this.addToApiConversationHistory({ role: "assistant", - content: [{ type: "text", text: assistantMessage }], + content: assistantContent, }) TelemetryService.instance.captureConversationMessage(this.taskId, "assistant") @@ -2365,7 +2475,10 @@ export class Task extends EventEmitter implements TaskLike { const didToolUse = this.assistantMessageContent.some((block) => block.type === "tool_use") if (!didToolUse) { - this.userMessageContent.push({ type: "text", text: formatResponse.noToolsUsed() }) + this.userMessageContent.push({ + type: "text", + text: formatResponse.noToolsUsed(this.isUsingNativeToolCalls), + }) 
this.consecutiveMistakeCount++ } @@ -2412,7 +2525,7 @@ export class Task extends EventEmitter implements TaskLike { return false } - private async getSystemPrompt(): Promise { + private async getSystemPromptAndTools(): Promise<{ systemPrompt: string; tools?: ToolSpec[] }> { const { mcpEnabled } = (await this.providerRef.deref()?.getState()) ?? {} let mcpHub: McpHub | undefined if (mcpEnabled ?? true) { @@ -2454,53 +2567,59 @@ export class Task extends EventEmitter implements TaskLike { apiConfiguration, } = state ?? {} - return await (async () => { - const provider = this.providerRef.deref() + const provider = this.providerRef.deref() - if (!provider) { - throw new Error("Provider not available") - } + if (!provider) { + throw new Error("Provider not available") + } - // Align browser tool enablement with generateSystemPrompt: require model image support, - // mode to include the browser group, and the user setting to be enabled. - const modeConfig = getModeBySlug(mode ?? defaultModeSlug, customModes) - const modeSupportsBrowser = modeConfig?.groups.some((group) => getGroupName(group) === "browser") ?? false + // Align browser tool enablement with generateSystemPrompt: require model image support, + // mode to include the browser group, and the user setting to be enabled. + const modeConfig = getModeBySlug(mode ?? defaultModeSlug, customModes) + const modeSupportsBrowser = modeConfig?.groups.some((group) => getGroupName(group) === "browser") ?? false - // Check if model supports browser capability (images) - const modelInfo = this.api.getModel().info - const modelSupportsBrowser = (modelInfo as any)?.supportsImages === true - - const canUseBrowserTool = modelSupportsBrowser && modeSupportsBrowser && (browserToolEnabled ?? true) - - return SYSTEM_PROMPT( - provider.context, - this.cwd, - canUseBrowserTool, - mcpHub, - this.diffStrategy, - browserViewportSize ?? "900x600", - mode ?? 
defaultModeSlug, - customModePrompts, - customModes, - customInstructions, - this.diffEnabled, - experiments, - enableMcpServerCreation, - language, - rooIgnoreInstructions, - maxReadFileLine !== -1, - { - maxConcurrentFileReads: maxConcurrentFileReads ?? 5, - todoListEnabled: apiConfiguration?.todoListEnabled ?? true, - useAgentRules: vscode.workspace.getConfiguration("roo-cline").get("useAgentRules") ?? true, - newTaskRequireTodos: vscode.workspace - .getConfiguration("roo-cline") - .get("newTaskRequireTodos", false), - }, - undefined, // todoList - this.api.getModel().id, - ) - })() + // Check if model supports browser capability (images) + const modelInfo = this.api.getModel().info + const modelSupportsBrowser = (modelInfo as any)?.supportsImages === true + + const canUseBrowserTool = modelSupportsBrowser && modeSupportsBrowser && (browserToolEnabled ?? true) + + // Check if this provider supports native tools (checks both capability and user setting) + const useNativeTools = this.api.supportsNativeTools() + + return await SYSTEM_PROMPT( + provider.context, + this.cwd, + canUseBrowserTool, + mcpHub, + this.diffStrategy, + browserViewportSize ?? "900x600", + mode ?? defaultModeSlug, + customModePrompts, + customModes, + customInstructions, + this.diffEnabled, + experiments, + enableMcpServerCreation, + language, + rooIgnoreInstructions, + maxReadFileLine !== -1, + { + maxConcurrentFileReads: maxConcurrentFileReads ?? 5, + todoListEnabled: apiConfiguration?.todoListEnabled ?? true, + useAgentRules: vscode.workspace.getConfiguration("roo-cline").get("useAgentRules") ?? 
true, + newTaskRequireTodos: vscode.workspace + .getConfiguration("roo-cline") + .get("newTaskRequireTodos", false), + }, + this.api.getModel().id, + useNativeTools, + ) + } + + private async getSystemPrompt(): Promise { + const result = await this.getSystemPromptAndTools() + return result.systemPrompt } private getCurrentProfileId(state: any): string { @@ -2633,8 +2752,10 @@ export class Task extends EventEmitter implements TaskLike { // requests — even from new subtasks — will honour the provider's rate-limit. Task.lastGlobalApiRequestTime = performance.now() - const systemPrompt = await this.getSystemPrompt() + const { systemPrompt, tools } = await this.getSystemPromptAndTools() this.lastUsedInstructions = systemPrompt + this.useNativeToolCalls = !!tools?.length + const { contextTokens } = this.getTokenUsage() if (contextTokens) { @@ -2696,6 +2817,11 @@ export class Task extends EventEmitter implements TaskLike { ({ role, content }) => ({ role, content }), ) + // Ensure tool results follow tool uses when using native tool calling + if (this.useNativeToolCalls) { + ensureToolResultsFollowToolUse(cleanConversationHistory) + } + // Check auto-approval limits const approvalResult = await this.autoApprovalHandler.checkAutoApprovalLimits( state, @@ -2749,7 +2875,7 @@ export class Task extends EventEmitter implements TaskLike { this.skipPrevResponseIdOnce = false } - const stream = this.api.createMessage(systemPrompt, cleanConversationHistory, metadata) + const stream = this.api.createMessage(systemPrompt, cleanConversationHistory, metadata, tools) const iterator = stream[Symbol.asyncIterator]() try { diff --git a/src/core/tools/writeToFileTool.ts b/src/core/tools/writeToFileTool.ts index 5abd96a20aff..b703e97bf031 100644 --- a/src/core/tools/writeToFileTool.ts +++ b/src/core/tools/writeToFileTool.ts @@ -152,7 +152,12 @@ export async function writeToFileTool( pushToolResult( formatResponse.toolError( - formatResponse.lineCountTruncationError(actualLineCount, isNewFile, 
diffStrategyEnabled), + formatResponse.lineCountTruncationError( + actualLineCount, + isNewFile, + diffStrategyEnabled, + cline.isUsingNativeToolCalls, + ), ), ) await cline.diffViewProvider.revertChanges() diff --git a/src/core/webview/__tests__/ClineProvider.spec.ts b/src/core/webview/__tests__/ClineProvider.spec.ts index 3d68fac2acb0..e4278df61252 100644 --- a/src/core/webview/__tests__/ClineProvider.spec.ts +++ b/src/core/webview/__tests__/ClineProvider.spec.ts @@ -288,7 +288,7 @@ vi.mock("../../../shared/modes", () => ({ })) vi.mock("../../prompts/system", () => ({ - SYSTEM_PROMPT: vi.fn().mockResolvedValue("mocked system prompt"), + SYSTEM_PROMPT: vi.fn().mockResolvedValue({ systemPrompt: "mocked system prompt" }), codeMode: "code", })) @@ -297,6 +297,7 @@ vi.mock("../../../api", () => ({ getModel: vi.fn().mockReturnValue({ id: "claude-3-sonnet", }), + supportsNativeTools: vi.fn().mockReturnValue(false), }), })) diff --git a/src/core/webview/__tests__/generateSystemPrompt.browser-capability.spec.ts b/src/core/webview/__tests__/generateSystemPrompt.browser-capability.spec.ts index 9b3f94f309b1..2165f593cf77 100644 --- a/src/core/webview/__tests__/generateSystemPrompt.browser-capability.spec.ts +++ b/src/core/webview/__tests__/generateSystemPrompt.browser-capability.spec.ts @@ -6,8 +6,8 @@ import { generateSystemPrompt } from "../generateSystemPrompt" // Mock SYSTEM_PROMPT to capture its third argument (browser capability flag) vi.mock("../../prompts/system", () => ({ SYSTEM_PROMPT: vi.fn(async (_ctx, _cwd, canUseBrowserTool: boolean) => { - // return a simple string to satisfy return type - return `SYSTEM_PROMPT:${canUseBrowserTool}` + // return object with systemPrompt to match new return type + return { systemPrompt: `SYSTEM_PROMPT:${canUseBrowserTool}` } }), })) @@ -23,6 +23,7 @@ vi.mock("../../../api", () => ({ supportsPromptCache: false, }, }), + supportsNativeTools: () => false, })), })) diff --git a/src/core/webview/generateSystemPrompt.ts 
b/src/core/webview/generateSystemPrompt.ts index 0920a6164826..e503fd6d0073 100644 --- a/src/core/webview/generateSystemPrompt.ts +++ b/src/core/webview/generateSystemPrompt.ts @@ -67,7 +67,16 @@ export const generateSystemPrompt = async (provider: ClineProvider, message: Web // and browser tools are enabled in settings const canUseBrowserTool = modelSupportsBrowser && modeSupportsBrowser && (browserToolEnabled ?? true) - const systemPrompt = await SYSTEM_PROMPT( + // Check if the provider supports native tools (checks both capability and user setting) + let useNativeTools = false + try { + const tempApiHandler = buildApiHandler(apiConfiguration) + useNativeTools = tempApiHandler.supportsNativeTools() + } catch (error) { + // If we can't build the API handler, default to false + } + + const { systemPrompt } = await SYSTEM_PROMPT( provider.context, cwd, canUseBrowserTool, @@ -92,6 +101,8 @@ export const generateSystemPrompt = async (provider: ClineProvider, message: Web .getConfiguration("roo-cline") .get("newTaskRequireTodos", false), }, + undefined, // modelId + useNativeTools, ) return systemPrompt diff --git a/src/integrations/misc/export-markdown.ts b/src/integrations/misc/export-markdown.ts index 2d493ce50cb9..ae7d2736f90c 100644 --- a/src/integrations/misc/export-markdown.ts +++ b/src/integrations/misc/export-markdown.ts @@ -48,15 +48,7 @@ export function formatContentBlockToMarkdown(block: Anthropic.Messages.ContentBl case "image": return `[Image]` case "tool_use": { - let input: string - if (typeof block.input === "object" && block.input !== null) { - input = Object.entries(block.input) - .map(([key, value]) => `${key.charAt(0).toUpperCase() + key.slice(1)}: ${value}`) - .join("\n") - } else { - input = String(block.input) - } - return `[Tool Use: ${block.name}]\n${input}` + return `[Tool Use: ${block.name}]\n${JSON.stringify(block.input, null, 2)}` } case "tool_result": { // For now we're not doing tool name lookup since we don't use tools anymore diff 
--git a/src/package.json b/src/package.json index 6f9d170c4d8c..059ce03d93be 100644 --- a/src/package.json +++ b/src/package.json @@ -423,6 +423,11 @@ "default": false, "description": "%settings.newTaskRequireTodos.description%" }, + "roo-cline.nativeToolCalling": { + "type": "boolean", + "default": false, + "description": "Enable native tool calling (using provider's built-in tool format) instead of XML-based tools. This is more efficient but experimental." + }, "roo-cline.codeIndex.embeddingBatchSize": { "type": "number", "default": 60, @@ -462,6 +467,7 @@ "@roo-code/ipc": "workspace:^", "@roo-code/telemetry": "workspace:^", "@roo-code/types": "workspace:^", + "@streamparser/json": "^0.0.22", "@vscode/codicons": "^0.0.36", "async-mutex": "^0.5.0", "axios": "^1.12.0",