diff --git a/src/core/assistant-message/AssistantMessageParser.ts b/src/core/assistant-message/AssistantMessageParser.ts index 364ec603f220..bd6b0bb49899 100644 --- a/src/core/assistant-message/AssistantMessageParser.ts +++ b/src/core/assistant-message/AssistantMessageParser.ts @@ -1,6 +1,7 @@ import { type ToolName, toolNames } from "@roo-code/types" import { TextContent, ToolUse, ToolParamName, toolParamNames } from "../../shared/tools" import { AssistantMessageContent } from "./parseAssistantMessage" +import { FunctionCallsStreamingNormalizer } from "./functionCallsNormalizer" /** * Parser for assistant messages. Maintains state between chunks @@ -17,6 +18,11 @@ export class AssistantMessageParser { private readonly MAX_ACCUMULATOR_SIZE = 1024 * 1024 // 1MB limit private readonly MAX_PARAM_LENGTH = 1024 * 100 // 100KB per parameter limit private accumulator = "" + // VSCode-LM function_calls/invoke streaming normalizer + private normalizer = new FunctionCallsStreamingNormalizer() + // Minimal telemetry flags (readable by caller if needed) + public functionCallsNormalized = false + public functionCallsToolNamesEncountered = new Set() /** * Initialize a new AssistantMessageParser instance. @@ -37,6 +43,10 @@ export class AssistantMessageParser { this.currentParamName = undefined this.currentParamValueStartIndex = 0 this.accumulator = "" + // Reset normalizer and telemetry + this.normalizer.reset() + this.functionCallsNormalized = false + this.functionCallsToolNamesEncountered.clear() } /** @@ -52,14 +62,24 @@ export class AssistantMessageParser { * @param chunk The new chunk of text to process. 
*/ public processChunk(chunk: string): AssistantMessageContent[] { - if (this.accumulator.length + chunk.length > this.MAX_ACCUMULATOR_SIZE) { + // Pre-normalize VSCode-LM function_calls/invoke XML to native tool XML + const normalizedChunk = this.normalizer.process(chunk) + // Collect minimal telemetry + if (this.normalizer.normalizedInLastChunk) { + this.functionCallsNormalized = true + } + for (const name of this.normalizer.toolNamesEncountered) { + this.functionCallsToolNamesEncountered.add(name) + } + + if (this.accumulator.length + normalizedChunk.length > this.MAX_ACCUMULATOR_SIZE) { throw new Error("Assistant message exceeds maximum allowed size") } // Store the current length of the accumulator before adding the new chunk const accumulatorStartLength = this.accumulator.length - for (let i = 0; i < chunk.length; i++) { - const char = chunk[i] + for (let i = 0; i < normalizedChunk.length; i++) { + const char = normalizedChunk[i] this.accumulator += char const currentPosition = accumulatorStartLength + i @@ -78,10 +98,16 @@ export class AssistantMessageParser { // End of param value. // Do not trim content parameters to preserve newlines, but strip first and last newline only const paramValue = currentParamValue.slice(0, -paramClosingTag.length) - this.currentToolUse.params[this.currentParamName] = - this.currentParamName === "content" - ? 
paramValue.replace(/^\n/, "").replace(/\n$/, "") - : paramValue.trim() + if (this.currentParamName === "content") { + this.currentToolUse.params[this.currentParamName] = paramValue + .replace(/^\n/, "") + .replace(/\n$/, "") + } else if (this.currentParamName === "args") { + // Preserve args exactly, including whitespace/newlines + this.currentToolUse.params[this.currentParamName] = paramValue + } else { + this.currentToolUse.params[this.currentParamName] = paramValue.trim() + } this.currentParamName = undefined continue } else { diff --git a/src/core/assistant-message/__tests__/AssistantMessageParser.spec.ts b/src/core/assistant-message/__tests__/AssistantMessageParser.spec.ts index 6b7c3915ee7e..b412f2fc5d2f 100644 --- a/src/core/assistant-message/__tests__/AssistantMessageParser.spec.ts +++ b/src/core/assistant-message/__tests__/AssistantMessageParser.spec.ts @@ -392,3 +392,69 @@ describe("AssistantMessageParser (streaming)", () => { }) }) }) + +// VSCode-LM function_calls normalizer tests (streaming) +describe("VSCode-LM function_calls normalizer (streaming)", () => { + it("should normalize single invoke with args preserved", () => { + const parser = new AssistantMessageParser() + const argsXml = "src/a.ts" + const message = `${argsXml}` + const result = streamChunks(parser, message).filter((block) => !isEmptyTextContent(block)) + expect(result).toHaveLength(1) + const toolUse = result[0] as ToolUse + expect(toolUse.type).toBe("tool_use") + expect(toolUse.name).toBe("read_file") + expect(toolUse.params.args).toBe(argsXml) + expect(toolUse.partial).toBe(false) + }) + + it("should handle multiple invokes with surrounding text", () => { + const parser = new AssistantMessageParser() + const args1 = "file1.ts" + const args2 = "file2.ts" + const message = `Before ${args1} Middle ${args2} After` + const result = streamChunks(parser, message) + expect(result).toHaveLength(5) + + expect(result[0].type).toBe("text") + expect((result[0] as 
TextContent).content).toBe("Before") + + const toolUse1 = result[1] as ToolUse + expect(toolUse1.type).toBe("tool_use") + expect(toolUse1.name).toBe("read_file") + expect(toolUse1.params.args).toBe(args1) + + expect(result[2].type).toBe("text") + expect((result[2] as TextContent).content).toBe("Middle") + + const toolUse2 = result[3] as ToolUse + expect(toolUse2.type).toBe("tool_use") + expect(toolUse2.name).toBe("read_file") + expect(toolUse2.params.args).toBe(args2) + + expect(result[4].type).toBe("text") + expect((result[4] as TextContent).content).toBe("After") + }) + + it("should pass through unknown invoke as text and not create tool_use", () => { + const parser = new AssistantMessageParser() + const message = `y` + const result = streamChunks(parser, message) + expect(result).toHaveLength(1) + const text = result[0] as TextContent + expect(text.type).toBe("text") + expect(text.content).toContain('') + }) + + it("should preserve multi-file args xml exactly", () => { + const parser = new AssistantMessageParser() + const argsXml = "a.tsb.ts" + const message = `${argsXml}` + const result = streamChunks(parser, message).filter((block) => !isEmptyTextContent(block)) + expect(result).toHaveLength(1) + const toolUse = result[0] as ToolUse + expect(toolUse.type).toBe("tool_use") + expect(toolUse.name).toBe("read_file") + expect(toolUse.params.args).toBe(argsXml) + }) +}) diff --git a/src/core/assistant-message/__tests__/parseAssistantMessage.spec.ts b/src/core/assistant-message/__tests__/parseAssistantMessage.spec.ts index f5ae600beed4..6615e32dcfdc 100644 --- a/src/core/assistant-message/__tests__/parseAssistantMessage.spec.ts +++ b/src/core/assistant-message/__tests__/parseAssistantMessage.spec.ts @@ -338,3 +338,78 @@ const isEmptyTextContent = (block: AssistantMessageContent) => }) }) }) + +// VSCode-LM function_calls normalizer tests (non-stream) +;[parseAssistantMessageV1, parseAssistantMessageV2].forEach((parser, index) => { + describe(`VSCode-LM function_calls 
normalizer (non-stream) V${index + 1}`, () => { + it("should normalize single invoke with args preserved", () => { + const argsXml = "src/a.ts" + const message = `${argsXml}` + const result = parser(message).filter((block) => !isEmptyTextContent(block)) + expect(result).toHaveLength(1) + const toolUse = result[0] as ToolUse + expect(toolUse.type).toBe("tool_use") + expect(toolUse.name).toBe("read_file") + expect(toolUse.params.args).toBe(argsXml) + expect(toolUse.partial).toBe(false) + }) + + it("should handle multiple invokes with surrounding text", () => { + const args1 = "file1.ts" + const args2 = "file2.ts" + const message = `Before ${args1} Middle ${args2} After` + const result = parser(message) + + expect(result).toHaveLength(5) + + expect(result[0].type).toBe("text") + expect((result[0] as TextContent).content).toBe("Before") + + const toolUse1 = result[1] as ToolUse + expect(toolUse1.type).toBe("tool_use") + expect(toolUse1.name).toBe("read_file") + expect(toolUse1.params.args).toBe(args1) + + expect(result[2].type).toBe("text") + expect((result[2] as TextContent).content).toBe("Middle") + + const toolUse2 = result[3] as ToolUse + expect(toolUse2.type).toBe("tool_use") + expect(toolUse2.name).toBe("read_file") + expect(toolUse2.params.args).toBe(args2) + + expect(result[4].type).toBe("text") + expect((result[4] as TextContent).content).toBe("After") + }) + + it("should pass through unknown invoke as text and not create tool_use", () => { + const message = `y` + const result = parser(message) + expect(result).toHaveLength(1) + const text = result[0] as TextContent + expect(text.type).toBe("text") + expect(text.content).toContain('') + }) + + it("should preserve multi-file args xml exactly", () => { + const argsXml = "a.tsb.ts" + const message = `${argsXml}` + const result = parser(message).filter((block) => !isEmptyTextContent(block)) + expect(result).toHaveLength(1) + const toolUse = result[0] as ToolUse + expect(toolUse.type).toBe("tool_use") + 
/**
 * Normalization of VSCode-LM style function_calls/invoke XML into native tool XML.
 *
 * Converts:
 *   <function_calls><invoke name="read_file"><args>...</args></invoke></function_calls>
 * to:
 *   <read_file><args>...</args></read_file>
 *
 * - Removes the outer <function_calls> / </function_calls> container tags
 * - Rewrites <invoke name="tool"> to <tool> and the matching </invoke> to </tool>,
 *   but only when "tool" is a known tool name
 * - Leaves unknown tool names, native tool tags and every other character untouched
 *
 * `toolNames` (the default tool registry) comes from this module's existing
 * "@roo-code/types" import.
 */

/** Outer container tags; they are matched exactly (no attributes, no whitespace). */
const FUNCTION_CALLS_OPEN = "<function_calls>"
const FUNCTION_CALLS_CLOSE = "</function_calls>"

/** Heads of the attribute-carrying tags; a word boundary must follow them. */
const INVOKE_OPEN_HEAD = "<invoke"
const INVOKE_CLOSE_HEAD = "</invoke"

/** <invoke ... name="tool" ...> - captures the tool name. */
const INVOKE_OPEN_RE = /^<invoke\b[^>]*?\bname="([^"]+)"[^>]*>$/
/** </invoke> - optional attributes/whitespace tolerated. */
const INVOKE_CLOSE_RE = /^<\/invoke\b[^>]*>$/

/** One <invoke> that has been opened and not yet closed. */
interface InvokeFrame {
	name: string
	known: boolean
}

/** Outcome of one pass of {@link scanInvokeMarkup}. */
interface ScanResult {
	/** Normalized text that is safe to hand to the downstream parser. */
	output: string
	/** Number of leading characters of the input that were fully handled. */
	consumed: number
	/** True when at least one tag was removed or rewritten. */
	normalized: boolean
}

/**
 * Tells whether `fragment` (starts with "<", contains no ">") could still grow
 * into one of the four tags this module rewrites. Anything else can be emitted
 * immediately instead of being held back for more input.
 */
function mayBecomeInvokeMarkup(fragment: string): boolean {
	if (FUNCTION_CALLS_OPEN.startsWith(fragment) || FUNCTION_CALLS_CLOSE.startsWith(fragment)) {
		return true
	}
	for (const head of [INVOKE_OPEN_HEAD, INVOKE_CLOSE_HEAD]) {
		if (head.startsWith(fragment)) {
			return true
		}
		// Mirror the `\b` in the regexes: "<invoke " qualifies, "<invoker" does not.
		if (fragment.startsWith(head) && !/\w/.test(fragment.charAt(head.length))) {
			return true
		}
	}
	return false
}

/**
 * Shared scanner behind both the streaming class and the one-shot function.
 *
 * @param text Input to scan.
 * @param knownTools Tool names whose invokes are rewritten to native tags.
 * @param stack Open-invoke stack; mutated so closing tags pair with their opener.
 * @param seenToolNames Receives every invoke name encountered (known or not).
 * @param holdBackLimit Maximum length of a trailing incomplete tag to hold back
 *   for the next chunk. `undefined` means "end of input": nothing is held back
 *   and an incomplete tag is copied through as literal text.
 */
function scanInvokeMarkup(
	text: string,
	knownTools: ReadonlySet<string>,
	stack: InvokeFrame[],
	seenToolNames: Set<string>,
	holdBackLimit: number | undefined,
): ScanResult {
	let output = ""
	let normalized = false
	let pos = 0

	while (pos < text.length) {
		const lt = text.indexOf("<", pos)
		if (lt === -1) {
			output += text.slice(pos)
			pos = text.length
			break
		}
		output += text.slice(pos, lt)
		pos = lt

		const gt = text.indexOf(">", lt)
		if (gt === -1) {
			const fragment = text.slice(lt)
			const holdBack =
				holdBackLimit !== undefined && fragment.length <= holdBackLimit && mayBecomeInvokeMarkup(fragment)
			if (holdBack) {
				// Possibly a split tag - leave it unconsumed and wait for more data.
				break
			}
			// Not (or no longer plausibly) one of our tags: the "<" is plain text.
			output += "<"
			pos = lt + 1
			continue
		}

		const tag = text.slice(lt, gt + 1)

		if (tag === FUNCTION_CALLS_OPEN || tag === FUNCTION_CALLS_CLOSE) {
			// Container tags are dropped.
			normalized = true
			pos = gt + 1
			continue
		}

		const opening = INVOKE_OPEN_RE.exec(tag)
		if (opening && opening[1] !== undefined) {
			const tool = opening[1]
			const known = knownTools.has(tool)
			seenToolNames.add(tool)
			// Unknown tools still get a frame so their </invoke> is paired and passed through.
			stack.push({ name: tool, known })
			if (known) {
				output += `<${tool}>`
				normalized = true
			} else {
				output += tag
			}
			pos = gt + 1
			continue
		}

		if (INVOKE_CLOSE_RE.test(tag)) {
			const frame = stack.pop()
			if (frame && frame.known) {
				output += `</${frame.name}>`
				normalized = true
			} else {
				// No matching opener, or opener was an unknown tool -> pass through.
				output += tag
			}
			pos = gt + 1
			continue
		}

		// Some other tag, or a stray "<" (e.g. `a < b`). Emit only the "<" and rescan
		// from the next character so a real tag that follows is still recognised.
		output += "<"
		pos = lt + 1
	}

	return { output, consumed: pos, normalized }
}

/**
 * Streaming normalizer for VSCode-LM style function_calls/invoke XML.
 *
 * Feed chunks through {@link process}; the concatenation of the returned strings
 * (plus {@link flush} at end of stream) is the normalized message. Only a trailing
 * fragment that could still become a function_calls/invoke tag is held back between
 * chunks, and never more than `tailLimit` characters, so no input is ever dropped.
 */
export class FunctionCallsStreamingNormalizer {
	private buffer = ""
	private readonly tailLimit = 512
	private readonly knownTools: Set<string>
	private readonly MAX_ACCUMULATOR_SIZE = 1024 * 1024 // 1MB guidance
	// Track invoke stack to map closing </invoke> to the correct </tool>
	private invokeStack: InvokeFrame[] = []

	// Stats (can be read by caller if desired)
	public normalizedInLastChunk = false
	public toolNamesEncountered = new Set<string>()

	/**
	 * @param knownToolNames Tool names eligible for rewriting. Defaults to the
	 *   `toolNames` registry; override mainly for tests.
	 */
	constructor(knownToolNames: Iterable<string> = toolNames) {
		this.knownTools = new Set<string>(knownToolNames)
	}

	/** Discards buffered input, the invoke stack and all stats. */
	public reset(): void {
		this.buffer = ""
		this.invokeStack = []
		this.normalizedInLastChunk = false
		this.toolNamesEncountered.clear()
	}

	/**
	 * Process a streaming chunk and return normalized text for the downstream parser.
	 * May return an empty string if only container tags were removed or if the chunk
	 * ended inside a tag that has to be completed by a later chunk.
	 *
	 * @throws Error when buffered input plus `chunk` exceeds the 1MB guard.
	 */
	public process(chunk: string): string {
		this.normalizedInLastChunk = false
		if (!chunk) return ""
		if (this.buffer.length + chunk.length > this.MAX_ACCUMULATOR_SIZE) {
			// Protect against unbounded growth due to pathological streams
			throw new Error("Assistant message exceeds maximum allowed size")
		}

		const pending = this.buffer + chunk
		const result = scanInvokeMarkup(
			pending,
			this.knownTools,
			this.invokeStack,
			this.toolNamesEncountered,
			this.tailLimit,
		)

		// Whatever was not consumed is an incomplete candidate tag (<= tailLimit chars).
		this.buffer = pending.slice(result.consumed)
		this.normalizedInLastChunk = result.normalized
		return result.output
	}

	/**
	 * Returns (and clears) any text still held back waiting for a closing ">".
	 * Call once the stream has ended so a truncated trailing tag is not lost.
	 */
	public flush(): string {
		const rest = this.buffer
		this.buffer = ""
		return rest
	}
}

/**
 * One-shot non-stream normalization of VSCode-LM function_calls/invoke XML.
 * Applies the same rewriting rules as {@link FunctionCallsStreamingNormalizer}.
 * Malformed or truncated markup is handled best-effort: complete tags are still
 * normalized and any incomplete trailing tag is copied through verbatim.
 *
 * @param input Full (or partial) assistant message text.
 * @param knownToolNames Tool names eligible for rewriting. Defaults to `toolNames`.
 * @returns The normalized text; `input` itself when nothing can apply.
 */
export function normalizeFunctionCallsXml(input: string, knownToolNames: Iterable<string> = toolNames): string {
	if (!input) return input
	// Fast path: every tag we rewrite contains one of these words.
	if (!input.includes("function_calls") && !input.includes("invoke")) return input

	const result = scanInvokeMarkup(input, new Set<string>(knownToolNames), [], new Set<string>(), undefined)
	return result.output
}
- // Don't trim content parameters to preserve newlines, but strip first and last newline only + // Preserve args exactly; content preserves newlines except first/last; others trimmed const paramValue = currentParamValue.slice(0, -paramClosingTag.length) - currentToolUse.params[currentParamName] = - currentParamName === "content" - ? paramValue.replace(/^\n/, "").replace(/\n$/, "") - : paramValue.trim() + if (currentParamName === "content") { + currentToolUse.params[currentParamName] = paramValue.replace(/^\n/, "").replace(/\n$/, "") + } else if (currentParamName === "args") { + currentToolUse.params[currentParamName] = paramValue + } else { + currentToolUse.params[currentParamName] = paramValue.trim() + } currentParamName = undefined continue } else { @@ -147,8 +154,13 @@ export function parseAssistantMessage(assistantMessage: string): AssistantMessag // Tool call has a parameter that was not completed. // Don't trim content parameters to preserve newlines, but strip first and last newline only const paramValue = accumulator.slice(currentParamValueStartIndex) - currentToolUse.params[currentParamName] = - currentParamName === "content" ? 
paramValue.replace(/^\n/, "").replace(/\n$/, "") : paramValue.trim() + if (currentParamName === "content") { + currentToolUse.params[currentParamName] = paramValue.replace(/^\n/, "").replace(/\n$/, "") + } else if (currentParamName === "args") { + currentToolUse.params[currentParamName] = paramValue + } else { + currentToolUse.params[currentParamName] = paramValue.trim() + } } contentBlocks.push(currentToolUse) diff --git a/src/core/assistant-message/parseAssistantMessageV2.ts b/src/core/assistant-message/parseAssistantMessageV2.ts index 7c7526cbdb4d..3d23fb446bc7 100644 --- a/src/core/assistant-message/parseAssistantMessageV2.ts +++ b/src/core/assistant-message/parseAssistantMessageV2.ts @@ -1,6 +1,7 @@ import { type ToolName, toolNames } from "@roo-code/types" import { TextContent, ToolUse, ToolParamName, toolParamNames } from "../../shared/tools" +import { normalizeFunctionCallsXml } from "./functionCallsNormalizer" export type AssistantMessageContent = TextContent | ToolUse @@ -38,6 +39,9 @@ export type AssistantMessageContent = TextContent | ToolUse */ export function parseAssistantMessageV2(assistantMessage: string): AssistantMessageContent[] { + // Pre-normalize VSCode-LM function_calls/invoke XML to native tool XML + assistantMessage = normalizeFunctionCallsXml(assistantMessage) + const contentBlocks: AssistantMessageContent[] = [] let currentTextContentStart = 0 // Index where the current text block started. @@ -80,9 +84,14 @@ export function parseAssistantMessageV2(assistantMessage: string): AssistantMess currentParamValueStart, // Start after the opening tag. currentCharIndex - closeTag.length + 1, // End before the closing tag. ) - // Don't trim content parameters to preserve newlines, but strip first and last newline only - currentToolUse.params[currentParamName] = - currentParamName === "content" ? 
value.replace(/^\n/, "").replace(/\n$/, "") : value.trim() + // Preserve args exactly; content preserves newlines (strip first/last); others trimmed + if (currentParamName === "content") { + currentToolUse.params[currentParamName] = value.replace(/^\n/, "").replace(/\n$/, "") + } else if (currentParamName === "args") { + currentToolUse.params[currentParamName] = value + } else { + currentToolUse.params[currentParamName] = value.trim() + } currentParamName = undefined // Go back to parsing tool content. // We don't continue loop here, need to check for tool close or other params at index i. } else { @@ -253,9 +262,14 @@ export function parseAssistantMessageV2(assistantMessage: string): AssistantMess // Finalize any open parameter within an open tool use. if (currentToolUse && currentParamName) { const value = assistantMessage.slice(currentParamValueStart) // From param start to end of string. - // Don't trim content parameters to preserve newlines, but strip first and last newline only - currentToolUse.params[currentParamName] = - currentParamName === "content" ? value.replace(/^\n/, "").replace(/\n$/, "") : value.trim() + // Preserve args exactly; content preserves newlines (strip first/last); others trimmed + if (currentParamName === "content") { + currentToolUse.params[currentParamName] = value.replace(/^\n/, "").replace(/\n$/, "") + } else if (currentParamName === "args") { + currentToolUse.params[currentParamName] = value + } else { + currentToolUse.params[currentParamName] = value.trim() + } // Tool use remains partial. 
} diff --git a/src/core/assistant-message/presentAssistantMessage.ts b/src/core/assistant-message/presentAssistantMessage.ts index 689675999fd1..f6bb63cd428c 100644 --- a/src/core/assistant-message/presentAssistantMessage.ts +++ b/src/core/assistant-message/presentAssistantMessage.ts @@ -150,7 +150,30 @@ export async function presentAssistantMessage(cline: Task) { } } - await cline.say("text", content, undefined, block.partial) + // Attach minimal telemetry metadata about function_calls normalization to the text message (no PII) + const normalized = cline.assistantMessageParser.functionCallsNormalized + const toolNamesEncountered = Array.from( + cline.assistantMessageParser.functionCallsToolNamesEncountered || [], + ) + const modelIdForMeta = cline.api.getModel().id + + await cline.say( + "text", + content, + undefined, + block.partial, + undefined, + undefined, + normalized + ? { + metadata: { + function_calls_normalized: true, + toolNamesEncountered, + modelId: modelIdForMeta, + }, + } + : undefined, + ) break } case "tool_use":