fix: sanitize tool tags from XAI reasoning content

roomote · roomote · commit 2ae05f9cd890 · 2025-11-05T01:06:56.000Z
Added a sanitizeReasoningContent function to remove tool-related XML tags; this prevents display of <appy_diff>, <switch_mode>, and other tool tags in thinking blocks. Added comprehensive tests for the sanitization logic. Fixes #9041.
diff --git a/src/api/providers/__tests__/xai.spec.ts b/src/api/providers/__tests__/xai.spec.ts
@@ -204,6 +204,154 @@ describe("XAIHandler", () => {
 		})
 	})
 
+	// Tool-call markup that appears inside a reasoning delta must not show up in the
+	// emitted "reasoning" chunk; the text the tags wrapped must.
+	it("createMessage should sanitize tool tags from reasoning content", async () => {
+		const rawReasoning =
+			"I need to <apply_diff>fix this code</apply_diff> and then <switch_mode>change mode</switch_mode>"
+		const cleanedReasoning = "I need to fix this code and then change mode"
+
+		// Fake stream: one reasoning delta that embeds tool tags, then end-of-stream
+		const reasoningDelta = {
+			choices: [{ delta: { reasoning_content: rawReasoning } }],
+		}
+		mockCreate.mockImplementationOnce(() => ({
+			[Symbol.asyncIterator]: () => {
+				const next = vi.fn()
+				next.mockResolvedValueOnce({ done: false, value: reasoningDelta })
+				next.mockResolvedValueOnce({ done: true })
+				return { next }
+			},
+		}))
+
+		// Pull only the first chunk the handler emits
+		const chunks = handler.createMessage("system prompt", [])
+		const { done, value } = await chunks.next()
+
+		// The stream must not be finished yet
+		expect(done).toBe(false)
+
+		// The tags must be gone while the text they wrapped is kept
+		expect(value).toEqual({
+			type: "reasoning",
+			text: cleanedReasoning,
+		})
+	})
+
+	// Covers tags that carry attributes and tags that sit on their own lines inside
+	// a multi-line reasoning delta.
+	it("createMessage should handle complex nested tool tags in reasoning", async () => {
+		const taggedReasoning = `Let me think about this...
+<read_file path="test.ts">
+This should be removed
+</read_file>
+Now I'll use <execute_command>npm test</execute_command>
+And finally <attempt_completion result="done">complete</attempt_completion>`
+
+		const untaggedReasoning = `Let me think about this...
+
+This should be removed
+
+Now I'll use npm test
+And finally complete`
+
+		// Fake stream: one multi-line reasoning delta, then end-of-stream
+		const reasoningDelta = {
+			choices: [{ delta: { reasoning_content: taggedReasoning } }],
+		}
+		mockCreate.mockImplementationOnce(() => ({
+			[Symbol.asyncIterator]: () => {
+				const next = vi.fn()
+				next.mockResolvedValueOnce({ done: false, value: reasoningDelta })
+				next.mockResolvedValueOnce({ done: true })
+				return { next }
+			},
+		}))
+
+		// Pull only the first chunk the handler emits
+		const chunks = handler.createMessage("system prompt", [])
+		const { done, value } = await chunks.next()
+
+		// The stream must not be finished yet
+		expect(done).toBe(false)
+
+		// Every tag is stripped; the text between the tags stays in place
+		expect(value).toEqual({
+			type: "reasoning",
+			text: untaggedReasoning,
+		})
+	})
+
+	// A reasoning delta made up solely of tool tags leaves no text behind, so the
+	// handler is expected to emit nothing for it.
+	it("createMessage should not yield reasoning if content is empty after sanitization", async () => {
+		const tagsOnly = "<appy_diff></appy_diff><switch_mode></switch_mode>"
+
+		// Fake stream: a tags-only reasoning delta, then a plain content delta,
+		// then end-of-stream
+		const reasoningDelta = {
+			choices: [{ delta: { reasoning_content: tagsOnly } }],
+		}
+		const contentDelta = {
+			choices: [{ delta: { content: "Regular content" } }],
+		}
+		mockCreate.mockImplementationOnce(() => ({
+			[Symbol.asyncIterator]: () => {
+				const next = vi.fn()
+				next.mockResolvedValueOnce({ done: false, value: reasoningDelta })
+				next.mockResolvedValueOnce({ done: false, value: contentDelta })
+				next.mockResolvedValueOnce({ done: true })
+				return { next }
+			},
+		}))
+
+		// Pull only the first chunk the handler emits
+		const chunks = handler.createMessage("system prompt", [])
+		const { done, value } = await chunks.next()
+
+		// The stream must not be finished yet
+		expect(done).toBe(false)
+
+		// Nothing is left of the reasoning delta, so the first emitted chunk is
+		// the regular text one
+		expect(value).toEqual({
+			type: "text",
+			text: "Regular content",
+		})
+	})
+
+	// Reasoning text that contains no tool tags must come out of the handler
+	// exactly as it went in.
+	it("createMessage should preserve reasoning content without tool tags", async () => {
+		const plainReasoning = "This is clean reasoning content without any tool tags. Just thinking about the problem."
+
+		// Fake stream: one tag-free reasoning delta, then end-of-stream
+		const reasoningDelta = {
+			choices: [{ delta: { reasoning_content: plainReasoning } }],
+		}
+		mockCreate.mockImplementationOnce(() => ({
+			[Symbol.asyncIterator]: () => {
+				const next = vi.fn()
+				next.mockResolvedValueOnce({ done: false, value: reasoningDelta })
+				next.mockResolvedValueOnce({ done: true })
+				return { next }
+			},
+		}))
+
+		// Pull only the first chunk the handler emits
+		const chunks = handler.createMessage("system prompt", [])
+		const { done, value } = await chunks.next()
+
+		// The stream must not be finished yet
+		expect(done).toBe(false)
+
+		// Text without tool tags passes through unchanged
+		expect(value).toEqual({
+			type: "reasoning",
+			text: plainReasoning,
+		})
+	})
+
 	it("createMessage should yield usage data from stream", async () => {
 		// Setup mock for streaming response that includes usage data
 		mockCreate.mockImplementationOnce(() => {
diff --git a/src/api/providers/xai.ts b/src/api/providers/xai.ts
@@ -16,6 +16,26 @@ import { handleOpenAIError } from "./utils/openai-error-handler"
 
 const XAI_DEFAULT_TEMPERATURE = 0
 
+/**
+ * Strips known tool-call XML tags (e.g. <appy_diff>, <switch_mode>) from a reasoning
+ * delta while keeping the text between them, so they never render in thinking blocks.
+ *
+ * Deltas arrive as stream fragments, so leading/trailing whitespace is preserved:
+ * trimming here would glue the words of adjacent chunks together. Callers decide
+ * whether a whitespace-only result is worth emitting.
+ *
+ * @param content - Raw reasoning text from a single stream delta.
+ * @returns The text without tool tags and with runs of 3+ newlines collapsed to 2.
+ */
+function sanitizeReasoningContent(content: string): string {
+	// The tag name must end at whitespace, "/" or ">" so <read_file_extra> is untouched;
+	// [^<>]* keeps attribute matching from swallowing prose up to an unrelated ">".
+	const toolTagPattern =
+		/<\/?(?:appy_diff|switch_mode|apply_diff|write_to_file|search_files|read_file|execute_command|list_files|insert_content|attempt_completion|ask_followup_question|update_todo_list|new_task|fetch_instructions|list_code_definition_names)(?:\s[^<>]*)?\/?>/gi
+
+	return content.replace(toolTagPattern, "").replace(/\n{3,}/g, "\n\n")
+}
+
 export class XAIHandler extends BaseProvider implements SingleCompletionHandler {
 	protected options: ApiHandlerOptions
 	private client: OpenAI
@@ -79,9 +99,12 @@ export class XAIHandler extends BaseProvider implements SingleCompletionHandler
 			}
 
 			if (delta && "reasoning_content" in delta && delta.reasoning_content) {
-				yield {
-					type: "reasoning",
-					text: delta.reasoning_content as string,
+				const sanitizedContent = sanitizeReasoningContent(delta.reasoning_content as string)
+				if (sanitizedContent.trim()) {
+					yield {
+						type: "reasoning",
+						text: sanitizedContent,
+					}
 				}
 			}