Skip to content

Commit 2ae05f9

Browse files
committed
fix: sanitize tool tags from XAI reasoning content
- Added sanitizeReasoningContent function to remove tool-related XML tags
- Prevents display of <appy_diff>, <switch_mode>, and other tool tags in thinking blocks
- Added comprehensive tests for sanitization logic
- Fixes #9041
1 parent 54745fc commit 2ae05f9

File tree

2 files changed

+174
-3
lines changed

2 files changed

+174
-3
lines changed

src/api/providers/__tests__/xai.spec.ts

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,154 @@ describe("XAIHandler", () => {
204204
})
205205
})
206206

207+
it("createMessage should sanitize tool tags from reasoning content", async () => {
208+
const reasoningWithTags =
209+
"I need to <apply_diff>fix this code</apply_diff> and then <switch_mode>change mode</switch_mode>"
210+
const expectedSanitized = "I need to fix this code and then change mode"
211+
212+
// Setup mock for streaming response
213+
mockCreate.mockImplementationOnce(() => {
214+
return {
215+
[Symbol.asyncIterator]: () => ({
216+
next: vi
217+
.fn()
218+
.mockResolvedValueOnce({
219+
done: false,
220+
value: {
221+
choices: [{ delta: { reasoning_content: reasoningWithTags } }],
222+
},
223+
})
224+
.mockResolvedValueOnce({ done: true }),
225+
}),
226+
}
227+
})
228+
229+
// Create and consume the stream
230+
const stream = handler.createMessage("system prompt", [])
231+
const firstChunk = await stream.next()
232+
233+
// Verify the reasoning content is sanitized
234+
expect(firstChunk.done).toBe(false)
235+
expect(firstChunk.value).toEqual({
236+
type: "reasoning",
237+
text: expectedSanitized,
238+
})
239+
})
240+
241+
it("createMessage should handle complex nested tool tags in reasoning", async () => {
242+
const complexReasoning = `Let me think about this...
243+
<read_file path="test.ts">
244+
This should be removed
245+
</read_file>
246+
Now I'll use <execute_command>npm test</execute_command>
247+
And finally <attempt_completion result="done">complete</attempt_completion>`
248+
249+
const expectedSanitized = `Let me think about this...
250+
251+
This should be removed
252+
253+
Now I'll use npm test
254+
And finally complete`
255+
256+
// Setup mock for streaming response
257+
mockCreate.mockImplementationOnce(() => {
258+
return {
259+
[Symbol.asyncIterator]: () => ({
260+
next: vi
261+
.fn()
262+
.mockResolvedValueOnce({
263+
done: false,
264+
value: {
265+
choices: [{ delta: { reasoning_content: complexReasoning } }],
266+
},
267+
})
268+
.mockResolvedValueOnce({ done: true }),
269+
}),
270+
}
271+
})
272+
273+
// Create and consume the stream
274+
const stream = handler.createMessage("system prompt", [])
275+
const firstChunk = await stream.next()
276+
277+
// Verify the reasoning content is properly sanitized
278+
expect(firstChunk.done).toBe(false)
279+
expect(firstChunk.value).toEqual({
280+
type: "reasoning",
281+
text: expectedSanitized,
282+
})
283+
})
284+
285+
it("createMessage should not yield reasoning if content is empty after sanitization", async () => {
286+
const onlyTags = "<appy_diff></appy_diff><switch_mode></switch_mode>"
287+
288+
// Setup mock for streaming response
289+
mockCreate.mockImplementationOnce(() => {
290+
return {
291+
[Symbol.asyncIterator]: () => ({
292+
next: vi
293+
.fn()
294+
.mockResolvedValueOnce({
295+
done: false,
296+
value: {
297+
choices: [{ delta: { reasoning_content: onlyTags } }],
298+
},
299+
})
300+
.mockResolvedValueOnce({
301+
done: false,
302+
value: {
303+
choices: [{ delta: { content: "Regular content" } }],
304+
},
305+
})
306+
.mockResolvedValueOnce({ done: true }),
307+
}),
308+
}
309+
})
310+
311+
// Create and consume the stream
312+
const stream = handler.createMessage("system prompt", [])
313+
const firstChunk = await stream.next()
314+
315+
// Should skip the empty reasoning and go straight to the regular content
316+
expect(firstChunk.done).toBe(false)
317+
expect(firstChunk.value).toEqual({
318+
type: "text",
319+
text: "Regular content",
320+
})
321+
})
322+
323+
it("createMessage should preserve reasoning content without tool tags", async () => {
324+
const cleanReasoning = "This is clean reasoning content without any tool tags. Just thinking about the problem."
325+
326+
// Setup mock for streaming response
327+
mockCreate.mockImplementationOnce(() => {
328+
return {
329+
[Symbol.asyncIterator]: () => ({
330+
next: vi
331+
.fn()
332+
.mockResolvedValueOnce({
333+
done: false,
334+
value: {
335+
choices: [{ delta: { reasoning_content: cleanReasoning } }],
336+
},
337+
})
338+
.mockResolvedValueOnce({ done: true }),
339+
}),
340+
}
341+
})
342+
343+
// Create and consume the stream
344+
const stream = handler.createMessage("system prompt", [])
345+
const firstChunk = await stream.next()
346+
347+
// Verify the reasoning content is preserved as-is
348+
expect(firstChunk.done).toBe(false)
349+
expect(firstChunk.value).toEqual({
350+
type: "reasoning",
351+
text: cleanReasoning,
352+
})
353+
})
354+
207355
it("createMessage should yield usage data from stream", async () => {
208356
// Setup mock for streaming response that includes usage data
209357
mockCreate.mockImplementationOnce(() => {

src/api/providers/xai.ts

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,26 @@ import { handleOpenAIError } from "./utils/openai-error-handler"
1616

1717
const XAI_DEFAULT_TEMPERATURE = 0
1818

19+
/**
 * Matches opening, closing and attribute-carrying tags for the known tool
 * names, e.g. `<switch_mode>`, `</switch_mode>` or `<read_file path="a.ts">`.
 *
 * The lookahead requires whitespace, `/` or `>` right after the tag name so
 * that look-alike tags such as `<read_files>` are left untouched.
 * `appy_diff` is listed on purpose next to `apply_diff` (presumably a
 * misspelling the model sometimes emits — confirm against the linked issue).
 */
const TOOL_TAG_PATTERN =
	/<\/?(?:appy_diff|switch_mode|apply_diff|write_to_file|search_files|read_file|execute_command|list_files|insert_content|attempt_completion|ask_followup_question|update_todo_list|new_task|fetch_instructions|list_code_definition_names)(?=[\s\/>])[^>]*>/gi

/**
 * Sanitizes reasoning content by removing tool-related XML/HTML tags
 * that may appear in the model's thinking output, while keeping the text
 * between the tags.
 *
 * The input is a single streamed delta, not a whole message, so leading and
 * trailing whitespace is significant: it separates this chunk from its
 * neighbours and is therefore never trimmed here.
 *
 * @param content - Raw reasoning text of one stream delta.
 * @returns The text with tool tags removed. Content that contains no tool
 * tags is returned unchanged. The result may be empty or whitespace-only;
 * callers decide whether such a chunk is worth emitting.
 */
function sanitizeReasoningContent(content: string): string {
	// Remove the tool tags while preserving the content between them
	const withoutTags = content.replace(TOOL_TAG_PATTERN, "")

	// Nothing was stripped: hand the chunk back byte-for-byte
	if (withoutTags === content) {
		return content
	}

	// Removing a tag that sat on its own line leaves a run of blank lines
	// behind; collapse those, but do not touch leading/trailing whitespace
	return withoutTags.replace(/\n{3,}/g, "\n\n")
}
38+
1939
export class XAIHandler extends BaseProvider implements SingleCompletionHandler {
2040
protected options: ApiHandlerOptions
2141
private client: OpenAI
@@ -79,9 +99,12 @@ export class XAIHandler extends BaseProvider implements SingleCompletionHandler
7999
}
80100

81101
if (delta && "reasoning_content" in delta && delta.reasoning_content) {
82-
yield {
83-
type: "reasoning",
84-
text: delta.reasoning_content as string,
102+
const sanitizedContent = sanitizeReasoningContent(delta.reasoning_content as string)
103+
if (sanitizedContent.trim()) {
104+
yield {
105+
type: "reasoning",
106+
text: sanitizedContent,
107+
}
85108
}
86109
}
87110

0 commit comments

Comments
 (0)