Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
148 changes: 148 additions & 0 deletions src/api/providers/__tests__/xai.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,154 @@ describe("XAIHandler", () => {
})
})

// Tool tags embedded in a reasoning delta must be stripped, while the text
// that sat between the opening and closing tags is kept.
it("createMessage should sanitize tool tags from reasoning content", async () => {
	const rawReasoning =
		"I need to <apply_diff>fix this code</apply_diff> and then <switch_mode>change mode</switch_mode>"
	const strippedReasoning = "I need to fix this code and then change mode"

	// Fake streaming response: a single reasoning delta followed by end-of-stream.
	const makeIterator = () => ({
		next: vi
			.fn()
			.mockResolvedValueOnce({
				done: false,
				value: { choices: [{ delta: { reasoning_content: rawReasoning } }] },
			})
			.mockResolvedValueOnce({ done: true }),
	})
	mockCreate.mockImplementationOnce(() => ({ [Symbol.asyncIterator]: makeIterator }))

	// Only the first chunk produced by the handler is inspected.
	const { done, value } = await handler.createMessage("system prompt", []).next()

	// The chunk must be a reasoning chunk carrying the tag-free text.
	expect(done).toBe(false)
	expect(value).toEqual({ type: "reasoning", text: strippedReasoning })
})

// Multi-line reasoning containing tags with attributes: only the tags themselves
// are removed; the text they wrapped and the surrounding lines are kept.
it("createMessage should handle complex nested tool tags in reasoning", async () => {
	const rawReasoning = `Let me think about this...
<read_file path="test.ts">
This should be removed
</read_file>
Now I'll use <execute_command>npm test</execute_command>
And finally <attempt_completion result="done">complete</attempt_completion>`

	const strippedReasoning = `Let me think about this...

This should be removed

Now I'll use npm test
And finally complete`

	// Fake streaming response: a single reasoning delta followed by end-of-stream.
	const makeIterator = () => ({
		next: vi
			.fn()
			.mockResolvedValueOnce({
				done: false,
				value: { choices: [{ delta: { reasoning_content: rawReasoning } }] },
			})
			.mockResolvedValueOnce({ done: true }),
	})
	mockCreate.mockImplementationOnce(() => ({ [Symbol.asyncIterator]: makeIterator }))

	// Only the first chunk produced by the handler is inspected.
	const { done, value } = await handler.createMessage("system prompt", []).next()

	// The chunk must be a reasoning chunk carrying the tag-free text.
	expect(done).toBe(false)
	expect(value).toEqual({ type: "reasoning", text: strippedReasoning })
})

// A reasoning delta that consists solely of tool tags must not produce a
// "reasoning" chunk; the first chunk yielded should be the following text delta.
// NOTE(review): "appy_diff" looks like a misspelling of "apply_diff" — confirm
// whether the misspelled tag is intentional test input.
it("createMessage should not yield reasoning if content is empty after sanitization", async () => {
const onlyTags = "<appy_diff></appy_diff><switch_mode></switch_mode>"
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Typographical error: It appears that appy_diff is used instead of apply_diff. Please correct this to maintain consistency with other tests.

Suggested change
const onlyTags = "<appy_diff></appy_diff><switch_mode></switch_mode>"
const onlyTags = "<apply_diff></apply_diff><switch_mode></switch_mode>"


// Setup mock for streaming response: a tags-only reasoning delta, then a
// regular content delta, then end-of-stream.
mockCreate.mockImplementationOnce(() => {
return {
[Symbol.asyncIterator]: () => ({
next: vi
.fn()
.mockResolvedValueOnce({
done: false,
value: {
choices: [{ delta: { reasoning_content: onlyTags } }],
},
})
.mockResolvedValueOnce({
done: false,
value: {
choices: [{ delta: { content: "Regular content" } }],
},
})
.mockResolvedValueOnce({ done: true }),
}),
}
})

// Create the stream and pull only its first yielded chunk
const stream = handler.createMessage("system prompt", [])
const firstChunk = await stream.next()

// Should skip the empty reasoning and go straight to the regular content
expect(firstChunk.done).toBe(false)
expect(firstChunk.value).toEqual({
type: "text",
text: "Regular content",
})
})

// Reasoning text that contains no tool tags must pass through unchanged.
it("createMessage should preserve reasoning content without tool tags", async () => {
	const untouchedReasoning =
		"This is clean reasoning content without any tool tags. Just thinking about the problem."

	// Fake streaming response: a single reasoning delta followed by end-of-stream.
	const makeIterator = () => ({
		next: vi
			.fn()
			.mockResolvedValueOnce({
				done: false,
				value: { choices: [{ delta: { reasoning_content: untouchedReasoning } }] },
			})
			.mockResolvedValueOnce({ done: true }),
	})
	mockCreate.mockImplementationOnce(() => ({ [Symbol.asyncIterator]: makeIterator }))

	// Only the first chunk produced by the handler is inspected.
	const { done, value } = await handler.createMessage("system prompt", []).next()

	// The reasoning chunk must carry exactly the text that was streamed in.
	expect(done).toBe(false)
	expect(value).toEqual({ type: "reasoning", text: untouchedReasoning })
})

it("createMessage should yield usage data from stream", async () => {
// Setup mock for streaming response that includes usage data
mockCreate.mockImplementationOnce(() => {
Expand Down
29 changes: 26 additions & 3 deletions src/api/providers/xai.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,26 @@ import { handleOpenAIError } from "./utils/openai-error-handler"

const XAI_DEFAULT_TEMPERATURE = 0

/**
 * Names of the tool tags that are stripped from reasoning output.
 *
 * NOTE(review): "appy_diff" looks like a misspelling of "apply_diff". It is kept
 * because the previous pattern matched it — confirm before removing.
 */
const REASONING_TOOL_TAG_NAMES = [
	"appy_diff",
	"switch_mode",
	"apply_diff",
	"write_to_file",
	"search_files",
	"read_file",
	"execute_command",
	"list_files",
	"insert_content",
	"attempt_completion",
	"ask_followup_question",
	"update_todo_list",
	"new_task",
	"fetch_instructions",
	"list_code_definition_names",
]

/**
 * Matches an opening, closing or self-closing tag for any listed tool name,
 * with optional attributes (e.g. `<tag>`, `</tag>`, `<tag attr="value">`).
 * The lookahead requires the name to end at whitespace, `/` or `>`, so a longer
 * tag that merely starts with a tool name (e.g. `<read_file_extra>`) is left alone.
 */
const REASONING_TOOL_TAG_PATTERN = new RegExp(
	`</?(?:${REASONING_TOOL_TAG_NAMES.join("|")})(?=[\\s/>])[^>]*>`,
	"gi",
)

/**
 * Sanitizes reasoning content by removing tool-related XML/HTML tags
 * that may appear in the model's thinking output.
 * This prevents tags like <apply_diff>, <switch_mode>, etc. from being displayed.
 *
 * Only the tags are removed; the text between them is preserved. Runs of three
 * or more newlines are collapsed to two.
 *
 * Leading and trailing whitespace is deliberately preserved: this function is
 * applied to individual streamed deltas, and trimming each delta would glue
 * adjacent chunks together ("Hello " + "world" -> "Helloworld"). Callers that
 * need to detect an effectively empty result should test `result.trim()`.
 *
 * A tag that is split across two deltas cannot be detected here, because each
 * call only sees one delta.
 *
 * @param content - A piece of raw reasoning text.
 * @returns The text with tool tags removed.
 */
function sanitizeReasoningContent(content: string): string {
	// String.prototype.replace resets lastIndex on a global regex, so sharing
	// the module-level pattern across calls is safe.
	const withoutTags = content.replace(REASONING_TOOL_TAG_PATTERN, "")

	// Clean up excessive blank lines that might result from tag removal
	return withoutTags.replace(/\n{3,}/g, "\n\n")
}

export class XAIHandler extends BaseProvider implements SingleCompletionHandler {
protected options: ApiHandlerOptions
private client: OpenAI
Expand Down Expand Up @@ -79,9 +99,12 @@ export class XAIHandler extends BaseProvider implements SingleCompletionHandler
}

if (delta && "reasoning_content" in delta && delta.reasoning_content) {
yield {
type: "reasoning",
text: delta.reasoning_content as string,
const sanitizedContent = sanitizeReasoningContent(delta.reasoning_content as string)
if (sanitizedContent.trim()) {
yield {
type: "reasoning",
text: sanitizedContent,
}
}
}

Expand Down
Loading