@@ -204,6 +204,154 @@ describe("XAIHandler", () => {
204204 } )
205205 } )
206206
it("createMessage should sanitize tool tags from reasoning content", async () => {
  // Reasoning delta with tool-call markup mixed in, and the text expected
  // once the tags (but not their inner text) have been stripped.
  const taggedReasoning =
    "I need to <apply_diff>fix this code</apply_diff> and then <switch_mode>change mode</switch_mode>"
  const strippedReasoning = "I need to fix this code and then change mode"

  // Builds a fresh async iterator on demand: one reasoning chunk, then end of stream.
  const makeIterator = () => {
    const next = vi.fn()
    next.mockResolvedValueOnce({
      done: false,
      value: { choices: [{ delta: { reasoning_content: taggedReasoning } }] },
    })
    next.mockResolvedValueOnce({ done: true })
    return { next }
  }

  // The next mockCreate call hands back an object exposing only the async-iterator protocol.
  mockCreate.mockImplementationOnce(() => ({ [Symbol.asyncIterator]: makeIterator }))

  // Pull just the first item produced by the handler.
  const { done, value } = await handler.createMessage("system prompt", []).next()

  // The first item must be a reasoning chunk carrying the tag-free text.
  expect(done).toBe(false)
  expect(value).toEqual({ type: "reasoning", text: strippedReasoning })
})
240+
it("createMessage should handle complex nested tool tags in reasoning", async () => {
  // Multi-line reasoning: a tag with an attribute wrapping its own lines, plus inline tags.
  const rawReasoning = `Let me think about this...
<read_file path="test.ts">
This should be removed
</read_file>
Now I'll use <execute_command>npm test</execute_command>
And finally <attempt_completion result="done">complete</attempt_completion>`

  // Only the markup disappears. Despite its wording, the inner line
  // "This should be removed" is expected to survive, and the lines that held
  // only a tag become empty lines.
  const cleanedReasoning = `Let me think about this...

This should be removed

Now I'll use npm test
And finally complete`

  // Stream script: a single reasoning delta followed by the end-of-iteration marker.
  const reasoningStep = {
    done: false,
    value: { choices: [{ delta: { reasoning_content: rawReasoning } }] },
  }
  const finalStep = { done: true }

  mockCreate.mockImplementationOnce(() => {
    const fakeStream = {}
    fakeStream[Symbol.asyncIterator] = () => ({
      next: vi.fn().mockResolvedValueOnce(reasoningStep).mockResolvedValueOnce(finalStep),
    })
    return fakeStream
  })

  // Read the first item the handler emits for this request.
  const messageStream = handler.createMessage("system prompt", [])
  const { done, value } = await messageStream.next()

  // It must be a reasoning chunk whose text matches the tag-free version exactly.
  expect(done).toBe(false)
  expect(value).toEqual({ type: "reasoning", text: cleanedReasoning })
})
284+
it("createMessage should not yield reasoning if content is empty after sanitization", async () => {
  // Reasoning that consists solely of empty tool tags, so nothing is left once
  // the tags are stripped. The tag names must be real tool names: the fixture
  // previously misspelled `apply_diff` as `appy_diff`.
  const onlyTags = "<apply_diff></apply_diff><switch_mode></switch_mode>"

  // Setup mock for streaming response: a tags-only reasoning delta, then a
  // regular content delta, then end of stream.
  mockCreate.mockImplementationOnce(() => {
    return {
      [Symbol.asyncIterator]: () => ({
        next: vi
          .fn()
          .mockResolvedValueOnce({
            done: false,
            value: {
              choices: [{ delta: { reasoning_content: onlyTags } }],
            },
          })
          .mockResolvedValueOnce({
            done: false,
            value: {
              choices: [{ delta: { content: "Regular content" } }],
            },
          })
          .mockResolvedValueOnce({ done: true }),
      }),
    }
  })

  // Create and consume the stream
  const stream = handler.createMessage("system prompt", [])
  const firstChunk = await stream.next()

  // Should skip the empty reasoning and go straight to the regular content:
  // the first item emitted is the text chunk, not a reasoning chunk.
  expect(firstChunk.done).toBe(false)
  expect(firstChunk.value).toEqual({
    type: "text",
    text: "Regular content",
  })
})
322+
it("createMessage should preserve reasoning content without tool tags", async () => {
  // Plain reasoning with no markup at all; it should come through unchanged.
  const plainReasoning = "This is clean reasoning content without any tool tags. Just thinking about the problem."

  // Produces the iterator for the fake stream: one reasoning delta, then done.
  const openStream = () => {
    const pull = vi.fn()
    pull.mockResolvedValueOnce({
      done: false,
      value: { choices: [{ delta: { reasoning_content: plainReasoning } }] },
    })
    pull.mockResolvedValueOnce({ done: true })
    return { next: pull }
  }

  // The next mockCreate call returns an object that is only async-iterable.
  mockCreate.mockImplementationOnce(() => ({ [Symbol.asyncIterator]: openStream }))

  // Take the first item yielded for this request.
  const result = await handler.createMessage("system prompt", []).next()

  // The text must equal the input verbatim, emitted as a reasoning chunk.
  expect(result.done).toBe(false)
  expect(result.value).toEqual({ type: "reasoning", text: plainReasoning })
})
354+
207355 it ( "createMessage should yield usage data from stream" , async ( ) => {
208356 // Setup mock for streaming response that includes usage data
209357 mockCreate . mockImplementationOnce ( ( ) => {
0 commit comments