Skip to content

Commit aa8cb2f

Browse files
committed
fix: parse XML thinking and tool_call blocks in OpenRouter responses
- Add XML parsing for <think> and <tool_call> blocks in OpenRouter handler
- Handle incomplete XML blocks across streaming chunks
- Convert tool_call blocks to user-friendly format
- Add comprehensive tests for XML parsing functionality

Fixes #6630
1 parent a88238f commit aa8cb2f

File tree

2 files changed

+259
-1
lines changed

2 files changed

+259
-1
lines changed

src/api/providers/__tests__/openrouter.spec.ts

Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,199 @@ describe("OpenRouterHandler", () => {
265265
const generator = handler.createMessage("test", [])
266266
await expect(generator.next()).rejects.toThrow("OpenRouter API Error 500: API Error")
267267
})
268+
269+
it("parses <think> blocks correctly", async () => {
270+
const handler = new OpenRouterHandler(mockOptions)
271+
const mockStream = {
272+
async *[Symbol.asyncIterator]() {
273+
yield {
274+
id: "test-id",
275+
choices: [{ delta: { content: "Before <think>This is thinking content</think> After" } }],
276+
}
277+
yield {
278+
id: "test-id",
279+
choices: [{ delta: {} }],
280+
usage: { prompt_tokens: 10, completion_tokens: 20 },
281+
}
282+
},
283+
}
284+
285+
const mockCreate = vitest.fn().mockResolvedValue(mockStream)
286+
;(OpenAI as any).prototype.chat = {
287+
completions: { create: mockCreate },
288+
} as any
289+
290+
const generator = handler.createMessage("test", [])
291+
const chunks = []
292+
293+
for await (const chunk of generator) {
294+
chunks.push(chunk)
295+
}
296+
297+
// Should have 3 text/reasoning chunks and 1 usage chunk
298+
expect(chunks).toHaveLength(4)
299+
expect(chunks[0]).toEqual({ type: "text", text: "Before " })
300+
expect(chunks[1]).toEqual({ type: "reasoning", text: "This is thinking content" })
301+
expect(chunks[2]).toEqual({ type: "text", text: " After" })
302+
expect(chunks[3]).toEqual({
303+
type: "usage",
304+
inputTokens: 10,
305+
outputTokens: 20,
306+
cacheReadTokens: undefined,
307+
reasoningTokens: undefined,
308+
totalCost: 0,
309+
})
310+
})
311+
312+
it("parses <tool_call> blocks correctly", async () => {
313+
const handler = new OpenRouterHandler(mockOptions)
314+
const mockStream = {
315+
async *[Symbol.asyncIterator]() {
316+
yield {
317+
id: "test-id",
318+
choices: [
319+
{ delta: { content: "Text before <tool_call>Tool call content</tool_call> text after" } },
320+
],
321+
}
322+
yield {
323+
id: "test-id",
324+
choices: [{ delta: {} }],
325+
usage: { prompt_tokens: 10, completion_tokens: 20 },
326+
}
327+
},
328+
}
329+
330+
const mockCreate = vitest.fn().mockResolvedValue(mockStream)
331+
;(OpenAI as any).prototype.chat = {
332+
completions: { create: mockCreate },
333+
} as any
334+
335+
const generator = handler.createMessage("test", [])
336+
const chunks = []
337+
338+
for await (const chunk of generator) {
339+
chunks.push(chunk)
340+
}
341+
342+
// Should have 3 text chunks (before, tool call formatted, after) and 1 usage chunk
343+
expect(chunks).toHaveLength(4)
344+
expect(chunks[0]).toEqual({ type: "text", text: "Text before " })
345+
expect(chunks[1]).toEqual({ type: "text", text: "[Tool Call]: Tool call content" })
346+
expect(chunks[2]).toEqual({ type: "text", text: " text after" })
347+
expect(chunks[3]).toEqual({
348+
type: "usage",
349+
inputTokens: 10,
350+
outputTokens: 20,
351+
cacheReadTokens: undefined,
352+
reasoningTokens: undefined,
353+
totalCost: 0,
354+
})
355+
})
356+
357+
it("handles nested and multiple XML blocks", async () => {
358+
const handler = new OpenRouterHandler(mockOptions)
359+
const mockStream = {
360+
async *[Symbol.asyncIterator]() {
361+
yield {
362+
id: "test-id",
363+
choices: [
364+
{
365+
delta: {
366+
content: "<think>First think</think> middle <tool_call>Tool usage</tool_call>",
367+
},
368+
},
369+
],
370+
}
371+
yield {
372+
id: "test-id",
373+
choices: [{ delta: { content: " <think>Second think</think> end" } }],
374+
}
375+
yield {
376+
id: "test-id",
377+
choices: [{ delta: {} }],
378+
usage: { prompt_tokens: 10, completion_tokens: 20 },
379+
}
380+
},
381+
}
382+
383+
const mockCreate = vitest.fn().mockResolvedValue(mockStream)
384+
;(OpenAI as any).prototype.chat = {
385+
completions: { create: mockCreate },
386+
} as any
387+
388+
const generator = handler.createMessage("test", [])
389+
const chunks = []
390+
391+
for await (const chunk of generator) {
392+
chunks.push(chunk)
393+
}
394+
395+
// Verify all chunks are parsed correctly
396+
expect(chunks).toContainEqual({ type: "reasoning", text: "First think" })
397+
expect(chunks).toContainEqual({ type: "text", text: " middle " })
398+
expect(chunks).toContainEqual({ type: "text", text: "[Tool Call]: Tool usage" })
399+
expect(chunks).toContainEqual({ type: "text", text: " " })
400+
expect(chunks).toContainEqual({ type: "reasoning", text: "Second think" })
401+
expect(chunks).toContainEqual({ type: "text", text: " end" })
402+
expect(chunks).toContainEqual({
403+
type: "usage",
404+
inputTokens: 10,
405+
outputTokens: 20,
406+
cacheReadTokens: undefined,
407+
reasoningTokens: undefined,
408+
totalCost: 0,
409+
})
410+
})
411+
412+
it("handles incomplete XML blocks across chunks", async () => {
413+
const handler = new OpenRouterHandler(mockOptions)
414+
const mockStream = {
415+
async *[Symbol.asyncIterator]() {
416+
yield {
417+
id: "test-id",
418+
choices: [{ delta: { content: "Start <thi" } }],
419+
}
420+
yield {
421+
id: "test-id",
422+
choices: [{ delta: { content: "nk>Thinking content</thi" } }],
423+
}
424+
yield {
425+
id: "test-id",
426+
choices: [{ delta: { content: "nk> End" } }],
427+
}
428+
yield {
429+
id: "test-id",
430+
choices: [{ delta: {} }],
431+
usage: { prompt_tokens: 10, completion_tokens: 20 },
432+
}
433+
},
434+
}
435+
436+
const mockCreate = vitest.fn().mockResolvedValue(mockStream)
437+
;(OpenAI as any).prototype.chat = {
438+
completions: { create: mockCreate },
439+
} as any
440+
441+
const generator = handler.createMessage("test", [])
442+
const chunks = []
443+
444+
for await (const chunk of generator) {
445+
chunks.push(chunk)
446+
}
447+
448+
// Should correctly parse the thinking block even when split across chunks
449+
expect(chunks).toContainEqual({ type: "text", text: "Start " })
450+
expect(chunks).toContainEqual({ type: "reasoning", text: "Thinking content" })
451+
expect(chunks).toContainEqual({ type: "text", text: " End" })
452+
expect(chunks).toContainEqual({
453+
type: "usage",
454+
inputTokens: 10,
455+
outputTokens: 20,
456+
cacheReadTokens: undefined,
457+
reasoningTokens: undefined,
458+
totalCost: 0,
459+
})
460+
})
268461
})
269462

270463
describe("completePrompt", () => {

src/api/providers/openrouter.ts

Lines changed: 66 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import { addCacheBreakpoints as addAnthropicCacheBreakpoints } from "../transfor
1818
import { addCacheBreakpoints as addGeminiCacheBreakpoints } from "../transform/caching/gemini"
1919
import type { OpenRouterReasoningParams } from "../transform/reasoning"
2020
import { getModelParams } from "../transform/model-params"
21+
import { XmlMatcher } from "../../utils/xml-matcher"
2122

2223
import { getModels } from "./fetchers/modelCache"
2324
import { getModelEndpoints } from "./fetchers/modelEndpointCache"
@@ -137,6 +138,7 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
137138
const stream = await this.client.chat.completions.create(completionParams)
138139

139140
let lastUsage: CompletionUsage | undefined = undefined
141+
let buffer = ""
140142

141143
for await (const chunk of stream) {
142144
// OpenRouter returns an error object instead of the OpenAI SDK throwing an error.
@@ -153,14 +155,77 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
153155
}
154156

155157
if (delta?.content) {
156-
yield { type: "text", text: delta.content }
158+
buffer += delta.content
159+
160+
// Process complete XML blocks
161+
let processed = true
162+
while (processed) {
163+
processed = false
164+
165+
// Check for complete <think> blocks
166+
const thinkMatch = buffer.match(/^(.*?)<think>([\s\S]*?)<\/think>(.*)$/s)
167+
if (thinkMatch) {
168+
const [, before, content, after] = thinkMatch
169+
if (before) {
170+
yield { type: "text", text: before }
171+
}
172+
yield { type: "reasoning", text: content }
173+
buffer = after
174+
processed = true
175+
continue
176+
}
177+
178+
// Check for complete <tool_call> blocks
179+
const toolMatch = buffer.match(/^(.*?)<tool_call>([\s\S]*?)<\/tool_call>(.*)$/s)
180+
if (toolMatch) {
181+
const [, before, content, after] = toolMatch
182+
if (before) {
183+
yield { type: "text", text: before }
184+
}
185+
yield { type: "text", text: `[Tool Call]: ${content}` }
186+
buffer = after
187+
processed = true
188+
continue
189+
}
190+
191+
// Check if we have an incomplete tag at the end
192+
const incompleteTag = buffer.match(/^(.*?)(<(?:think|tool_call)[^>]*(?:>[\s\S]*)?)?$/s)
193+
if (incompleteTag && incompleteTag[2]) {
194+
// We have an incomplete tag, yield the text before it and keep the tag in buffer
195+
const [, before, tag] = incompleteTag
196+
if (before) {
197+
yield { type: "text", text: before }
198+
buffer = tag
199+
}
200+
break
201+
}
202+
203+
// No tags found or incomplete, yield all content except potential start of a tag
204+
const tagStart = buffer.lastIndexOf("<")
205+
if (tagStart === -1) {
206+
// No < found, yield all
207+
if (buffer) {
208+
yield { type: "text", text: buffer }
209+
buffer = ""
210+
}
211+
} else if (tagStart > 0) {
212+
// Yield content before the <
213+
yield { type: "text", text: buffer.substring(0, tagStart) }
214+
buffer = buffer.substring(tagStart)
215+
}
216+
}
157217
}
158218

159219
if (chunk.usage) {
160220
lastUsage = chunk.usage
161221
}
162222
}
163223

224+
// Process any remaining content in the buffer
225+
if (buffer) {
226+
yield { type: "text", text: buffer }
227+
}
228+
164229
if (lastUsage) {
165230
yield {
166231
type: "usage",

0 commit comments

Comments (0)