Skip to content

Commit aa8cb2f

Browse files
committed
fix: parse XML thinking and tool_call blocks in OpenRouter responses
- Add XML parsing for <think> and <tool_call> blocks in OpenRouter handler
- Handle incomplete XML blocks across streaming chunks
- Convert tool_call blocks to user-friendly format
- Add comprehensive tests for XML parsing functionality

Fixes #6630
1 parent a88238f commit aa8cb2f

File tree

2 files changed

+259
-1
lines changed

2 files changed

+259
-1
lines changed

src/api/providers/__tests__/openrouter.spec.ts

Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,199 @@ describe("OpenRouterHandler", () => {
265265
const generator = handler.createMessage("test", [])
266266
await expect(generator.next()).rejects.toThrow("OpenRouter API Error 500: API Error")
267267
})
268+
269+
it("parses <think> blocks correctly", async () => {
270+
const handler = new OpenRouterHandler(mockOptions)
271+
const mockStream = {
272+
async *[Symbol.asyncIterator]() {
273+
yield {
274+
id: "test-id",
275+
choices: [{ delta: { content: "Before <think>This is thinking content</think> After" } }],
276+
}
277+
yield {
278+
id: "test-id",
279+
choices: [{ delta: {} }],
280+
usage: { prompt_tokens: 10, completion_tokens: 20 },
281+
}
282+
},
283+
}
284+
285+
const mockCreate = vitest.fn().mockResolvedValue(mockStream)
286+
;(OpenAI as any).prototype.chat = {
287+
completions: { create: mockCreate },
288+
} as any
289+
290+
const generator = handler.createMessage("test", [])
291+
const chunks = []
292+
293+
for await (const chunk of generator) {
294+
chunks.push(chunk)
295+
}
296+
297+
// Should have 3 text/reasoning chunks and 1 usage chunk
298+
expect(chunks).toHaveLength(4)
299+
expect(chunks[0]).toEqual({ type: "text", text: "Before " })
300+
expect(chunks[1]).toEqual({ type: "reasoning", text: "This is thinking content" })
301+
expect(chunks[2]).toEqual({ type: "text", text: " After" })
302+
expect(chunks[3]).toEqual({
303+
type: "usage",
304+
inputTokens: 10,
305+
outputTokens: 20,
306+
cacheReadTokens: undefined,
307+
reasoningTokens: undefined,
308+
totalCost: 0,
309+
})
310+
})
311+
312+
it("parses <tool_call> blocks correctly", async () => {
313+
const handler = new OpenRouterHandler(mockOptions)
314+
const mockStream = {
315+
async *[Symbol.asyncIterator]() {
316+
yield {
317+
id: "test-id",
318+
choices: [
319+
{ delta: { content: "Text before <tool_call>Tool call content</tool_call> text after" } },
320+
],
321+
}
322+
yield {
323+
id: "test-id",
324+
choices: [{ delta: {} }],
325+
usage: { prompt_tokens: 10, completion_tokens: 20 },
326+
}
327+
},
328+
}
329+
330+
const mockCreate = vitest.fn().mockResolvedValue(mockStream)
331+
;(OpenAI as any).prototype.chat = {
332+
completions: { create: mockCreate },
333+
} as any
334+
335+
const generator = handler.createMessage("test", [])
336+
const chunks = []
337+
338+
for await (const chunk of generator) {
339+
chunks.push(chunk)
340+
}
341+
342+
// Should have 3 text chunks (before, tool call formatted, after) and 1 usage chunk
343+
expect(chunks).toHaveLength(4)
344+
expect(chunks[0]).toEqual({ type: "text", text: "Text before " })
345+
expect(chunks[1]).toEqual({ type: "text", text: "[Tool Call]: Tool call content" })
346+
expect(chunks[2]).toEqual({ type: "text", text: " text after" })
347+
expect(chunks[3]).toEqual({
348+
type: "usage",
349+
inputTokens: 10,
350+
outputTokens: 20,
351+
cacheReadTokens: undefined,
352+
reasoningTokens: undefined,
353+
totalCost: 0,
354+
})
355+
})
356+
357+
it("handles nested and multiple XML blocks", async () => {
358+
const handler = new OpenRouterHandler(mockOptions)
359+
const mockStream = {
360+
async *[Symbol.asyncIterator]() {
361+
yield {
362+
id: "test-id",
363+
choices: [
364+
{
365+
delta: {
366+
content: "<think>First think</think> middle <tool_call>Tool usage</tool_call>",
367+
},
368+
},
369+
],
370+
}
371+
yield {
372+
id: "test-id",
373+
choices: [{ delta: { content: " <think>Second think</think> end" } }],
374+
}
375+
yield {
376+
id: "test-id",
377+
choices: [{ delta: {} }],
378+
usage: { prompt_tokens: 10, completion_tokens: 20 },
379+
}
380+
},
381+
}
382+
383+
const mockCreate = vitest.fn().mockResolvedValue(mockStream)
384+
;(OpenAI as any).prototype.chat = {
385+
completions: { create: mockCreate },
386+
} as any
387+
388+
const generator = handler.createMessage("test", [])
389+
const chunks = []
390+
391+
for await (const chunk of generator) {
392+
chunks.push(chunk)
393+
}
394+
395+
// Verify all chunks are parsed correctly
396+
expect(chunks).toContainEqual({ type: "reasoning", text: "First think" })
397+
expect(chunks).toContainEqual({ type: "text", text: " middle " })
398+
expect(chunks).toContainEqual({ type: "text", text: "[Tool Call]: Tool usage" })
399+
expect(chunks).toContainEqual({ type: "text", text: " " })
400+
expect(chunks).toContainEqual({ type: "reasoning", text: "Second think" })
401+
expect(chunks).toContainEqual({ type: "text", text: " end" })
402+
expect(chunks).toContainEqual({
403+
type: "usage",
404+
inputTokens: 10,
405+
outputTokens: 20,
406+
cacheReadTokens: undefined,
407+
reasoningTokens: undefined,
408+
totalCost: 0,
409+
})
410+
})
411+
412+
it("handles incomplete XML blocks across chunks", async () => {
413+
const handler = new OpenRouterHandler(mockOptions)
414+
const mockStream = {
415+
async *[Symbol.asyncIterator]() {
416+
yield {
417+
id: "test-id",
418+
choices: [{ delta: { content: "Start <thi" } }],
419+
}
420+
yield {
421+
id: "test-id",
422+
choices: [{ delta: { content: "nk>Thinking content</thi" } }],
423+
}
424+
yield {
425+
id: "test-id",
426+
choices: [{ delta: { content: "nk> End" } }],
427+
}
428+
yield {
429+
id: "test-id",
430+
choices: [{ delta: {} }],
431+
usage: { prompt_tokens: 10, completion_tokens: 20 },
432+
}
433+
},
434+
}
435+
436+
const mockCreate = vitest.fn().mockResolvedValue(mockStream)
437+
;(OpenAI as any).prototype.chat = {
438+
completions: { create: mockCreate },
439+
} as any
440+
441+
const generator = handler.createMessage("test", [])
442+
const chunks = []
443+
444+
for await (const chunk of generator) {
445+
chunks.push(chunk)
446+
}
447+
448+
// Should correctly parse the thinking block even when split across chunks
449+
expect(chunks).toContainEqual({ type: "text", text: "Start " })
450+
expect(chunks).toContainEqual({ type: "reasoning", text: "Thinking content" })
451+
expect(chunks).toContainEqual({ type: "text", text: " End" })
452+
expect(chunks).toContainEqual({
453+
type: "usage",
454+
inputTokens: 10,
455+
outputTokens: 20,
456+
cacheReadTokens: undefined,
457+
reasoningTokens: undefined,
458+
totalCost: 0,
459+
})
460+
})
268461
})
269462

270463
describe("completePrompt", () => {

src/api/providers/openrouter.ts

Lines changed: 66 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import { addCacheBreakpoints as addAnthropicCacheBreakpoints } from "../transfor
1818
import { addCacheBreakpoints as addGeminiCacheBreakpoints } from "../transform/caching/gemini"
1919
import type { OpenRouterReasoningParams } from "../transform/reasoning"
2020
import { getModelParams } from "../transform/model-params"
21+
import { XmlMatcher } from "../../utils/xml-matcher"
2122

2223
import { getModels } from "./fetchers/modelCache"
2324
import { getModelEndpoints } from "./fetchers/modelEndpointCache"
@@ -137,6 +138,7 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
137138
const stream = await this.client.chat.completions.create(completionParams)
138139

139140
let lastUsage: CompletionUsage | undefined = undefined
141+
let buffer = ""
140142

141143
for await (const chunk of stream) {
142144
// OpenRouter returns an error object instead of the OpenAI SDK throwing an error.
@@ -153,14 +155,77 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
153155
}
154156

155157
if (delta?.content) {
156-
yield { type: "text", text: delta.content }
158+
buffer += delta.content
159+
160+
// Process complete XML blocks
161+
let processed = true
162+
while (processed) {
163+
processed = false
164+
165+
// Check for complete <think> blocks
166+
const thinkMatch = buffer.match(/^(.*?)<think>([\s\S]*?)<\/think>(.*)$/s)
167+
if (thinkMatch) {
168+
const [, before, content, after] = thinkMatch
169+
if (before) {
170+
yield { type: "text", text: before }
171+
}
172+
yield { type: "reasoning", text: content }
173+
buffer = after
174+
processed = true
175+
continue
176+
}
177+
178+
// Check for complete <tool_call> blocks
179+
const toolMatch = buffer.match(/^(.*?)<tool_call>([\s\S]*?)<\/tool_call>(.*)$/s)
180+
if (toolMatch) {
181+
const [, before, content, after] = toolMatch
182+
if (before) {
183+
yield { type: "text", text: before }
184+
}
185+
yield { type: "text", text: `[Tool Call]: ${content}` }
186+
buffer = after
187+
processed = true
188+
continue
189+
}
190+
191+
// Check if we have an incomplete tag at the end
192+
const incompleteTag = buffer.match(/^(.*?)(<(?:think|tool_call)[^>]*(?:>[\s\S]*)?)?$/s)
193+
if (incompleteTag && incompleteTag[2]) {
194+
// We have an incomplete tag, yield the text before it and keep the tag in buffer
195+
const [, before, tag] = incompleteTag
196+
if (before) {
197+
yield { type: "text", text: before }
198+
buffer = tag
199+
}
200+
break
201+
}
202+
203+
// No tags found or incomplete, yield all content except potential start of a tag
204+
const tagStart = buffer.lastIndexOf("<")
205+
if (tagStart === -1) {
206+
// No < found, yield all
207+
if (buffer) {
208+
yield { type: "text", text: buffer }
209+
buffer = ""
210+
}
211+
} else if (tagStart > 0) {
212+
// Yield content before the <
213+
yield { type: "text", text: buffer.substring(0, tagStart) }
214+
buffer = buffer.substring(tagStart)
215+
}
216+
}
157217
}
158218

159219
if (chunk.usage) {
160220
lastUsage = chunk.usage
161221
}
162222
}
163223

224+
// Process any remaining content in the buffer
225+
if (buffer) {
226+
yield { type: "text", text: buffer }
227+
}
228+
164229
if (lastUsage) {
165230
yield {
166231
type: "usage",

0 commit comments

Comments (0)