193 changes: 193 additions & 0 deletions src/api/providers/__tests__/openrouter.spec.ts
@@ -265,6 +265,199 @@ describe("OpenRouterHandler", () => {
const generator = handler.createMessage("test", [])
await expect(generator.next()).rejects.toThrow("OpenRouter API Error 500: API Error")
})

it("parses <think> blocks correctly", async () => {
const handler = new OpenRouterHandler(mockOptions)
const mockStream = {
async *[Symbol.asyncIterator]() {
yield {
id: "test-id",
choices: [{ delta: { content: "Before <think>This is thinking content</think> After" } }],
}
yield {
id: "test-id",
choices: [{ delta: {} }],
usage: { prompt_tokens: 10, completion_tokens: 20 },
}
},
}

const mockCreate = vitest.fn().mockResolvedValue(mockStream)
;(OpenAI as any).prototype.chat = {
completions: { create: mockCreate },
} as any

const generator = handler.createMessage("test", [])
const chunks = []

for await (const chunk of generator) {
chunks.push(chunk)
}

// Should have 3 text/reasoning chunks and 1 usage chunk
expect(chunks).toHaveLength(4)
expect(chunks[0]).toEqual({ type: "text", text: "Before " })
expect(chunks[1]).toEqual({ type: "reasoning", text: "This is thinking content" })
expect(chunks[2]).toEqual({ type: "text", text: " After" })
expect(chunks[3]).toEqual({
type: "usage",
inputTokens: 10,
outputTokens: 20,
cacheReadTokens: undefined,
reasoningTokens: undefined,
totalCost: 0,
})
})

it("parses <tool_call> blocks correctly", async () => {
const handler = new OpenRouterHandler(mockOptions)
const mockStream = {
async *[Symbol.asyncIterator]() {
yield {
id: "test-id",
choices: [
{ delta: { content: "Text before <tool_call>Tool call content</tool_call> text after" } },
],
}
yield {
id: "test-id",
choices: [{ delta: {} }],
usage: { prompt_tokens: 10, completion_tokens: 20 },
}
},
}

const mockCreate = vitest.fn().mockResolvedValue(mockStream)
;(OpenAI as any).prototype.chat = {
completions: { create: mockCreate },
} as any

const generator = handler.createMessage("test", [])
const chunks = []

for await (const chunk of generator) {
chunks.push(chunk)
}

// Should have 3 text chunks (before, tool call formatted, after) and 1 usage chunk
expect(chunks).toHaveLength(4)
expect(chunks[0]).toEqual({ type: "text", text: "Text before " })
expect(chunks[1]).toEqual({ type: "text", text: "[Tool Call]: Tool call content" })
expect(chunks[2]).toEqual({ type: "text", text: " text after" })
expect(chunks[3]).toEqual({
type: "usage",
inputTokens: 10,
outputTokens: 20,
cacheReadTokens: undefined,
reasoningTokens: undefined,
totalCost: 0,
})
})

it("handles nested and multiple XML blocks", async () => {
const handler = new OpenRouterHandler(mockOptions)
const mockStream = {
async *[Symbol.asyncIterator]() {
yield {
id: "test-id",
choices: [
{
delta: {
content: "<think>First think</think> middle <tool_call>Tool usage</tool_call>",
},
},
],
}
yield {
id: "test-id",
choices: [{ delta: { content: " <think>Second think</think> end" } }],
}
yield {
id: "test-id",
choices: [{ delta: {} }],
usage: { prompt_tokens: 10, completion_tokens: 20 },
}
},
}

const mockCreate = vitest.fn().mockResolvedValue(mockStream)
;(OpenAI as any).prototype.chat = {
completions: { create: mockCreate },
} as any

const generator = handler.createMessage("test", [])
const chunks = []

for await (const chunk of generator) {
chunks.push(chunk)
}

// Verify all chunks are parsed correctly
expect(chunks).toContainEqual({ type: "reasoning", text: "First think" })
expect(chunks).toContainEqual({ type: "text", text: " middle " })
expect(chunks).toContainEqual({ type: "text", text: "[Tool Call]: Tool usage" })
expect(chunks).toContainEqual({ type: "text", text: " " })
expect(chunks).toContainEqual({ type: "reasoning", text: "Second think" })
expect(chunks).toContainEqual({ type: "text", text: " end" })
expect(chunks).toContainEqual({
type: "usage",
inputTokens: 10,
outputTokens: 20,
cacheReadTokens: undefined,
reasoningTokens: undefined,
totalCost: 0,
})
})

it("handles incomplete XML blocks across chunks", async () => {
const handler = new OpenRouterHandler(mockOptions)
const mockStream = {
async *[Symbol.asyncIterator]() {
yield {
id: "test-id",
choices: [{ delta: { content: "Start <thi" } }],
}
yield {
id: "test-id",
choices: [{ delta: { content: "nk>Thinking content</thi" } }],
}
yield {
id: "test-id",
choices: [{ delta: { content: "nk> End" } }],
}
yield {
id: "test-id",
choices: [{ delta: {} }],
usage: { prompt_tokens: 10, completion_tokens: 20 },
}
},
}

const mockCreate = vitest.fn().mockResolvedValue(mockStream)
;(OpenAI as any).prototype.chat = {
completions: { create: mockCreate },
} as any

const generator = handler.createMessage("test", [])
const chunks = []

for await (const chunk of generator) {
chunks.push(chunk)
}

// Should correctly parse the thinking block even when split across chunks
expect(chunks).toContainEqual({ type: "text", text: "Start " })
expect(chunks).toContainEqual({ type: "reasoning", text: "Thinking content" })
expect(chunks).toContainEqual({ type: "text", text: " End" })
expect(chunks).toContainEqual({
type: "usage",
inputTokens: 10,
outputTokens: 20,
cacheReadTokens: undefined,
reasoningTokens: undefined,
totalCost: 0,
})
})
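	// A sketch of one additional edge-case test, not part of this PR: with an
	// unclosed <think> tag, neither regex ever matches, so the post-loop flush
	// should emit the buffered text verbatim. Assumes the same mockOptions and
	// OpenAI mocks used by the tests above.
	it("flushes an unclosed <think> tag as plain text at stream end", async () => {
		const handler = new OpenRouterHandler(mockOptions)
		const mockStream = {
			async *[Symbol.asyncIterator]() {
				yield {
					id: "test-id",
					choices: [{ delta: { content: "Before <think>never closed" } }],
				}
			},
		}

		const mockCreate = vitest.fn().mockResolvedValue(mockStream)
		;(OpenAI as any).prototype.chat = {
			completions: { create: mockCreate },
		} as any

		const chunks = []

		for await (const chunk of handler.createMessage("test", [])) {
			chunks.push(chunk)
		}

		// "Before " is yielded as soon as the partial tag is detected; the
		// unterminated remainder is flushed as plain text when the stream ends.
		expect(chunks).toContainEqual({ type: "text", text: "Before " })
		expect(chunks).toContainEqual({ type: "text", text: "<think>never closed" })
	})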
})

describe("completePrompt", () => {
67 changes: 66 additions & 1 deletion src/api/providers/openrouter.ts
@@ -18,6 +18,7 @@ import { addCacheBreakpoints as addAnthropicCacheBreakpoints } from "../transfor
import { addCacheBreakpoints as addGeminiCacheBreakpoints } from "../transform/caching/gemini"
import type { OpenRouterReasoningParams } from "../transform/reasoning"
import { getModelParams } from "../transform/model-params"
import { XmlMatcher } from "../../utils/xml-matcher"
Contributor:

Remove unused import 'XmlMatcher' if it's not needed to avoid confusion.

Suggested change (remove the import):
- import { XmlMatcher } from "../../utils/xml-matcher"

This comment was generated because it violated a code review rule: irule_Vw7dJWzvznOJagxS.

Contributor Author:

I notice we're importing XmlMatcher but not using it. Since we've implemented custom XML parsing logic below, should we remove this unused import? Or perhaps we could consider using the existing XmlMatcher utility instead of the manual implementation?
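A minimal sketch of what the XmlMatcher-based variant might look like. The constructor and the update()/final() streaming methods are assumptions inferred from how the utility appears to be used elsewhere, and a matcher handles one tag, so <tool_call> would still need separate treatment:

// Sketch only: assumes XmlMatcher(tag, transform) yields transformed
// segments from update(chunk) and flushes any leftover text from final().
const matcher = new XmlMatcher(
	"think",
	(chunk) =>
		({
			type: chunk.matched ? "reasoning" : "text",
			text: chunk.data,
		}) as const,
)

for await (const chunk of stream) {
	const delta = chunk.choices[0]?.delta
	if (delta?.content) {
		for (const parsed of matcher.update(delta.content)) {
			yield parsed
		}
	}
}

for (const parsed of matcher.final()) {
	yield parsed
}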


import { getModels } from "./fetchers/modelCache"
import { getModelEndpoints } from "./fetchers/modelEndpointCache"
@@ -137,6 +138,7 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
const stream = await this.client.chat.completions.create(completionParams)

let lastUsage: CompletionUsage | undefined = undefined
let buffer = ""

for await (const chunk of stream) {
// OpenRouter returns an error object instead of the OpenAI SDK throwing an error.
@@ -153,14 +155,77 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
}

if (delta?.content) {
yield { type: "text", text: delta.content }
buffer += delta.content

// Process complete XML blocks
Contributor:

Consider refactoring the inline XML parsing logic (lines 158–216) into a shared utility (or use the imported XmlMatcher) for improved readability and maintainability.

This comment was generated because it violated a code review rule: irule_tTqpIuNs8DV0QFGj.

let processed = true
while (processed) {
Contributor Author:

For large responses with many XML blocks, this while loop with multiple regex matches could impact performance. Have we considered the performance implications? Perhaps we could optimize by combining the regex patterns or using a different parsing approach?
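One possible shape for the combined-pattern idea, sketched under the assumption that only these two tags need handling. A backreference ties the closing tag to whichever opener matched, so the buffer is scanned once per iteration rather than once per tag type:

// Sketch: a single alternation pattern for both block types; \2 refers
// back to the tag name captured by group 2.
const blockMatch = buffer.match(/^(.*?)<(think|tool_call)>([\s\S]*?)<\/\2>([\s\S]*)$/s)
if (blockMatch) {
	const [, before, tag, content, after] = blockMatch
	if (before) {
		yield { type: "text", text: before }
	}
	yield tag === "think"
		? { type: "reasoning", text: content }
		: { type: "text", text: `[Tool Call]: ${content}` }
	buffer = after
	processed = true
	continue
}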

processed = false

// Check for complete <think> blocks
const thinkMatch = buffer.match(/^(.*?)<think>([\s\S]*?)<\/think>(.*)$/s)
Contributor Author:

The regex patterns assume well-formed XML. What happens if the model sends malformed XML like <think>content without closing tag or nested tags? The current implementation might not handle these edge cases gracefully. Should we add some validation or fallback behavior?

if (thinkMatch) {
const [, before, content, after] = thinkMatch
if (before) {
yield { type: "text", text: before }
}
yield { type: "reasoning", text: content }
buffer = after
processed = true
continue
}

// Check for complete <tool_call> blocks
const toolMatch = buffer.match(/^(.*?)<tool_call>([\s\S]*?)<\/tool_call>(.*)$/s)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The logic for handling <think> and <tool_call> blocks is nearly identical. Could we refactor this into a helper function to reduce duplication? Something like:

Suggested change
const toolMatch = buffer.match(/^(.*?)<tool_call>([\s\S]*?)<\/tool_call>(.*)$/s)
// Helper function to process XML blocks
const processXmlBlock = (buffer: string, tagName: string, transform?: (content: string) => any) => {
	// Note: [\s\S] must be written as \\s\\S inside a template literal,
	// otherwise the escapes collapse to the literal characters "s" and "S".
	const regex = new RegExp(`^(.*?)<${tagName}>([\\s\\S]*?)</${tagName}>(.*)$`, "s");
	const match = buffer.match(regex);
	if (match) {
		const [, before, content, after] = match;
		return { matched: true, before, content: transform ? transform(content) : content, after };
	}
	return { matched: false };
};
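If the helper were adopted, each branch of the loop could shrink to a few lines. A hypothetical usage, reusing the processXmlBlock sketch above rather than anything that exists in this PR:

// Hypothetical usage of the suggested helper (names come from the
// reviewer's sketch above, not from code in this PR).
const think = processXmlBlock(buffer, "think")
if (think.matched) {
	if (think.before) yield { type: "text", text: think.before }
	yield { type: "reasoning", text: think.content }
	buffer = think.after
	processed = true
	continue
}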

if (toolMatch) {
const [, before, content, after] = toolMatch
if (before) {
yield { type: "text", text: before }
}
yield { type: "text", text: `[Tool Call]: ${content}` }
buffer = after
processed = true
continue
}

// Check if we have an incomplete tag at the end
const incompleteTag = buffer.match(/^(.*?)(<(?:think|tool_call)[^>]*(?:>[\s\S]*)?)?$/s)
Contributor Author:

This buffer management logic for incomplete tags is complex. Could we add some inline comments explaining the different scenarios? For example:

  • When we have an incomplete tag at the end
  • When we need to preserve partial tag content
  • When it's safe to yield all content

This would improve maintainability for future developers (including myself in 5 minutes).
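For reference, a summary of what the branches below actually do, in roughly the comment form being requested here:

// 1. A recognizable partial opener ("<think...", "<tool_call...") ends the
//    buffer: yield the text before it and keep the opener buffered.
// 2. Otherwise, a trailing "<" might still start a tag: yield everything
//    before it and keep from the "<" onward buffered.
// 3. No "<" at all: it is safe to yield the whole buffer and clear it.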

if (incompleteTag && incompleteTag[2]) {
// We have an incomplete tag, yield the text before it and keep the tag in buffer
const [, before, tag] = incompleteTag
if (before) {
yield { type: "text", text: before }
buffer = tag
}
break
}

// No tags found or incomplete, yield all content except potential start of a tag
const tagStart = buffer.lastIndexOf("<")
if (tagStart === -1) {
// No < found, yield all
if (buffer) {
yield { type: "text", text: buffer }
buffer = ""
}
} else if (tagStart > 0) {
// Yield content before the <
yield { type: "text", text: buffer.substring(0, tagStart) }
buffer = buffer.substring(tagStart)
}
}
}

if (chunk.usage) {
lastUsage = chunk.usage
}
}

// Process any remaining content in the buffer
if (buffer) {
yield { type: "text", text: buffer }
}

if (lastUsage) {
yield {
type: "usage",