151 changes: 151 additions & 0 deletions src/api/providers/__tests__/openrouter.spec.ts
@@ -51,6 +51,17 @@ vitest.mock("../fetchers/modelCache", () => ({
cacheReadsPrice: 0.3,
description: "Claude 3.7 Sonnet with thinking",
},
"deepseek/deepseek-v3.1-terminus": {
maxTokens: 8192,
contextWindow: 128000,
supportsImages: false,
supportsPromptCache: false,
inputPrice: 0.3,
outputPrice: 1.2,
description: "DeepSeek V3.1 Terminus",
supportsReasoningEffort: true,
supportedReasoningEfforts: ["low", "medium", "high"],
},
})
}),
}))
@@ -330,4 +341,144 @@ describe("OpenRouterHandler", () => {
await expect(handler.completePrompt("test prompt")).rejects.toThrow("Unexpected error")
})
})

describe("DeepSeek V3.1 Terminus handling", () => {
it("should use chat_template_kwargs with thinking:true when reasoning is enabled for V3.1 Terminus", async () => {
const handler = new OpenRouterHandler({
openRouterApiKey: "test-key",
openRouterModelId: "deepseek/deepseek-v3.1-terminus",
reasoningEffort: "medium",
})

const mockStream = {
async *[Symbol.asyncIterator]() {
yield {
id: "test-id",
choices: [{ delta: { content: "test response" } }],
}
},
}

const mockCreate = vitest.fn().mockResolvedValue(mockStream)
;(OpenAI as any).prototype.chat = {
completions: { create: mockCreate },
} as any

await handler.createMessage("test", []).next()

// Should include chat_template_kwargs with thinking:true and NOT include reasoning parameter
expect(mockCreate).toHaveBeenCalledWith(
expect.objectContaining({
model: "deepseek/deepseek-v3.1-terminus",
chat_template_kwargs: { thinking: true },
}),
)
// Ensure reasoning parameter is NOT included
expect(mockCreate).not.toHaveBeenCalledWith(
expect.objectContaining({
reasoning: expect.anything(),
}),
)
})

it("should use chat_template_kwargs with thinking:false when reasoning is disabled for V3.1 Terminus", async () => {
const handler = new OpenRouterHandler({
openRouterApiKey: "test-key",
openRouterModelId: "deepseek/deepseek-v3.1-terminus",
// No reasoning effort specified
})

const mockStream = {
async *[Symbol.asyncIterator]() {
yield {
id: "test-id",
choices: [{ delta: { content: "test response" } }],
}
},
}

const mockCreate = vitest.fn().mockResolvedValue(mockStream)
;(OpenAI as any).prototype.chat = {
completions: { create: mockCreate },
} as any

await handler.createMessage("test", []).next()

// Should include chat_template_kwargs with thinking:false
expect(mockCreate).toHaveBeenCalledWith(
expect.objectContaining({
model: "deepseek/deepseek-v3.1-terminus",
chat_template_kwargs: { thinking: false },
}),
)
// Ensure reasoning parameter is NOT included
expect(mockCreate).not.toHaveBeenCalledWith(
expect.objectContaining({
reasoning: expect.anything(),
}),
)
})

it("should not use chat_template_kwargs for non-Terminus models", async () => {
const handler = new OpenRouterHandler({
openRouterApiKey: "test-key",
openRouterModelId: "anthropic/claude-sonnet-4",
reasoningEffort: "medium",
})

const mockStream = {
async *[Symbol.asyncIterator]() {
yield {
id: "test-id",
choices: [{ delta: { content: "test response" } }],
}
},
}

const mockCreate = vitest.fn().mockResolvedValue(mockStream)
;(OpenAI as any).prototype.chat = {
completions: { create: mockCreate },
} as any

await handler.createMessage("test", []).next()

// Should NOT include chat_template_kwargs for non-Terminus models
expect(mockCreate).not.toHaveBeenCalledWith(
expect.objectContaining({
chat_template_kwargs: expect.anything(),
}),
)
})

it("should handle chat_template_kwargs in completePrompt for V3.1 Terminus", async () => {
const handler = new OpenRouterHandler({
openRouterApiKey: "test-key",
openRouterModelId: "deepseek/deepseek-v3.1-terminus",
reasoningEffort: "high",
})

const mockResponse = { choices: [{ message: { content: "test completion" } }] }
const mockCreate = vitest.fn().mockResolvedValue(mockResponse)
;(OpenAI as any).prototype.chat = {
completions: { create: mockCreate },
} as any

await handler.completePrompt("test prompt")

// Should include chat_template_kwargs with thinking:true for non-streaming as well
expect(mockCreate).toHaveBeenCalledWith(
expect.objectContaining({
model: "deepseek/deepseek-v3.1-terminus",
chat_template_kwargs: { thinking: true },
stream: false,
}),
)
// Ensure reasoning parameter is NOT included
expect(mockCreate).not.toHaveBeenCalledWith(
expect.objectContaining({
reasoning: expect.anything(),
}),
)
})
})
})
32 changes: 30 additions & 2 deletions src/api/providers/openrouter.ts
@@ -60,6 +60,8 @@ type OpenRouterChatCompletionParams = OpenAI.Chat.ChatCompletionCreateParams & {
include_reasoning?: boolean
// https://openrouter.ai/docs/use-cases/reasoning-tokens
reasoning?: OpenRouterReasoningParams
// For DeepSeek V3.1 Terminus models that require chat_template_kwargs
chat_template_kwargs?: { thinking?: boolean }
}

// See `OpenAI.Chat.Completions.ChatCompletionChunk["usage"]`
@@ -141,6 +143,20 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH

const transforms = (this.options.openRouterUseMiddleOutTransform ?? true) ? ["middle-out"] : undefined

// Special handling for DeepSeek V3.1 Terminus models
Review comment: Consider extracting the DeepSeek-specific logic (converting reasoning to chat_template_kwargs) into a helper function to reduce duplication (it appears in both createMessage and completePrompt).
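A minimal sketch of such a helper (the name resolveTerminusReasoning and its placement are hypothetical), reusing the OpenRouterReasoningParams type this file already defines:

function resolveTerminusReasoning(
	modelId: string,
	reasoning: OpenRouterReasoningParams | undefined,
): { reasoning?: OpenRouterReasoningParams; chatTemplateKwargs?: { thinking?: boolean } } {
	// Non-Terminus models keep the reasoning parameter untouched.
	if (!modelId.startsWith("deepseek/deepseek-v3.1-terminus")) {
		return { reasoning }
	}
	// Terminus expresses reasoning via chat_template_kwargs and must not
	// receive the reasoning parameter at all.
	return { chatTemplateKwargs: { thinking: Boolean(reasoning && !reasoning.exclude) } }
}

Both call sites would then reduce to:

const { reasoning: finalReasoning, chatTemplateKwargs } = resolveTerminusReasoning(modelId, reasoning)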

// These models use chat_template_kwargs with thinking parameter instead of reasoning
let chatTemplateKwargs: { thinking?: boolean } | undefined
let finalReasoning = reasoning

if (modelId.startsWith("deepseek/deepseek-v3.1-terminus")) {
// For DeepSeek V3.1 Terminus, convert reasoning to chat_template_kwargs
// The reasoning object will be present if reasoning is enabled
const hasReasoningEnabled = Boolean(reasoning && !reasoning.exclude)
chatTemplateKwargs = { thinking: hasReasoningEnabled }
// Don't pass reasoning parameter for this model
finalReasoning = undefined
}

// https://openrouter.ai/docs/transforms
const completionParams: OpenRouterChatCompletionParams = {
model: modelId,
@@ -160,7 +176,8 @@
},
}),
...(transforms && { transforms }),
...(reasoning && { reasoning }),
...(finalReasoning && { reasoning: finalReasoning }),
...(chatTemplateKwargs && { chat_template_kwargs: chatTemplateKwargs }),
}

let stream
@@ -248,6 +265,16 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
async completePrompt(prompt: string) {
let { id: modelId, maxTokens, temperature, reasoning } = await this.fetchModel()

// Handle DeepSeek V3.1 Terminus for non-streaming as well
let chatTemplateKwargs: { thinking?: boolean } | undefined
let finalReasoning = reasoning

if (modelId.startsWith("deepseek/deepseek-v3.1-terminus")) {
const hasReasoningEnabled = Boolean(reasoning && !reasoning.exclude)
chatTemplateKwargs = { thinking: hasReasoningEnabled }
finalReasoning = undefined
}

const completionParams: OpenRouterChatCompletionParams = {
model: modelId,
max_tokens: maxTokens,
@@ -263,7 +290,8 @@
allow_fallbacks: false,
},
}),
...(reasoning && { reasoning }),
...(finalReasoning && { reasoning: finalReasoning }),
...(chatTemplateKwargs && { chat_template_kwargs: chatTemplateKwargs }),
}

let response
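For reference, a sketch of the request shapes this change produces (values are illustrative, not taken from a real capture):

// deepseek/deepseek-v3.1-terminus, reasoning enabled:
//   { model: "deepseek/deepseek-v3.1-terminus", chat_template_kwargs: { thinking: true }, ... }  // no reasoning field
// deepseek/deepseek-v3.1-terminus, reasoning disabled:
//   { model: "deepseek/deepseek-v3.1-terminus", chat_template_kwargs: { thinking: false }, ... } // no reasoning field
// any other model (unchanged behavior):
//   { model: "anthropic/claude-sonnet-4", reasoning: { effort: "medium" }, ... }                 // no chat_template_kwargs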