Commit bd283b7

fix: add openAiSkipSystemMessage option to prevent duplicate BOS tokens with DeepSeek V3.1
- Added openAiSkipSystemMessage configuration option for OpenAI Compatible providers
- When enabled for DeepSeek models, merges system prompt into first user message
- Prevents duplicate BOS tokens when using llama.cpp with --jinja flag
- Added comprehensive tests for the new functionality

Fixes #7500
1 parent aee531a commit bd283b7
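
In effect (a sketch of the intended behavior rather than code from this commit; the message text is illustrative), enabling the option changes the request body from a separate system message to a single merged user message:

// Before: the system prompt travels as its own message. With llama.cpp --jinja and
// DeepSeek V3.1 chat templates this is what produces the duplicate BOS token.
const messagesBefore = [
	{ role: "system", content: "You are a helpful assistant" },
	{ role: "user", content: "Hello" },
]

// After: with openAiSkipSystemMessage enabled for a DeepSeek model, the system prompt
// is folded into the first user message, joined with a blank line (see the diff below).
const messagesAfter = [{ role: "user", content: "You are a helpful assistant\n\nHello" }]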

File tree

3 files changed (+281 −5 lines)

packages/types/src/provider-settings.ts

Lines changed: 1 addition & 0 deletions
@@ -190,6 +190,7 @@ const openAiSchema = baseProviderSettingsSchema.extend({
 	openAiStreamingEnabled: z.boolean().optional(),
 	openAiHostHeader: z.string().optional(), // Keep temporarily for backward compatibility during migration.
 	openAiHeaders: z.record(z.string(), z.string()).optional(),
+	openAiSkipSystemMessage: z.boolean().optional(), // Skip system message for models that auto-add BOS tokens (e.g., llama.cpp with --jinja)
 })

 const ollamaSchema = baseProviderSettingsSchema.extend({

Lines changed: 231 additions & 0 deletions
@@ -0,0 +1,231 @@
import { describe, it, expect, vi, beforeEach } from "vitest"
import OpenAI from "openai"
import { Anthropic } from "@anthropic-ai/sdk"

import { OpenAiHandler } from "../openai"
import type { ApiHandlerOptions } from "../../../shared/api"

vi.mock("openai")

describe("OpenAI Handler - DeepSeek V3 BOS Token Handling", () => {
	let mockOpenAIClient: any
	let mockStream: any

	beforeEach(() => {
		vi.clearAllMocks()

		// Create a mock async generator for streaming
		mockStream = (async function* () {
			yield {
				choices: [{ delta: { content: "Test response" } }],
				usage: { prompt_tokens: 10, completion_tokens: 5 },
			}
		})()

		mockOpenAIClient = {
			chat: {
				completions: {
					create: vi.fn().mockResolvedValue(mockStream),
				},
			},
		}

		vi.mocked(OpenAI).mockImplementation(() => mockOpenAIClient as any)
	})

	describe("Streaming mode", () => {
		it("should skip system message when openAiSkipSystemMessage is true for DeepSeek V3", async () => {
			const options: ApiHandlerOptions = {
				openAiApiKey: "test-key",
				openAiModelId: "deepseek-v3",
				openAiBaseUrl: "http://localhost:11434/v1",
				openAiStreamingEnabled: true,
				openAiSkipSystemMessage: true,
			}

			const handler = new OpenAiHandler(options)
			const systemPrompt = "You are a helpful assistant"
			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello" }]

			const stream = handler.createMessage(systemPrompt, messages)
			const results = []
			for await (const chunk of stream) {
				results.push(chunk)
			}

			expect(mockOpenAIClient.chat.completions.create).toHaveBeenCalledWith(
				expect.objectContaining({
					messages: expect.arrayContaining([
						expect.objectContaining({
							role: "user",
							content: expect.stringContaining("You are a helpful assistant"),
						}),
					]),
				}),
				expect.any(Object),
			)

			// Verify system message is not included separately
			const callArgs = mockOpenAIClient.chat.completions.create.mock.calls[0][0]
			expect(callArgs.messages.find((m: any) => m.role === "system")).toBeUndefined()
		})

		it("should include system message normally when openAiSkipSystemMessage is false", async () => {
			const options: ApiHandlerOptions = {
				openAiApiKey: "test-key",
				openAiModelId: "deepseek-v3",
				openAiBaseUrl: "http://localhost:11434/v1",
				openAiStreamingEnabled: true,
				openAiSkipSystemMessage: false,
			}

			const handler = new OpenAiHandler(options)
			const systemPrompt = "You are a helpful assistant"
			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello" }]

			const stream = handler.createMessage(systemPrompt, messages)
			const results = []
			for await (const chunk of stream) {
				results.push(chunk)
			}

			expect(mockOpenAIClient.chat.completions.create).toHaveBeenCalledWith(
				expect.objectContaining({
					messages: expect.arrayContaining([
						expect.objectContaining({
							role: "system",
							content: "You are a helpful assistant",
						}),
					]),
				}),
				expect.any(Object),
			)
		})

		it("should handle case when no user message exists", async () => {
			const options: ApiHandlerOptions = {
				openAiApiKey: "test-key",
				openAiModelId: "deepseek-v3.1",
				openAiBaseUrl: "http://localhost:11434/v1",
				openAiStreamingEnabled: true,
				openAiSkipSystemMessage: true,
			}

			const handler = new OpenAiHandler(options)
			const systemPrompt = "You are a helpful assistant"
			const messages: Anthropic.Messages.MessageParam[] = [{ role: "assistant", content: "Previous response" }]

			const stream = handler.createMessage(systemPrompt, messages)
			const results = []
			for await (const chunk of stream) {
				results.push(chunk)
			}

			// Should create a user message with system prompt
			expect(mockOpenAIClient.chat.completions.create).toHaveBeenCalledWith(
				expect.objectContaining({
					messages: expect.arrayContaining([
						expect.objectContaining({
							role: "user",
							content: "You are a helpful assistant",
						}),
					]),
				}),
				expect.any(Object),
			)
		})
	})

	describe("Non-streaming mode", () => {
		beforeEach(() => {
			mockOpenAIClient.chat.completions.create = vi.fn().mockResolvedValue({
				choices: [{ message: { content: "Test response" } }],
				usage: { prompt_tokens: 10, completion_tokens: 5 },
			})
		})

		it("should skip system message in non-streaming mode when configured", async () => {
			const options: ApiHandlerOptions = {
				openAiApiKey: "test-key",
				openAiModelId: "deepseek-v3",
				openAiBaseUrl: "http://localhost:11434/v1",
				openAiStreamingEnabled: false,
				openAiSkipSystemMessage: true,
			}

			const handler = new OpenAiHandler(options)
			const systemPrompt = "You are a helpful assistant"
			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello" }]

			const stream = handler.createMessage(systemPrompt, messages)
			const results = []
			for await (const chunk of stream) {
				results.push(chunk)
			}

			const callArgs = mockOpenAIClient.chat.completions.create.mock.calls[0][0]
			// First message should be user message with merged system prompt
			expect(callArgs.messages[0]).toMatchObject({
				role: "user",
				content: expect.stringContaining("You are a helpful assistant"),
			})
			// No separate system message
			expect(callArgs.messages.find((m: any) => m.role === "system")).toBeUndefined()
		})
	})

	describe("Model detection", () => {
		it.each(["deepseek-v3", "deepseek-v3.1", "DeepSeek-V3", "DEEPSEEK-V3.1", "deepseek-chat"])(
			"should detect %s as DeepSeek model when skipSystemMessage is enabled",
			async (modelId) => {
				const options: ApiHandlerOptions = {
					openAiApiKey: "test-key",
					openAiModelId: modelId,
					openAiBaseUrl: "http://localhost:11434/v1",
					openAiStreamingEnabled: true,
					openAiSkipSystemMessage: true,
				}

				const handler = new OpenAiHandler(options)
				const systemPrompt = "System prompt"
				const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "User message" }]

				const stream = handler.createMessage(systemPrompt, messages)
				for await (const chunk of stream) {
					// Consume stream
				}

				const callArgs = mockOpenAIClient.chat.completions.create.mock.calls[0][0]
				// Should merge system prompt into user message
				expect(callArgs.messages[0].content).toContain("System prompt")
				expect(callArgs.messages.find((m: any) => m.role === "system")).toBeUndefined()
			},
		)

		it("should not apply skip logic to non-DeepSeek models", async () => {
			const options: ApiHandlerOptions = {
				openAiApiKey: "test-key",
				openAiModelId: "gpt-4",
				openAiBaseUrl: "http://localhost:11434/v1",
				openAiStreamingEnabled: true,
				openAiSkipSystemMessage: true,
			}

			const handler = new OpenAiHandler(options)
			const systemPrompt = "System prompt"
			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "User message" }]

			const stream = handler.createMessage(systemPrompt, messages)
			for await (const chunk of stream) {
				// Consume stream
			}

			const callArgs = mockOpenAIClient.chat.completions.create.mock.calls[0][0]
			// Should still have system message for non-DeepSeek models
			expect(callArgs.messages[0]).toMatchObject({
				role: "system",
				content: "System prompt",
			})
		})
	})
})
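
As a usage sketch (not part of this commit), the handler exercised by these tests would be driven the same way outside the harness; the base URL, API key, and model id below are placeholders, and the import paths simply mirror the test file above:

import { Anthropic } from "@anthropic-ai/sdk"

import { OpenAiHandler } from "../openai"
import type { ApiHandlerOptions } from "../../../shared/api"

// Placeholder settings for a llama.cpp server on localhost serving a DeepSeek V3.1 model.
const options: ApiHandlerOptions = {
	openAiApiKey: "not-needed-for-local",
	openAiModelId: "deepseek-v3.1",
	openAiBaseUrl: "http://localhost:8080/v1",
	openAiStreamingEnabled: true,
	openAiSkipSystemMessage: true,
}

async function main() {
	const handler = new OpenAiHandler(options)
	const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello" }]

	// createMessage yields stream chunks, exactly as the tests above consume them.
	for await (const chunk of handler.createMessage("You are a helpful assistant", messages)) {
		console.log(chunk)
	}
}

main()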

src/api/providers/openai.ts

Lines changed: 49 additions & 5 deletions
@@ -105,8 +105,28 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 		let convertedMessages

+		// Check if we should skip system message for DeepSeek V3 models with llama.cpp
+		const skipSystemMessage =
+			this.options.openAiSkipSystemMessage &&
+			(modelId.toLowerCase().includes("deepseek") || modelId.toLowerCase().includes("deepseek-v3"))
+
 		if (deepseekReasoner) {
 			convertedMessages = convertToR1Format([{ role: "user", content: systemPrompt }, ...messages])
+		} else if (skipSystemMessage) {
+			// For DeepSeek V3 with llama.cpp, merge system prompt into first user message to avoid duplicate BOS
+			const firstUserMessage = messages.find((msg) => msg.role === "user")
+			if (firstUserMessage) {
+				const modifiedMessages = [...messages]
+				const firstUserIndex = modifiedMessages.findIndex((msg) => msg.role === "user")
+				modifiedMessages[firstUserIndex] = {
+					...firstUserMessage,
+					content: `${systemPrompt}\n\n${typeof firstUserMessage.content === "string" ? firstUserMessage.content : JSON.stringify(firstUserMessage.content)}`,
+				}
+				convertedMessages = convertToOpenAiMessages(modifiedMessages)
+			} else {
+				// If no user message, create one with the system prompt
+				convertedMessages = convertToOpenAiMessages([{ role: "user", content: systemPrompt }, ...messages])
+			}
 		} else if (ark || enabledLegacyFormat) {
 			convertedMessages = [systemMessage, ...convertToSimpleMessages(messages)]
 		} else {

@@ -224,13 +244,37 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 			content: systemPrompt,
 		}

+		// Check if we should skip system message for DeepSeek V3 models with llama.cpp
+		const skipSystemMessage =
+			this.options.openAiSkipSystemMessage &&
+			(modelId.toLowerCase().includes("deepseek") || modelId.toLowerCase().includes("deepseek-v3"))
+
+		let messagesForRequest
+		if (deepseekReasoner) {
+			messagesForRequest = convertToR1Format([{ role: "user", content: systemPrompt }, ...messages])
+		} else if (skipSystemMessage) {
+			// For DeepSeek V3 with llama.cpp, merge system prompt into first user message
+			const firstUserMessage = messages.find((msg) => msg.role === "user")
+			if (firstUserMessage) {
+				const modifiedMessages = [...messages]
+				const firstUserIndex = modifiedMessages.findIndex((msg) => msg.role === "user")
+				modifiedMessages[firstUserIndex] = {
+					...firstUserMessage,
+					content: `${systemPrompt}\n\n${typeof firstUserMessage.content === "string" ? firstUserMessage.content : JSON.stringify(firstUserMessage.content)}`,
+				}
+				messagesForRequest = convertToOpenAiMessages(modifiedMessages)
+			} else {
+				messagesForRequest = convertToOpenAiMessages([{ role: "user", content: systemPrompt }, ...messages])
+			}
+		} else if (enabledLegacyFormat) {
+			messagesForRequest = [systemMessage, ...convertToSimpleMessages(messages)]
+		} else {
+			messagesForRequest = [systemMessage, ...convertToOpenAiMessages(messages)]
+		}
+
 		const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = {
 			model: modelId,
-			messages: deepseekReasoner
-				? convertToR1Format([{ role: "user", content: systemPrompt }, ...messages])
-				: enabledLegacyFormat
-					? [systemMessage, ...convertToSimpleMessages(messages)]
-					: [systemMessage, ...convertToOpenAiMessages(messages)],
+			messages: messagesForRequest,
 		}

 		// Add max_tokens if needed
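
One detail worth calling out in the hunks above: when the first user message carries structured (non-string) content, the merge falls back to JSON.stringify. A minimal sketch of what that produces (the content block is illustrative):

const systemPrompt = "You are a helpful assistant"
const firstUserMessage = {
	role: "user" as const,
	content: [{ type: "text" as const, text: "Hello" }],
}

// Mirrors the merge in the diff: string content is concatenated, anything else is stringified.
const merged = `${systemPrompt}\n\n${
	typeof firstUserMessage.content === "string"
		? firstUserMessage.content
		: JSON.stringify(firstUserMessage.content)
}`
// merged === 'You are a helpful assistant\n\n[{"type":"text","text":"Hello"}]'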
