diff --git a/packages/types/src/provider-settings.ts b/packages/types/src/provider-settings.ts
index 15833e00c4..5052170f62 100644
--- a/packages/types/src/provider-settings.ts
+++ b/packages/types/src/provider-settings.ts
@@ -190,6 +190,7 @@ const openAiSchema = baseProviderSettingsSchema.extend({
 	openAiStreamingEnabled: z.boolean().optional(),
 	openAiHostHeader: z.string().optional(), // Keep temporarily for backward compatibility during migration.
 	openAiHeaders: z.record(z.string(), z.string()).optional(),
+	openAiSkipSystemMessage: z.boolean().optional(), // Skip system message for models that auto-add BOS tokens (e.g., llama.cpp with --jinja)
 })
 
 const ollamaSchema = baseProviderSettingsSchema.extend({
diff --git a/src/api/providers/__tests__/openai-deepseek-bos.spec.ts b/src/api/providers/__tests__/openai-deepseek-bos.spec.ts
new file mode 100644
index 0000000000..286bdc7d53
--- /dev/null
+++ b/src/api/providers/__tests__/openai-deepseek-bos.spec.ts
@@ -0,0 +1,231 @@
+import { describe, it, expect, vi, beforeEach } from "vitest"
+import OpenAI from "openai"
+import { Anthropic } from "@anthropic-ai/sdk"
+
+import { OpenAiHandler } from "../openai"
+import type { ApiHandlerOptions } from "../../../shared/api"
+
+vi.mock("openai")
+
+describe("OpenAI Handler - DeepSeek V3 BOS Token Handling", () => {
+	let mockOpenAIClient: any
+	let mockStream: any
+
+	beforeEach(() => {
+		vi.clearAllMocks()
+
+		// Create a mock async generator for streaming
+		mockStream = (async function* () {
+			yield {
+				choices: [{ delta: { content: "Test response" } }],
+				usage: { prompt_tokens: 10, completion_tokens: 5 },
+			}
+		})()
+
+		mockOpenAIClient = {
+			chat: {
+				completions: {
+					create: vi.fn().mockResolvedValue(mockStream),
+				},
+			},
+		}
+
+		vi.mocked(OpenAI).mockImplementation(() => mockOpenAIClient as any)
+	})
+
+	describe("Streaming mode", () => {
+		it("should skip system message when openAiSkipSystemMessage is true for DeepSeek V3", async () => {
+			const options: ApiHandlerOptions = {
+				openAiApiKey: "test-key",
+				openAiModelId: "deepseek-v3",
+				openAiBaseUrl: "http://localhost:11434/v1",
+				openAiStreamingEnabled: true,
+				openAiSkipSystemMessage: true,
+			}
+
+			const handler = new OpenAiHandler(options)
+			const systemPrompt = "You are a helpful assistant"
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello" }]
+
+			const stream = handler.createMessage(systemPrompt, messages)
+			const results = []
+			for await (const chunk of stream) {
+				results.push(chunk)
+			}
+
+			expect(mockOpenAIClient.chat.completions.create).toHaveBeenCalledWith(
+				expect.objectContaining({
+					messages: expect.arrayContaining([
+						expect.objectContaining({
+							role: "user",
+							content: expect.stringContaining("You are a helpful assistant"),
+						}),
+					]),
+				}),
+				expect.any(Object),
+			)
+
+			// Verify system message is not included separately
+			const callArgs = mockOpenAIClient.chat.completions.create.mock.calls[0][0]
+			expect(callArgs.messages.find((m: any) => m.role === "system")).toBeUndefined()
+		})
+
+		it("should include system message normally when openAiSkipSystemMessage is false", async () => {
+			const options: ApiHandlerOptions = {
+				openAiApiKey: "test-key",
+				openAiModelId: "deepseek-v3",
+				openAiBaseUrl: "http://localhost:11434/v1",
+				openAiStreamingEnabled: true,
+				openAiSkipSystemMessage: false,
+			}
+
+			const handler = new OpenAiHandler(options)
+			const systemPrompt = "You are a helpful assistant"
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello" }]
+
+			const stream = handler.createMessage(systemPrompt, messages)
+			const results = []
+			for await (const chunk of stream) {
+				results.push(chunk)
+			}
+
+			expect(mockOpenAIClient.chat.completions.create).toHaveBeenCalledWith(
+				expect.objectContaining({
+					messages: expect.arrayContaining([
+						expect.objectContaining({
+							role: "system",
+							content: "You are a helpful assistant",
+						}),
+					]),
+				}),
+				expect.any(Object),
+			)
+		})
+
+		it("should handle case when no user message exists", async () => {
+			const options: ApiHandlerOptions = {
+				openAiApiKey: "test-key",
+				openAiModelId: "deepseek-v3.1",
+				openAiBaseUrl: "http://localhost:11434/v1",
+				openAiStreamingEnabled: true,
+				openAiSkipSystemMessage: true,
+			}
+
+			const handler = new OpenAiHandler(options)
+			const systemPrompt = "You are a helpful assistant"
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "assistant", content: "Previous response" }]
+
+			const stream = handler.createMessage(systemPrompt, messages)
+			const results = []
+			for await (const chunk of stream) {
+				results.push(chunk)
+			}
+
+			// Should create a user message with system prompt
+			expect(mockOpenAIClient.chat.completions.create).toHaveBeenCalledWith(
+				expect.objectContaining({
+					messages: expect.arrayContaining([
+						expect.objectContaining({
+							role: "user",
+							content: "You are a helpful assistant",
+						}),
+					]),
+				}),
+				expect.any(Object),
+			)
+		})
+	})
+
+	describe("Non-streaming mode", () => {
+		beforeEach(() => {
+			mockOpenAIClient.chat.completions.create = vi.fn().mockResolvedValue({
+				choices: [{ message: { content: "Test response" } }],
+				usage: { prompt_tokens: 10, completion_tokens: 5 },
+			})
+		})
+
+		it("should skip system message in non-streaming mode when configured", async () => {
+			const options: ApiHandlerOptions = {
+				openAiApiKey: "test-key",
+				openAiModelId: "deepseek-v3",
+				openAiBaseUrl: "http://localhost:11434/v1",
+				openAiStreamingEnabled: false,
+				openAiSkipSystemMessage: true,
+			}
+
+			const handler = new OpenAiHandler(options)
+			const systemPrompt = "You are a helpful assistant"
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello" }]
+
+			const stream = handler.createMessage(systemPrompt, messages)
+			const results = []
+			for await (const chunk of stream) {
+				results.push(chunk)
+			}
+
+			const callArgs = mockOpenAIClient.chat.completions.create.mock.calls[0][0]
+			// First message should be user message with merged system prompt
+			expect(callArgs.messages[0]).toMatchObject({
+				role: "user",
+				content: expect.stringContaining("You are a helpful assistant"),
+			})
+			// No separate system message
+			expect(callArgs.messages.find((m: any) => m.role === "system")).toBeUndefined()
+		})
+	})
+
+	describe("Model detection", () => {
+		it.each(["deepseek-v3", "deepseek-v3.1", "DeepSeek-V3", "DEEPSEEK-V3.1", "deepseek-chat"])(
+			"should detect %s as DeepSeek model when skipSystemMessage is enabled",
+			async (modelId) => {
+				const options: ApiHandlerOptions = {
+					openAiApiKey: "test-key",
+					openAiModelId: modelId,
+					openAiBaseUrl: "http://localhost:11434/v1",
+					openAiStreamingEnabled: true,
+					openAiSkipSystemMessage: true,
+				}
+
+				const handler = new OpenAiHandler(options)
+				const systemPrompt = "System prompt"
+				const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "User message" }]
+
+				const stream = handler.createMessage(systemPrompt, messages)
+				for await (const chunk of stream) {
+					// Consume stream
+				}
+
+				const callArgs = mockOpenAIClient.chat.completions.create.mock.calls[0][0]
+				// Should merge system prompt into user message
+				expect(callArgs.messages[0].content).toContain("System prompt")
+				expect(callArgs.messages.find((m: any) => m.role === "system")).toBeUndefined()
+			},
+		)
+
+		it("should not apply skip logic to non-DeepSeek models", async () => {
+			const options: ApiHandlerOptions = {
+				openAiApiKey: "test-key",
+				openAiModelId: "gpt-4",
+				openAiBaseUrl: "http://localhost:11434/v1",
+				openAiStreamingEnabled: true,
+				openAiSkipSystemMessage: true,
+			}
+
+			const handler = new OpenAiHandler(options)
+			const systemPrompt = "System prompt"
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "User message" }]
+
+			const stream = handler.createMessage(systemPrompt, messages)
+			for await (const chunk of stream) {
+				// Consume stream
+			}
+
+			const callArgs = mockOpenAIClient.chat.completions.create.mock.calls[0][0]
+			// Should still have system message for non-DeepSeek models
+			expect(callArgs.messages[0]).toMatchObject({
+				role: "system",
+				content: "System prompt",
+			})
+		})
+	})
+})
diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts
index 36158d770c..f56bb6a8b3 100644
--- a/src/api/providers/openai.ts
+++ b/src/api/providers/openai.ts
@@ -105,8 +105,28 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 
 			let convertedMessages
 
+			// Check if we should skip system message for DeepSeek V3 models with llama.cpp
+			const skipSystemMessage =
+				this.options.openAiSkipSystemMessage &&
+				(modelId.toLowerCase().includes("deepseek") || modelId.toLowerCase().includes("deepseek-v3"))
+
 			if (deepseekReasoner) {
 				convertedMessages = convertToR1Format([{ role: "user", content: systemPrompt }, ...messages])
+			} else if (skipSystemMessage) {
+				// For DeepSeek V3 with llama.cpp, merge system prompt into first user message to avoid duplicate BOS
+				const firstUserMessage = messages.find((msg) => msg.role === "user")
+				if (firstUserMessage) {
+					const modifiedMessages = [...messages]
+					const firstUserIndex = modifiedMessages.findIndex((msg) => msg.role === "user")
+					modifiedMessages[firstUserIndex] = {
+						...firstUserMessage,
+						content: `${systemPrompt}\n\n${typeof firstUserMessage.content === "string" ? firstUserMessage.content : JSON.stringify(firstUserMessage.content)}`,
+					}
+					convertedMessages = convertToOpenAiMessages(modifiedMessages)
+				} else {
+					// If no user message, create one with the system prompt
+					convertedMessages = convertToOpenAiMessages([{ role: "user", content: systemPrompt }, ...messages])
+				}
 			} else if (ark || enabledLegacyFormat) {
 				convertedMessages = [systemMessage, ...convertToSimpleMessages(messages)]
 			} else {
@@ -224,13 +244,37 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 				content: systemPrompt,
 			}
 
+			// Check if we should skip system message for DeepSeek V3 models with llama.cpp
+			const skipSystemMessage =
+				this.options.openAiSkipSystemMessage &&
+				(modelId.toLowerCase().includes("deepseek") || modelId.toLowerCase().includes("deepseek-v3"))
+
+			let messagesForRequest
+			if (deepseekReasoner) {
+				messagesForRequest = convertToR1Format([{ role: "user", content: systemPrompt }, ...messages])
+			} else if (skipSystemMessage) {
+				// For DeepSeek V3 with llama.cpp, merge system prompt into first user message
+				const firstUserMessage = messages.find((msg) => msg.role === "user")
+				if (firstUserMessage) {
+					const modifiedMessages = [...messages]
+					const firstUserIndex = modifiedMessages.findIndex((msg) => msg.role === "user")
+					modifiedMessages[firstUserIndex] = {
+						...firstUserMessage,
+						content: `${systemPrompt}\n\n${typeof firstUserMessage.content === "string" ? firstUserMessage.content : JSON.stringify(firstUserMessage.content)}`,
+					}
+					messagesForRequest = convertToOpenAiMessages(modifiedMessages)
+				} else {
+					messagesForRequest = convertToOpenAiMessages([{ role: "user", content: systemPrompt }, ...messages])
+				}
+			} else if (enabledLegacyFormat) {
+				messagesForRequest = [systemMessage, ...convertToSimpleMessages(messages)]
+			} else {
+				messagesForRequest = [systemMessage, ...convertToOpenAiMessages(messages)]
+			}
+
 			const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = {
 				model: modelId,
-				messages: deepseekReasoner
-					? convertToR1Format([{ role: "user", content: systemPrompt }, ...messages])
-					: enabledLegacyFormat
-						? [systemMessage, ...convertToSimpleMessages(messages)]
-						: [systemMessage, ...convertToOpenAiMessages(messages)],
+				messages: messagesForRequest,
 			}
 
 			// Add max_tokens if needed
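For reference, a minimal usage sketch of the new option (not part of the patch): it constructs the handler with openAiSkipSystemMessage enabled, mirroring the test setup above. The API key, base URL, and model id are placeholders for a local llama.cpp server started with --jinja; import paths are as used in the test file.

// Illustrative only; option and handler names come from the patch above.
import { OpenAiHandler } from "../openai"
import type { ApiHandlerOptions } from "../../../shared/api"

const options: ApiHandlerOptions = {
	openAiApiKey: "not-needed-locally", // placeholder value
	openAiBaseUrl: "http://localhost:8080/v1", // placeholder llama.cpp endpoint
	openAiModelId: "deepseek-v3",
	openAiStreamingEnabled: true,
	openAiSkipSystemMessage: true, // merge the system prompt into the first user message
}

const handler = new OpenAiHandler(options)
// handler.createMessage(systemPrompt, messages) now sends no separate "system" role message;
// the system prompt is folded into the first user message, as the patch describes.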