From cb23be634661882b154739a47f9d9137ba228fd7 Mon Sep 17 00:00:00 2001
From: Piotr Rogowski
Date: Mon, 27 Jan 2025 21:27:24 +0100
Subject: [PATCH 1/2] Extend deepseek-r1 support

---
 src/api/providers/openai.ts     | 38 +++++++------
 src/api/providers/openrouter.ts |  6 +-
 src/api/transform/r1-format.ts  | 98 +++++++++++++++++++++++++++++++++
 src/core/Cline.ts               |  4 ++
 4 files changed, 129 insertions(+), 17 deletions(-)
 create mode 100644 src/api/transform/r1-format.ts

diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts
index c5e3ae9e48b..15156200845 100644
--- a/src/api/providers/openai.ts
+++ b/src/api/providers/openai.ts
@@ -1,5 +1,6 @@
 import { Anthropic } from "@anthropic-ai/sdk"
 import OpenAI, { AzureOpenAI } from "openai"
+
 import {
 	ApiHandlerOptions,
 	azureOpenAiDefaultApiVersion,
@@ -8,6 +9,7 @@ import {
 } from "../../shared/api"
 import { ApiHandler, SingleCompletionHandler } from "../index"
 import { convertToOpenAiMessages } from "../transform/openai-format"
+import { convertToR1Format } from "../transform/r1-format"
 import { ApiStream } from "../transform/stream"
 
 export class OpenAiHandler implements ApiHandler, SingleCompletionHandler {
@@ -16,7 +18,8 @@ export class OpenAiHandler implements ApiHandler, SingleCompletionHandler {
 
 	constructor(options: ApiHandlerOptions) {
 		this.options = options
-		// Azure API shape slightly differs from the core API shape: https://github.com/openai/openai-node?tab=readme-ov-file#microsoft-azure-openai
+		// Azure API shape slightly differs from the core API shape:
+		// https://github.com/openai/openai-node?tab=readme-ov-file#microsoft-azure-openai
 		const urlHost = new URL(this.options.openAiBaseUrl ?? "").host
 		if (urlHost === "azure.com" || urlHost.endsWith(".azure.com") || options.openAiUseAzure) {
 			this.client = new AzureOpenAI({
@@ -38,7 +41,7 @@ export class OpenAiHandler implements ApiHandler, SingleCompletionHandler {
 
 		const deepseekReasoner = modelId.includes("deepseek-reasoner")
 
-		if (!deepseekReasoner && (this.options.openAiStreamingEnabled ?? true)) {
+		if (this.options.openAiStreamingEnabled ?? true) {
 			const systemMessage: OpenAI.Chat.ChatCompletionSystemMessageParam = {
 				role: "system",
 				content: systemPrompt,
@@ -46,7 +49,9 @@ export class OpenAiHandler implements ApiHandler, SingleCompletionHandler {
 			const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
 				model: modelId,
 				temperature: 0,
-				messages: [systemMessage, ...convertToOpenAiMessages(messages)],
+				messages: deepseekReasoner
+					? convertToR1Format([{ role: "user", content: systemPrompt }, ...messages])
+					: [systemMessage, ...convertToOpenAiMessages(messages)],
 				stream: true as const,
 				stream_options: { include_usage: true },
 			}
@@ -64,6 +69,12 @@ export class OpenAiHandler implements ApiHandler, SingleCompletionHandler {
 						text: delta.content,
 					}
 				}
+				if ("reasoning_content" in delta && delta.reasoning_content) {
+					yield {
+						type: "reasoning",
+						text: (delta.reasoning_content as string | undefined) || "",
+					}
+				}
 				if (chunk.usage) {
 					yield {
 						type: "usage",
@@ -73,24 +84,19 @@ export class OpenAiHandler implements ApiHandler, SingleCompletionHandler {
 				}
 			}
 		} else {
-			let systemMessage: OpenAI.Chat.ChatCompletionUserMessageParam | OpenAI.Chat.ChatCompletionSystemMessageParam
-			// o1 for instance doesnt support streaming, non-1 temp, or system prompt
-			// deepseek reasoner supports system prompt
-			systemMessage = deepseekReasoner
-				? {
-						role: "system",
-						content: systemPrompt,
-					}
-				: {
-						role: "user",
-						content: systemPrompt,
-					}
+			const systemMessage: OpenAI.Chat.ChatCompletionUserMessageParam = {
+				role: "user",
+				content: systemPrompt,
+			}
 
 			const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = {
 				model: modelId,
-				messages: [systemMessage, ...convertToOpenAiMessages(messages)],
+				messages: deepseekReasoner
+					? convertToR1Format([{ role: "user", content: systemPrompt }, ...messages])
+					: [systemMessage, ...convertToOpenAiMessages(messages)],
 			}
+
 			const response = await this.client.chat.completions.create(requestOptions)
 
 			yield {
diff --git a/src/api/providers/openrouter.ts b/src/api/providers/openrouter.ts
index 63aeb055b4e..6922e406131 100644
--- a/src/api/providers/openrouter.ts
+++ b/src/api/providers/openrouter.ts
@@ -19,6 +19,7 @@ interface OpenRouterApiStreamUsageChunk extends ApiStreamUsageChunk {
 }
 
 import { SingleCompletionHandler } from ".."
+import { convertToR1Format } from "../transform/r1-format"
 
 export class OpenRouterHandler implements ApiHandler, SingleCompletionHandler {
 	private options: ApiHandlerOptions
@@ -41,7 +42,7 @@ export class OpenRouterHandler implements ApiHandler, SingleCompletionHandler {
 		messages: Anthropic.Messages.MessageParam[],
 	): AsyncGenerator<ApiStreamChunk> {
 		// Convert Anthropic messages to OpenAI format
-		const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
+		let openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
 			{ role: "system", content: systemPrompt },
 			...convertToOpenAiMessages(messages),
 		]
@@ -117,6 +118,9 @@ export class OpenRouterHandler implements ApiHandler, SingleCompletionHandler {
 			case "deepseek/deepseek-r1":
 				// Recommended temperature for DeepSeek reasoning models
 				temperature = 0.6
+				// DeepSeek highly recommends using user instead of system role
+				openAiMessages[0].role = "user"
+				openAiMessages = convertToR1Format([{ role: "user", content: systemPrompt }, ...messages])
 		}
 
 		// https://openrouter.ai/docs/transforms
diff --git a/src/api/transform/r1-format.ts b/src/api/transform/r1-format.ts
new file mode 100644
index 00000000000..51a4b94dbc4
--- /dev/null
+++ b/src/api/transform/r1-format.ts
@@ -0,0 +1,98 @@
+import { Anthropic } from "@anthropic-ai/sdk"
+import OpenAI from "openai"
+
+type ContentPartText = OpenAI.Chat.ChatCompletionContentPartText
+type ContentPartImage = OpenAI.Chat.ChatCompletionContentPartImage
+type UserMessage = OpenAI.Chat.ChatCompletionUserMessageParam
+type AssistantMessage = OpenAI.Chat.ChatCompletionAssistantMessageParam
+type Message = OpenAI.Chat.ChatCompletionMessageParam
+type AnthropicMessage = Anthropic.Messages.MessageParam
+
+/**
+ * Converts Anthropic messages to OpenAI format while merging consecutive messages with the same role.
+ * This is required for DeepSeek Reasoner which does not support successive messages with the same role.
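+ *
+ * Example (illustrative): two consecutive user turns
+ *   [{ role: "user", content: "Hello" }, { role: "user", content: "How are you?" }]
+ * are merged into a single user message
+ *   [{ role: "user", content: "Hello\nHow are you?" }]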
+ *
+ * @param messages Array of Anthropic messages
+ * @returns Array of OpenAI messages where consecutive messages with the same role are combined
+ */
+export function convertToR1Format(messages: AnthropicMessage[]): Message[] {
+	return messages.reduce<Message[]>((merged, message) => {
+		const lastMessage = merged[merged.length - 1]
+		let messageContent: string | (ContentPartText | ContentPartImage)[] = ""
+		let hasImages = false
+
+		// Convert content to appropriate format
+		if (Array.isArray(message.content)) {
+			const textParts: string[] = []
+			const imageParts: ContentPartImage[] = []
+
+			message.content.forEach((part) => {
+				if (part.type === "text") {
+					textParts.push(part.text)
+				}
+				if (part.type === "image") {
+					hasImages = true
+					imageParts.push({
+						type: "image_url",
+						image_url: { url: `data:${part.source.media_type};base64,${part.source.data}` },
+					})
+				}
+			})
+
+			if (hasImages) {
+				const parts: (ContentPartText | ContentPartImage)[] = []
+				if (textParts.length > 0) {
+					parts.push({ type: "text", text: textParts.join("\n") })
+				}
+				parts.push(...imageParts)
+				messageContent = parts
+			} else {
+				messageContent = textParts.join("\n")
+			}
+		} else {
+			messageContent = message.content
+		}
+
+		// If last message has same role, merge the content
+		if (lastMessage?.role === message.role) {
+			if (typeof lastMessage.content === "string" && typeof messageContent === "string") {
+				lastMessage.content += `\n${messageContent}`
+			}
+			// If either has image content, convert both to array format
+			else {
+				const lastContent = Array.isArray(lastMessage.content)
+					? lastMessage.content
+					: [{ type: "text" as const, text: lastMessage.content || "" }]
+
+				const newContent = Array.isArray(messageContent)
+					? messageContent
+					: [{ type: "text" as const, text: messageContent }]
+
+				if (message.role === "assistant") {
+					const mergedContent = [...lastContent, ...newContent] as AssistantMessage["content"]
+					lastMessage.content = mergedContent
+				} else {
+					const mergedContent = [...lastContent, ...newContent] as UserMessage["content"]
+					lastMessage.content = mergedContent
+				}
+			}
+		} else {
+			// Add as new message with the correct type based on role
+			if (message.role === "assistant") {
+				const newMessage: AssistantMessage = {
+					role: "assistant",
+					content: messageContent as AssistantMessage["content"],
+				}
+				merged.push(newMessage)
+			} else {
+				const newMessage: UserMessage = {
+					role: "user",
+					content: messageContent as UserMessage["content"],
+				}
+				merged.push(newMessage)
+			}
+		}
+
+		return merged
+	}, [])
+}
diff --git a/src/core/Cline.ts b/src/core/Cline.ts
index 859d2d54845..afaa923f296 100644
--- a/src/core/Cline.ts
+++ b/src/core/Cline.ts
@@ -2391,6 +2391,10 @@ export class Cline {
 		let reasoningMessage = ""
 		try {
 			for await (const chunk of stream) {
+				if (!chunk) {
+					// Sometimes chunk is undefined; no idea what can cause it, but this workaround seems to fix it
+					continue
+				}
 				switch (chunk.type) {
 					case "reasoning":
 						reasoningMessage += chunk.text

From 18c7f57afbaaab60e8a5845243ed26bc60661d29 Mon Sep 17 00:00:00 2001
From: Matt Rubens
Date: Tue, 28 Jan 2025 00:15:14 -0500
Subject: [PATCH 2/2] Add test

---
 src/api/transform/__tests__/r1-format.test.ts | 180 ++++++++++++++++++
 1 file changed, 180 insertions(+)
 create mode 100644 src/api/transform/__tests__/r1-format.test.ts

diff --git a/src/api/transform/__tests__/r1-format.test.ts b/src/api/transform/__tests__/r1-format.test.ts
new file mode 100644
index 00000000000..fce1f99da82
--- /dev/null
+++ b/src/api/transform/__tests__/r1-format.test.ts
@@ -0,0 +1,180 @@
+import { convertToR1Format } from "../r1-format"
+import { Anthropic } from "@anthropic-ai/sdk"
+import OpenAI from "openai"
+
+describe("convertToR1Format", () => {
+	it("should convert basic text messages", () => {
+		const input: Anthropic.Messages.MessageParam[] = [
+			{ role: "user", content: "Hello" },
+			{ role: "assistant", content: "Hi there" },
+		]
+
+		const expected: OpenAI.Chat.ChatCompletionMessageParam[] = [
+			{ role: "user", content: "Hello" },
+			{ role: "assistant", content: "Hi there" },
+		]
+
+		expect(convertToR1Format(input)).toEqual(expected)
+	})
+
+	it("should merge consecutive messages with same role", () => {
+		const input: Anthropic.Messages.MessageParam[] = [
+			{ role: "user", content: "Hello" },
+			{ role: "user", content: "How are you?" },
+			{ role: "assistant", content: "Hi!" },
+			{ role: "assistant", content: "I'm doing well" },
+		]
+
+		const expected: OpenAI.Chat.ChatCompletionMessageParam[] = [
+			{ role: "user", content: "Hello\nHow are you?" },
+			{ role: "assistant", content: "Hi!\nI'm doing well" },
+		]
+
+		expect(convertToR1Format(input)).toEqual(expected)
+	})
+
+	it("should handle image content", () => {
+		const input: Anthropic.Messages.MessageParam[] = [
+			{
+				role: "user",
+				content: [
+					{
+						type: "image",
+						source: {
+							type: "base64",
+							media_type: "image/jpeg",
+							data: "base64data",
+						},
+					},
+				],
+			},
+		]
+
+		const expected: OpenAI.Chat.ChatCompletionMessageParam[] = [
+			{
+				role: "user",
+				content: [
+					{
+						type: "image_url",
+						image_url: {
+							url: "data:image/jpeg;base64,base64data",
+						},
+					},
+				],
+			},
+		]
+
+		expect(convertToR1Format(input)).toEqual(expected)
+	})
+
+	it("should handle mixed text and image content", () => {
+		const input: Anthropic.Messages.MessageParam[] = [
+			{
+				role: "user",
+				content: [
+					{ type: "text", text: "Check this image:" },
+					{
+						type: "image",
+						source: {
+							type: "base64",
+							media_type: "image/jpeg",
+							data: "base64data",
+						},
+					},
+				],
+			},
+		]
+
+		const expected: OpenAI.Chat.ChatCompletionMessageParam[] = [
+			{
+				role: "user",
+				content: [
+					{ type: "text", text: "Check this image:" },
+					{
+						type: "image_url",
+						image_url: {
+							url: "data:image/jpeg;base64,base64data",
+						},
+					},
+				],
+			},
+		]
+
+		expect(convertToR1Format(input)).toEqual(expected)
+	})
+
+	it("should merge mixed content messages with same role", () => {
+		const input: Anthropic.Messages.MessageParam[] = [
+			{
+				role: "user",
+				content: [
+					{ type: "text", text: "First image:" },
+					{
+						type: "image",
+						source: {
+							type: "base64",
+							media_type: "image/jpeg",
+							data: "image1",
+						},
+					},
+				],
+			},
+			{
+				role: "user",
+				content: [
+					{ type: "text", text: "Second image:" },
+					{
+						type: "image",
+						source: {
+							type: "base64",
+							media_type: "image/png",
+							data: "image2",
+						},
+					},
+				],
+			},
+		]
+
+		const expected: OpenAI.Chat.ChatCompletionMessageParam[] = [
+			{
+				role: "user",
+				content: [
+					{ type: "text", text: "First image:" },
+					{
+						type: "image_url",
+						image_url: {
+							url: "data:image/jpeg;base64,image1",
+						},
+					},
+					{ type: "text", text: "Second image:" },
+					{
+						type: "image_url",
+						image_url: {
+							url: "data:image/png;base64,image2",
+						},
+					},
+				],
+			},
+		]
+
+		expect(convertToR1Format(input)).toEqual(expected)
+	})
+
+	it("should handle empty messages array", () => {
+		expect(convertToR1Format([])).toEqual([])
+	})
+
+	it("should handle messages with empty content", () => {
+		const input: Anthropic.Messages.MessageParam[] = [
+			{ role: "user", content: "" },
+			{ role: "assistant", content: "" },
+		]
+
+		const expected: OpenAI.Chat.ChatCompletionMessageParam[] = [
+			{ role: "user", content: "" },
+			{ role: "assistant", content: "" },
+		]
+
+		expect(convertToR1Format(input)).toEqual(expected)
+	})
+})
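
Usage sketch (illustrative only, not part of the patches above; the helper name buildR1Messages and the import path are assumptions based on the src/api/transform/r1-format.ts module added in PATCH 1/2). It shows how the updated handlers call convertToR1Format for deepseek-reasoner: the system prompt is passed as a leading user turn, and consecutive same-role turns are merged before the request is built.

import { Anthropic } from "@anthropic-ai/sdk"
import OpenAI from "openai"

import { convertToR1Format } from "./src/api/transform/r1-format"

// Mirrors the openai.ts change: R1 receives the system prompt as a user turn,
// and convertToR1Format collapses consecutive same-role messages.
function buildR1Messages(
	systemPrompt: string,
	history: Anthropic.Messages.MessageParam[],
): OpenAI.Chat.ChatCompletionMessageParam[] {
	return convertToR1Format([{ role: "user", content: systemPrompt }, ...history])
}

// The prompt and the first user turn collapse into one user message:
// [{ role: "user", content: "You are a helpful assistant.\nHello" }, { role: "assistant", content: "Hi there" }]
const example = buildR1Messages("You are a helpful assistant.", [
	{ role: "user", content: "Hello" },
	{ role: "assistant", content: "Hi there" },
])
console.log(example)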