diff --git a/packages/types/src/providers/chutes.ts b/packages/types/src/providers/chutes.ts
index f0dbc4ba0589..6ad0b8d6c0e3 100644
--- a/packages/types/src/providers/chutes.ts
+++ b/packages/types/src/providers/chutes.ts
@@ -51,6 +51,8 @@ export const chutesModels = {
 		contextWindow: 163840,
 		supportsImages: false,
 		supportsPromptCache: false,
+		supportsReasoningEffort: true,
+		reasoningEffort: "medium",
 		inputPrice: 0,
 		outputPrice: 0,
 		description: "DeepSeek R1 0528 model.",
@@ -60,6 +62,8 @@ export const chutesModels = {
 		contextWindow: 163840,
 		supportsImages: false,
 		supportsPromptCache: false,
+		supportsReasoningEffort: true,
+		reasoningEffort: "medium",
 		inputPrice: 0,
 		outputPrice: 0,
 		description: "DeepSeek R1 model.",
@@ -207,6 +211,8 @@ export const chutesModels = {
 		contextWindow: 163840,
 		supportsImages: false,
 		supportsPromptCache: false,
+		supportsReasoningEffort: true,
+		reasoningEffort: "medium",
 		inputPrice: 0,
 		outputPrice: 0,
 		description: "DeepSeek R1 Zero model.",
@@ -288,6 +294,8 @@ export const chutesModels = {
 		contextWindow: 163840,
 		supportsImages: false,
 		supportsPromptCache: false,
+		supportsReasoningEffort: true,
+		reasoningEffort: "medium",
 		inputPrice: 0,
 		outputPrice: 0,
 		description: "TNGTech DeepSeek R1T Chimera model.",
@@ -345,6 +353,8 @@ export const chutesModels = {
 		contextWindow: 128000,
 		supportsImages: false,
 		supportsPromptCache: false,
+		supportsReasoningEffort: true,
+		reasoningEffort: "medium",
 		inputPrice: 0,
 		outputPrice: 0,
 		description:
@@ -382,6 +392,8 @@ export const chutesModels = {
 		contextWindow: 262144,
 		supportsImages: false,
 		supportsPromptCache: false,
+		supportsReasoningEffort: true,
+		reasoningEffort: "medium",
 		inputPrice: 0.077968332,
 		outputPrice: 0.31202496,
 		description: "Qwen3 235B A22B Thinking 2507 model with 262K context window.",
@@ -401,6 +413,8 @@ export const chutesModels = {
 		contextWindow: 131072,
 		supportsImages: false,
 		supportsPromptCache: false,
+		supportsReasoningEffort: true,
+		reasoningEffort: "medium",
 		inputPrice: 0,
 		outputPrice: 0,
 		description:
@@ -411,6 +425,8 @@ export const chutesModels = {
 		contextWindow: 262144,
 		supportsImages: true,
 		supportsPromptCache: false,
+		supportsReasoningEffort: true,
+		reasoningEffort: "medium",
 		inputPrice: 0.16,
 		outputPrice: 0.65,
 		description:
diff --git a/src/api/providers/__tests__/chutes.spec.ts b/src/api/providers/__tests__/chutes.spec.ts
index c7fa0dd750fa..6b4ddad7c389 100644
--- a/src/api/providers/__tests__/chutes.spec.ts
+++ b/src/api/providers/__tests__/chutes.spec.ts
@@ -503,6 +503,7 @@ describe("ChutesHandler", () => {
 				temperature: 0.6,
 				stream: true,
 				stream_options: { include_usage: true },
+				reasoning_effort: "medium", // DeepSeek R1 now supports reasoning effort with default "medium"
 			}),
 		)
 	})
@@ -540,7 +541,6 @@ describe("ChutesHandler", () => {
 				stream: true,
 				stream_options: { include_usage: true },
 			}),
-			undefined,
 		)
 	})
 
@@ -563,4 +563,155 @@ describe("ChutesHandler", () => {
 		const model = handlerWithModel.getModel()
 		expect(model.info.temperature).toBe(0.5)
 	})
+
+	describe("reasoning effort support", () => {
+		it("should pass reasoning effort for models that support it", async () => {
+			const modelId: ChutesModelId = "deepseek-ai/DeepSeek-R1"
+
+			// Clear previous mocks and set up new implementation
+			mockCreate.mockClear()
+			mockCreate.mockImplementationOnce(async () => ({
+				[Symbol.asyncIterator]: async function* () {
+					yield { choices: [{ delta: { content: "test" } }], usage: null }
+				},
+			}))
+
+			const handlerWithModel = new ChutesHandler({
+				apiModelId: modelId,
+				chutesApiKey: "test-chutes-api-key",
+				enableReasoningEffort: true,
+				reasoningEffort: "high",
+			})
+
+			const systemPrompt = "Test system prompt"
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Test message" }]
+
+			const generator = handlerWithModel.createMessage(systemPrompt, messages)
+			await generator.next()
+
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.objectContaining({
+					reasoning_effort: "high",
+				}),
+			)
+		})
+
+		it("should not pass reasoning effort for models that don't support it", async () => {
+			const modelId: ChutesModelId = "unsloth/Llama-3.3-70B-Instruct"
+
+			// Clear previous mocks and set up new implementation
+			mockCreate.mockClear()
+			mockCreate.mockImplementationOnce(async () => ({
+				[Symbol.asyncIterator]: async function* () {
+					yield { choices: [{ delta: { content: "test" } }], usage: null }
+				},
+			}))
+
+			const handlerWithModel = new ChutesHandler({
+				apiModelId: modelId,
+				chutesApiKey: "test-chutes-api-key",
+				enableReasoningEffort: true,
+				reasoningEffort: "high",
+			})
+
+			const systemPrompt = "Test system prompt"
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Test message" }]
+
+			const generator = handlerWithModel.createMessage(systemPrompt, messages)
+			await generator.next()
+
+			const callArgs = mockCreate.mock.calls[0][0]
+			expect(callArgs).not.toHaveProperty("reasoning_effort")
+		})
+
+		it("should use model default reasoning effort when not explicitly set", async () => {
+			const modelId: ChutesModelId = "meituan-longcat/LongCat-Flash-Thinking-FP8"
+
+			// Clear previous mocks and set up new implementation
+			mockCreate.mockClear()
+			mockCreate.mockImplementationOnce(async () => ({
+				[Symbol.asyncIterator]: async function* () {
+					yield { choices: [{ delta: { content: "test" } }], usage: null }
+				},
+			}))
+
+			const handlerWithModel = new ChutesHandler({
+				apiModelId: modelId,
+				chutesApiKey: "test-chutes-api-key",
+				// Not setting enableReasoningEffort or reasoningEffort to test model defaults
+			})
+
+			const systemPrompt = "Test system prompt"
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Test message" }]
+
+			const generator = handlerWithModel.createMessage(systemPrompt, messages)
+			await generator.next()
+
+			// Since we don't set enableReasoningEffort to true, and just rely on model defaults,
+			// the reasoning_effort will be included because the model has a default reasoningEffort
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.objectContaining({
+					reasoning_effort: "medium", // Should use the model's default
+				}),
+			)
+		})
+
+		it("should not pass reasoning effort when disabled", async () => {
+			const modelId: ChutesModelId = "deepseek-ai/DeepSeek-R1"
+
+			// Clear previous mocks and set up new implementation
+			mockCreate.mockClear()
+			mockCreate.mockImplementationOnce(async () => ({
+				[Symbol.asyncIterator]: async function* () {
+					yield { choices: [{ delta: { content: "test" } }], usage: null }
+				},
+			}))
+
+			const handlerWithModel = new ChutesHandler({
+				apiModelId: modelId,
+				chutesApiKey: "test-chutes-api-key",
+				enableReasoningEffort: false,
+				reasoningEffort: "high",
+			})
+
+			const systemPrompt = "Test system prompt"
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Test message" }]
+
+			const generator = handlerWithModel.createMessage(systemPrompt, messages)
+			await generator.next()
+
+			const callArgs = mockCreate.mock.calls[0][0]
+			expect(callArgs).not.toHaveProperty("reasoning_effort")
+		})
+
+		it("should pass reasoning effort for thinking models", async () => {
+			const modelId: ChutesModelId = "Qwen/Qwen3-235B-A22B-Thinking-2507"
+
+			// Clear previous mocks and set up new implementation
+			mockCreate.mockClear()
+			mockCreate.mockImplementationOnce(async () => ({
+				[Symbol.asyncIterator]: async function* () {
+					yield { choices: [{ delta: { content: "test" } }], usage: null }
+				},
+			}))
+
+			const handlerWithModel = new ChutesHandler({
+				apiModelId: modelId,
+				chutesApiKey: "test-chutes-api-key",
+				reasoningEffort: "low", // Just set the reasoning effort, no need for enableReasoningEffort
+			})
+
+			const systemPrompt = "Test system prompt"
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Test message" }]
+
+			const generator = handlerWithModel.createMessage(systemPrompt, messages)
+			await generator.next()
+
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.objectContaining({
+					reasoning_effort: "low",
+				}),
+			)
+		})
+	})
 })
diff --git a/src/api/providers/chutes.ts b/src/api/providers/chutes.ts
index 62121bd19dc0..9d536f12a92f 100644
--- a/src/api/providers/chutes.ts
+++ b/src/api/providers/chutes.ts
@@ -3,10 +3,12 @@ import { Anthropic } from "@anthropic-ai/sdk"
 import OpenAI from "openai"
 
 import type { ApiHandlerOptions } from "../../shared/api"
+import { getModelMaxOutputTokens } from "../../shared/api"
 import { XmlMatcher } from "../../utils/xml-matcher"
 import { convertToR1Format } from "../transform/r1-format"
 import { convertToOpenAiMessages } from "../transform/openai-format"
 import { ApiStream } from "../transform/stream"
+import { getModelParams } from "../transform/model-params"
 
 import { BaseOpenAiCompatibleProvider } from "./base-openai-compatible-provider"
 
@@ -27,12 +29,18 @@ export class ChutesHandler extends BaseOpenAiCompatibleProvider<ChutesModelId> {
 		systemPrompt: string,
 		messages: Anthropic.Messages.MessageParam[],
 	): OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming {
-		const {
-			id: model,
-			info: { maxTokens: max_tokens },
-		} = this.getModel()
+		const { id: model, info, reasoning } = this.getModel()
 
-		const temperature = this.options.modelTemperature ?? this.getModel().info.temperature
+		const temperature = this.options.modelTemperature ?? info.temperature
+
+		// Use centralized cap logic for max_tokens
+		const max_tokens =
+			getModelMaxOutputTokens({
+				modelId: model,
+				model: info,
+				settings: this.options,
+				format: "openai",
+			}) ?? undefined
 
 		return {
 			model,
@@ -41,6 +49,7 @@ export class ChutesHandler extends BaseOpenAiCompatibleProvider<ChutesModelId> {
 			messages: [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)],
 			stream: true,
 			stream_options: { include_usage: true },
+			...(reasoning && reasoning),
 		}
 	}
 
@@ -85,19 +94,46 @@ export class ChutesHandler extends BaseOpenAiCompatibleProvider<ChutesModelId> {
 				yield processedChunk
 			}
 		} else {
-			yield* super.createMessage(systemPrompt, messages)
+			// For non-DeepSeek models, we need to handle reasoning effort
+			const stream = await this.client.chat.completions.create(this.getCompletionParams(systemPrompt, messages))
+
+			for await (const chunk of stream) {
+				const delta = chunk.choices[0]?.delta
+
+				if (delta?.content) {
+					yield {
+						type: "text",
+						text: delta.content,
+					}
+				}
+
+				if (chunk.usage) {
+					yield {
+						type: "usage",
+						inputTokens: chunk.usage.prompt_tokens || 0,
+						outputTokens: chunk.usage.completion_tokens || 0,
+					}
+				}
+			}
 		}
 	}
 
 	override getModel() {
 		const model = super.getModel()
 		const isDeepSeekR1 = model.id.includes("DeepSeek-R1")
+		const params = getModelParams({
+			format: "openai",
+			modelId: model.id,
+			model: model.info,
+			settings: this.options,
+		})
 		return {
 			...model,
 			info: {
 				...model.info,
 				temperature: isDeepSeekR1 ? DEEP_SEEK_DEFAULT_TEMPERATURE : this.defaultTemperature,
 			},
+			...params,
 		}
 	}
 }
diff --git a/webview-ui/src/components/settings/providers/Chutes.tsx b/webview-ui/src/components/settings/providers/Chutes.tsx
index c51479421a92..3488a4f55605 100644
--- a/webview-ui/src/components/settings/providers/Chutes.tsx
+++ b/webview-ui/src/components/settings/providers/Chutes.tsx
@@ -5,8 +5,10 @@ import type { ProviderSettings } from "@roo-code/types"
 
 import { useAppTranslation } from "@src/i18n/TranslationContext"
 import { VSCodeButtonLink } from "@src/components/common/VSCodeButtonLink"
+import { useSelectedModel } from "@src/components/ui/hooks/useSelectedModel"
 
 import { inputEventTransform } from "../transforms"
+import { ThinkingBudget } from "../ThinkingBudget"
 
 type ChutesProps = {
 	apiConfiguration: ProviderSettings
@@ -15,6 +17,7 @@
 
 export const Chutes = ({ apiConfiguration, setApiConfigurationField }: ChutesProps) => {
 	const { t } = useAppTranslation()
+	const { info: modelInfo } = useSelectedModel(apiConfiguration)
 
 	const handleInputChange = useCallback(
 		<K extends keyof ProviderSettings, E>(
@@ -45,6 +48,11 @@ export const Chutes = ({ apiConfiguration, setApiConfigurationField }: ChutesPro
 					{t("settings:providers.getChutesApiKey")}
 				</VSCodeButtonLink>
 			)}
+			<ThinkingBudget
+				apiConfiguration={apiConfiguration}
+				setApiConfigurationField={setApiConfigurationField}
+				modelInfo={modelInfo}
+			/>
 		</>
 	)
 }