Commit 3fd79db

feat: add reasoning effort support to Chutes provider models
- Add supportsReasoningEffort and reasoningEffort fields to reasoning-capable Chutes models
- Update ChutesHandler to pass reasoning_effort parameter to API calls
- Add ThinkingBudget component to Chutes provider settings for reasoning effort UI
- Add comprehensive test coverage for reasoning effort functionality
- Support reasoning effort for DeepSeek R1, Thinking, and other reasoning models

Fixes #8904
1 parent: f839d4c · commit: 3fd79db

File tree

4 files changed (+218 −7 lines)


packages/types/src/providers/chutes.ts

Lines changed: 16 additions & 0 deletions
@@ -51,6 +51,8 @@ export const chutesModels = {
 		contextWindow: 163840,
 		supportsImages: false,
 		supportsPromptCache: false,
+		supportsReasoningEffort: true,
+		reasoningEffort: "medium",
 		inputPrice: 0,
 		outputPrice: 0,
 		description: "DeepSeek R1 0528 model.",
@@ -60,6 +62,8 @@ export const chutesModels = {
 		contextWindow: 163840,
 		supportsImages: false,
 		supportsPromptCache: false,
+		supportsReasoningEffort: true,
+		reasoningEffort: "medium",
 		inputPrice: 0,
 		outputPrice: 0,
 		description: "DeepSeek R1 model.",
@@ -207,6 +211,8 @@ export const chutesModels = {
 		contextWindow: 163840,
 		supportsImages: false,
 		supportsPromptCache: false,
+		supportsReasoningEffort: true,
+		reasoningEffort: "medium",
 		inputPrice: 0,
 		outputPrice: 0,
 		description: "DeepSeek R1 Zero model.",
@@ -288,6 +294,8 @@ export const chutesModels = {
 		contextWindow: 163840,
 		supportsImages: false,
 		supportsPromptCache: false,
+		supportsReasoningEffort: true,
+		reasoningEffort: "medium",
 		inputPrice: 0,
 		outputPrice: 0,
 		description: "TNGTech DeepSeek R1T Chimera model.",
@@ -345,6 +353,8 @@ export const chutesModels = {
 		contextWindow: 128000,
 		supportsImages: false,
 		supportsPromptCache: false,
+		supportsReasoningEffort: true,
+		reasoningEffort: "medium",
 		inputPrice: 0,
 		outputPrice: 0,
 		description:
@@ -382,6 +392,8 @@ export const chutesModels = {
 		contextWindow: 262144,
 		supportsImages: false,
 		supportsPromptCache: false,
+		supportsReasoningEffort: true,
+		reasoningEffort: "medium",
 		inputPrice: 0.077968332,
 		outputPrice: 0.31202496,
 		description: "Qwen3 235B A22B Thinking 2507 model with 262K context window.",
@@ -401,6 +413,8 @@ export const chutesModels = {
 		contextWindow: 131072,
 		supportsImages: false,
 		supportsPromptCache: false,
+		supportsReasoningEffort: true,
+		reasoningEffort: "medium",
 		inputPrice: 0,
 		outputPrice: 0,
 		description:
@@ -411,6 +425,8 @@ export const chutesModels = {
 		contextWindow: 262144,
 		supportsImages: true,
 		supportsPromptCache: false,
+		supportsReasoningEffort: true,
+		reasoningEffort: "medium",
 		inputPrice: 0.16,
 		outputPrice: 0.65,
 		description:
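
In effect, every reasoning-capable entry in chutesModels now pairs supportsReasoningEffort with a default reasoningEffort. A minimal sketch of one such entry as it reads after this change (the maxTokens value is assumed here, since the hunks only show the neighboring fields):

	"deepseek-ai/DeepSeek-R1": {
		maxTokens: 32768, // assumed; not visible in this hunk
		contextWindow: 163840,
		supportsImages: false,
		supportsPromptCache: false,
		supportsReasoningEffort: true, // advertises the capability to handler and UI
		reasoningEffort: "medium", // default effort used when the user picks none
		inputPrice: 0,
		outputPrice: 0,
		description: "DeepSeek R1 model.",
	},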

src/api/providers/__tests__/chutes.spec.ts

Lines changed: 152 additions & 1 deletion
@@ -503,6 +503,7 @@ describe("ChutesHandler", () => {
 				temperature: 0.6,
 				stream: true,
 				stream_options: { include_usage: true },
+				reasoning_effort: "medium", // DeepSeek R1 now supports reasoning effort with default "medium"
 			}),
 		)
 	})
@@ -540,7 +541,6 @@ describe("ChutesHandler", () => {
 				stream: true,
 				stream_options: { include_usage: true },
 			}),
-			undefined,
 		)
 	})
 
@@ -563,4 +563,155 @@
 		const model = handlerWithModel.getModel()
 		expect(model.info.temperature).toBe(0.5)
 	})
+
+	describe("reasoning effort support", () => {
+		it("should pass reasoning effort for models that support it", async () => {
+			const modelId: ChutesModelId = "deepseek-ai/DeepSeek-R1"
+
+			// Clear previous mocks and set up new implementation
+			mockCreate.mockClear()
+			mockCreate.mockImplementationOnce(async () => ({
+				[Symbol.asyncIterator]: async function* () {
+					yield { choices: [{ delta: { content: "test" } }], usage: null }
+				},
+			}))
+
+			const handlerWithModel = new ChutesHandler({
+				apiModelId: modelId,
+				chutesApiKey: "test-chutes-api-key",
+				enableReasoningEffort: true,
+				reasoningEffort: "high",
+			})
+
+			const systemPrompt = "Test system prompt"
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Test message" }]
+
+			const generator = handlerWithModel.createMessage(systemPrompt, messages)
+			await generator.next()
+
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.objectContaining({
+					reasoning_effort: "high",
+				}),
+			)
+		})
+
+		it("should not pass reasoning effort for models that don't support it", async () => {
+			const modelId: ChutesModelId = "unsloth/Llama-3.3-70B-Instruct"
+
+			// Clear previous mocks and set up new implementation
+			mockCreate.mockClear()
+			mockCreate.mockImplementationOnce(async () => ({
+				[Symbol.asyncIterator]: async function* () {
+					yield { choices: [{ delta: { content: "test" } }], usage: null }
+				},
+			}))
+
+			const handlerWithModel = new ChutesHandler({
+				apiModelId: modelId,
+				chutesApiKey: "test-chutes-api-key",
+				enableReasoningEffort: true,
+				reasoningEffort: "high",
+			})
+
+			const systemPrompt = "Test system prompt"
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Test message" }]
+
+			const generator = handlerWithModel.createMessage(systemPrompt, messages)
+			await generator.next()
+
+			const callArgs = mockCreate.mock.calls[0][0]
+			expect(callArgs).not.toHaveProperty("reasoning_effort")
+		})
+
+		it("should use model default reasoning effort when not explicitly set", async () => {
+			const modelId: ChutesModelId = "meituan-longcat/LongCat-Flash-Thinking-FP8"
+
+			// Clear previous mocks and set up new implementation
+			mockCreate.mockClear()
+			mockCreate.mockImplementationOnce(async () => ({
+				[Symbol.asyncIterator]: async function* () {
+					yield { choices: [{ delta: { content: "test" } }], usage: null }
+				},
+			}))
+
+			const handlerWithModel = new ChutesHandler({
+				apiModelId: modelId,
+				chutesApiKey: "test-chutes-api-key",
+				// Not setting enableReasoningEffort or reasoningEffort to test model defaults
+			})
+
+			const systemPrompt = "Test system prompt"
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Test message" }]
+
+			const generator = handlerWithModel.createMessage(systemPrompt, messages)
+			await generator.next()
+
+			// Since we don't set enableReasoningEffort to true, and just rely on model defaults,
+			// the reasoning_effort will be included because the model has a default reasoningEffort
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.objectContaining({
+					reasoning_effort: "medium", // Should use the model's default
+				}),
+			)
+		})
+
+		it("should not pass reasoning effort when disabled", async () => {
+			const modelId: ChutesModelId = "deepseek-ai/DeepSeek-R1"
+
+			// Clear previous mocks and set up new implementation
+			mockCreate.mockClear()
+			mockCreate.mockImplementationOnce(async () => ({
+				[Symbol.asyncIterator]: async function* () {
+					yield { choices: [{ delta: { content: "test" } }], usage: null }
+				},
+			}))
+
+			const handlerWithModel = new ChutesHandler({
+				apiModelId: modelId,
+				chutesApiKey: "test-chutes-api-key",
+				enableReasoningEffort: false,
+				reasoningEffort: "high",
+			})
+
+			const systemPrompt = "Test system prompt"
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Test message" }]
+
+			const generator = handlerWithModel.createMessage(systemPrompt, messages)
+			await generator.next()
+
+			const callArgs = mockCreate.mock.calls[0][0]
+			expect(callArgs).not.toHaveProperty("reasoning_effort")
+		})
+
+		it("should pass reasoning effort for thinking models", async () => {
+			const modelId: ChutesModelId = "Qwen/Qwen3-235B-A22B-Thinking-2507"
+
+			// Clear previous mocks and set up new implementation
+			mockCreate.mockClear()
+			mockCreate.mockImplementationOnce(async () => ({
+				[Symbol.asyncIterator]: async function* () {
+					yield { choices: [{ delta: { content: "test" } }], usage: null }
+				},
+			}))
+
+			const handlerWithModel = new ChutesHandler({
+				apiModelId: modelId,
+				chutesApiKey: "test-chutes-api-key",
+				reasoningEffort: "low", // Just set the reasoning effort, no need for enableReasoningEffort
+			})
+
+			const systemPrompt = "Test system prompt"
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Test message" }]
+
+			const generator = handlerWithModel.createMessage(systemPrompt, messages)
+			await generator.next()
+
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.objectContaining({
+					reasoning_effort: "low",
+				}),
+			)
+		})
+	})
 })
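
Read together, these tests pin down a clear precedence for the outgoing reasoning_effort parameter: an unsupported model or an explicit enableReasoningEffort: false omits it entirely, an explicit user effort wins, and otherwise the model's default applies. A sketch of that resolution (the helper and its signature are illustrative, not the handler's actual internals):

	type ReasoningEffort = "low" | "medium" | "high"

	// Hypothetical helper mirroring the behavior the tests assert.
	function resolveReasoningEffort(opts: {
		supportsReasoningEffort?: boolean // from the model's info
		modelDefault?: ReasoningEffort // the model's reasoningEffort field
		enableReasoningEffort?: boolean // user toggle; undefined means unset
		userEffort?: ReasoningEffort // user-selected effort
	}): ReasoningEffort | undefined {
		if (!opts.supportsReasoningEffort) return undefined // unsupported model: omit
		if (opts.enableReasoningEffort === false) return undefined // explicitly disabled: omit
		return opts.userEffort ?? opts.modelDefault // explicit choice wins, else default
	}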

src/api/providers/chutes.ts

Lines changed: 42 additions & 6 deletions
@@ -3,10 +3,12 @@ import { Anthropic } from "@anthropic-ai/sdk"
 import OpenAI from "openai"
 
 import type { ApiHandlerOptions } from "../../shared/api"
+import { getModelMaxOutputTokens } from "../../shared/api"
 import { XmlMatcher } from "../../utils/xml-matcher"
 import { convertToR1Format } from "../transform/r1-format"
 import { convertToOpenAiMessages } from "../transform/openai-format"
 import { ApiStream } from "../transform/stream"
+import { getModelParams } from "../transform/model-params"
 
 import { BaseOpenAiCompatibleProvider } from "./base-openai-compatible-provider"
 
@@ -27,12 +29,18 @@ export class ChutesHandler extends BaseOpenAiCompatibleProvider<ChutesModelId> {
 		systemPrompt: string,
 		messages: Anthropic.Messages.MessageParam[],
 	): OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming {
-		const {
-			id: model,
-			info: { maxTokens: max_tokens },
-		} = this.getModel()
+		const { id: model, info, reasoning } = this.getModel()
 
-		const temperature = this.options.modelTemperature ?? this.getModel().info.temperature
+		const temperature = this.options.modelTemperature ?? info.temperature
+
+		// Use centralized cap logic for max_tokens
+		const max_tokens =
+			getModelMaxOutputTokens({
+				modelId: model,
+				model: info,
+				settings: this.options,
+				format: "openai",
+			}) ?? undefined
 
 		return {
 			model,
@@ -41,6 +49,7 @@ export class ChutesHandler extends BaseOpenAiCompatibleProvider<ChutesModelId> {
 			messages: [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)],
 			stream: true,
 			stream_options: { include_usage: true },
+			...(reasoning && reasoning),
 		}
 	}
 
@@ -85,19 +94,46 @@ export class ChutesHandler extends BaseOpenAiCompatibleProvider<ChutesModelId> {
 				yield processedChunk
 			}
 		} else {
-			yield* super.createMessage(systemPrompt, messages)
+			// For non-DeepSeek models, we need to handle reasoning effort
+			const stream = await this.client.chat.completions.create(this.getCompletionParams(systemPrompt, messages))
+
+			for await (const chunk of stream) {
+				const delta = chunk.choices[0]?.delta
+
+				if (delta?.content) {
+					yield {
+						type: "text",
+						text: delta.content,
+					}
+				}
+
+				if (chunk.usage) {
+					yield {
+						type: "usage",
+						inputTokens: chunk.usage.prompt_tokens || 0,
+						outputTokens: chunk.usage.completion_tokens || 0,
+					}
+				}
+			}
 		}
 	}
 
 	override getModel() {
 		const model = super.getModel()
		const isDeepSeekR1 = model.id.includes("DeepSeek-R1")
+		const params = getModelParams({
+			format: "openai",
+			modelId: model.id,
+			model: model.info,
+			settings: this.options,
+		})
 		return {
 			...model,
 			info: {
 				...model.info,
 				temperature: isDeepSeekR1 ? DEEP_SEEK_DEFAULT_TEMPERATURE : this.defaultTemperature,
 			},
+			...params,
 		}
 	}
 }
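
With getModelParams computing `reasoning` from the model info and settings, and getCompletionParams spreading it into the request, the streaming payload for a reasoning-capable model ends up shaped roughly like this (values illustrative; max_tokens is whatever getModelMaxOutputTokens resolves):

	{
		model: "deepseek-ai/DeepSeek-R1",
		max_tokens: 32768, // assumed resolution
		temperature: 0.6, // DEEP_SEEK_DEFAULT_TEMPERATURE for R1 models
		messages: [{ role: "system", content: systemPrompt }, /* converted messages */],
		stream: true,
		stream_options: { include_usage: true },
		reasoning_effort: "medium", // contributed by the `reasoning` spread
	}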

webview-ui/src/components/settings/providers/Chutes.tsx

Lines changed: 8 additions & 0 deletions
@@ -5,8 +5,10 @@ import type { ProviderSettings } from "@roo-code/types"
 
 import { useAppTranslation } from "@src/i18n/TranslationContext"
 import { VSCodeButtonLink } from "@src/components/common/VSCodeButtonLink"
+import { useSelectedModel } from "@src/components/ui/hooks/useSelectedModel"
 
 import { inputEventTransform } from "../transforms"
+import { ThinkingBudget } from "../ThinkingBudget"
 
 type ChutesProps = {
 	apiConfiguration: ProviderSettings
@@ -15,6 +17,7 @@ type ChutesProps = {
 
 export const Chutes = ({ apiConfiguration, setApiConfigurationField }: ChutesProps) => {
 	const { t } = useAppTranslation()
+	const { info: modelInfo } = useSelectedModel(apiConfiguration)
 
 	const handleInputChange = useCallback(
 		<K extends keyof ProviderSettings, E>(
@@ -45,6 +48,11 @@ export const Chutes = ({ apiConfiguration, setApiConfigurationField }: ChutesPro
 					{t("settings:providers.getChutesApiKey")}
 				</VSCodeButtonLink>
 			)}
+			<ThinkingBudget
+				apiConfiguration={apiConfiguration}
+				setApiConfigurationField={setApiConfigurationField}
+				modelInfo={modelInfo}
+			/>
 		</>
 	)
 }
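
useSelectedModel supplies the selected model's info, and ThinkingBudget receives it as modelInfo, which is presumably how the reasoning-effort control stays hidden for models without supportsReasoningEffort. A sketch of the guard one would expect inside that component (assumed behavior; the real logic lives in ../ThinkingBudget):

	// Hypothetical early return: render nothing unless the model opts in.
	if (!modelInfo?.supportsReasoningEffort) {
		return null
	}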
