Commit 3fd79db

feat: add reasoning effort support to Chutes provider models
- Add supportsReasoningEffort and reasoningEffort fields to reasoning-capable Chutes models
- Update ChutesHandler to pass reasoning_effort parameter to API calls
- Add ThinkingBudget component to Chutes provider settings for reasoning effort UI
- Add comprehensive test coverage for reasoning effort functionality
- Support reasoning effort for DeepSeek R1, Thinking, and other reasoning models

Fixes #8904
1 parent: f839d4c · commit: 3fd79db

File tree

4 files changed (+218 −7 lines)


packages/types/src/providers/chutes.ts

Lines changed: 16 additions & 0 deletions
@@ -51,6 +51,8 @@ export const chutesModels = {
 		contextWindow: 163840,
 		supportsImages: false,
 		supportsPromptCache: false,
+		supportsReasoningEffort: true,
+		reasoningEffort: "medium",
 		inputPrice: 0,
 		outputPrice: 0,
 		description: "DeepSeek R1 0528 model.",
@@ -60,6 +62,8 @@ export const chutesModels = {
 		contextWindow: 163840,
 		supportsImages: false,
 		supportsPromptCache: false,
+		supportsReasoningEffort: true,
+		reasoningEffort: "medium",
 		inputPrice: 0,
 		outputPrice: 0,
 		description: "DeepSeek R1 model.",
@@ -207,6 +211,8 @@ export const chutesModels = {
 		contextWindow: 163840,
 		supportsImages: false,
 		supportsPromptCache: false,
+		supportsReasoningEffort: true,
+		reasoningEffort: "medium",
 		inputPrice: 0,
 		outputPrice: 0,
 		description: "DeepSeek R1 Zero model.",
@@ -288,6 +294,8 @@ export const chutesModels = {
 		contextWindow: 163840,
 		supportsImages: false,
 		supportsPromptCache: false,
+		supportsReasoningEffort: true,
+		reasoningEffort: "medium",
 		inputPrice: 0,
 		outputPrice: 0,
 		description: "TNGTech DeepSeek R1T Chimera model.",
@@ -345,6 +353,8 @@ export const chutesModels = {
 		contextWindow: 128000,
 		supportsImages: false,
 		supportsPromptCache: false,
+		supportsReasoningEffort: true,
+		reasoningEffort: "medium",
 		inputPrice: 0,
 		outputPrice: 0,
 		description:
@@ -382,6 +392,8 @@ export const chutesModels = {
 		contextWindow: 262144,
 		supportsImages: false,
 		supportsPromptCache: false,
+		supportsReasoningEffort: true,
+		reasoningEffort: "medium",
 		inputPrice: 0.077968332,
 		outputPrice: 0.31202496,
 		description: "Qwen3 235B A22B Thinking 2507 model with 262K context window.",
@@ -401,6 +413,8 @@ export const chutesModels = {
 		contextWindow: 131072,
 		supportsImages: false,
 		supportsPromptCache: false,
+		supportsReasoningEffort: true,
+		reasoningEffort: "medium",
 		inputPrice: 0,
 		outputPrice: 0,
 		description:
@@ -411,6 +425,8 @@ export const chutesModels = {
 		contextWindow: 262144,
 		supportsImages: true,
 		supportsPromptCache: false,
+		supportsReasoningEffort: true,
+		reasoningEffort: "medium",
 		inputPrice: 0.16,
 		outputPrice: 0.65,
 		description:
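
In effect, every reasoning-capable entry in chutesModels now pairs supportsReasoningEffort with a default reasoningEffort. A minimal sketch of one such entry as it reads after this change (the maxTokens value is assumed here, since the hunks only show the neighboring fields):

	"deepseek-ai/DeepSeek-R1": {
		maxTokens: 32768, // assumed; not visible in this hunk
		contextWindow: 163840,
		supportsImages: false,
		supportsPromptCache: false,
		supportsReasoningEffort: true, // advertises the capability to handler and UI
		reasoningEffort: "medium", // default effort used when the user picks none
		inputPrice: 0,
		outputPrice: 0,
		description: "DeepSeek R1 model.",
	},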

src/api/providers/__tests__/chutes.spec.ts

Lines changed: 152 additions & 1 deletion
@@ -503,6 +503,7 @@ describe("ChutesHandler", () => {
 				temperature: 0.6,
 				stream: true,
 				stream_options: { include_usage: true },
+				reasoning_effort: "medium", // DeepSeek R1 now supports reasoning effort with default "medium"
 			}),
 		)
 	})
@@ -540,7 +541,6 @@ describe("ChutesHandler", () => {
 				stream: true,
 				stream_options: { include_usage: true },
 			}),
-			undefined,
 		)
 	})
 
@@ -563,4 +563,155 @@
 		const model = handlerWithModel.getModel()
 		expect(model.info.temperature).toBe(0.5)
 	})
+
+	describe("reasoning effort support", () => {
+		it("should pass reasoning effort for models that support it", async () => {
+			const modelId: ChutesModelId = "deepseek-ai/DeepSeek-R1"
+
+			// Clear previous mocks and set up new implementation
+			mockCreate.mockClear()
+			mockCreate.mockImplementationOnce(async () => ({
+				[Symbol.asyncIterator]: async function* () {
+					yield { choices: [{ delta: { content: "test" } }], usage: null }
+				},
+			}))
+
+			const handlerWithModel = new ChutesHandler({
+				apiModelId: modelId,
+				chutesApiKey: "test-chutes-api-key",
+				enableReasoningEffort: true,
+				reasoningEffort: "high",
+			})
+
+			const systemPrompt = "Test system prompt"
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Test message" }]
+
+			const generator = handlerWithModel.createMessage(systemPrompt, messages)
+			await generator.next()
+
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.objectContaining({
+					reasoning_effort: "high",
+				}),
+			)
+		})
+
+		it("should not pass reasoning effort for models that don't support it", async () => {
+			const modelId: ChutesModelId = "unsloth/Llama-3.3-70B-Instruct"
+
+			// Clear previous mocks and set up new implementation
+			mockCreate.mockClear()
+			mockCreate.mockImplementationOnce(async () => ({
+				[Symbol.asyncIterator]: async function* () {
+					yield { choices: [{ delta: { content: "test" } }], usage: null }
+				},
+			}))
+
+			const handlerWithModel = new ChutesHandler({
+				apiModelId: modelId,
+				chutesApiKey: "test-chutes-api-key",
+				enableReasoningEffort: true,
+				reasoningEffort: "high",
+			})
+
+			const systemPrompt = "Test system prompt"
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Test message" }]
+
+			const generator = handlerWithModel.createMessage(systemPrompt, messages)
+			await generator.next()
+
+			const callArgs = mockCreate.mock.calls[0][0]
+			expect(callArgs).not.toHaveProperty("reasoning_effort")
+		})
+
+		it("should use model default reasoning effort when not explicitly set", async () => {
+			const modelId: ChutesModelId = "meituan-longcat/LongCat-Flash-Thinking-FP8"
+
+			// Clear previous mocks and set up new implementation
+			mockCreate.mockClear()
+			mockCreate.mockImplementationOnce(async () => ({
+				[Symbol.asyncIterator]: async function* () {
+					yield { choices: [{ delta: { content: "test" } }], usage: null }
+				},
+			}))
+
+			const handlerWithModel = new ChutesHandler({
+				apiModelId: modelId,
+				chutesApiKey: "test-chutes-api-key",
+				// Not setting enableReasoningEffort or reasoningEffort to test model defaults
+			})
+
+			const systemPrompt = "Test system prompt"
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Test message" }]
+
+			const generator = handlerWithModel.createMessage(systemPrompt, messages)
+			await generator.next()
+
+			// Since we don't set enableReasoningEffort to true, and just rely on model defaults,
+			// the reasoning_effort will be included because the model has a default reasoningEffort
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.objectContaining({
+					reasoning_effort: "medium", // Should use the model's default
+				}),
+			)
+		})
+
+		it("should not pass reasoning effort when disabled", async () => {
+			const modelId: ChutesModelId = "deepseek-ai/DeepSeek-R1"
+
+			// Clear previous mocks and set up new implementation
+			mockCreate.mockClear()
+			mockCreate.mockImplementationOnce(async () => ({
+				[Symbol.asyncIterator]: async function* () {
+					yield { choices: [{ delta: { content: "test" } }], usage: null }
+				},
+			}))
+
+			const handlerWithModel = new ChutesHandler({
+				apiModelId: modelId,
+				chutesApiKey: "test-chutes-api-key",
+				enableReasoningEffort: false,
+				reasoningEffort: "high",
+			})
+
+			const systemPrompt = "Test system prompt"
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Test message" }]
+
+			const generator = handlerWithModel.createMessage(systemPrompt, messages)
+			await generator.next()
+
+			const callArgs = mockCreate.mock.calls[0][0]
+			expect(callArgs).not.toHaveProperty("reasoning_effort")
+		})
+
+		it("should pass reasoning effort for thinking models", async () => {
+			const modelId: ChutesModelId = "Qwen/Qwen3-235B-A22B-Thinking-2507"
+
+			// Clear previous mocks and set up new implementation
+			mockCreate.mockClear()
+			mockCreate.mockImplementationOnce(async () => ({
+				[Symbol.asyncIterator]: async function* () {
+					yield { choices: [{ delta: { content: "test" } }], usage: null }
+				},
+			}))
+
+			const handlerWithModel = new ChutesHandler({
+				apiModelId: modelId,
+				chutesApiKey: "test-chutes-api-key",
+				reasoningEffort: "low", // Just set the reasoning effort, no need for enableReasoningEffort
+			})
+
+			const systemPrompt = "Test system prompt"
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Test message" }]
+
+			const generator = handlerWithModel.createMessage(systemPrompt, messages)
+			await generator.next()
+
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.objectContaining({
+					reasoning_effort: "low",
+				}),
+			)
+		})
+	})
 })
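
Read together, these tests pin down a clear precedence for the outgoing reasoning_effort parameter: an unsupported model or an explicit enableReasoningEffort: false omits it entirely, an explicit user effort wins, and otherwise the model's default applies. A sketch of that resolution (the helper and its signature are illustrative, not the handler's actual internals):

	type ReasoningEffort = "low" | "medium" | "high"

	// Hypothetical helper mirroring the behavior the tests assert.
	function resolveReasoningEffort(opts: {
		supportsReasoningEffort?: boolean // from the model's info
		modelDefault?: ReasoningEffort // the model's reasoningEffort field
		enableReasoningEffort?: boolean // user toggle; undefined means unset
		userEffort?: ReasoningEffort // user-selected effort
	}): ReasoningEffort | undefined {
		if (!opts.supportsReasoningEffort) return undefined // unsupported model: omit
		if (opts.enableReasoningEffort === false) return undefined // explicitly disabled: omit
		return opts.userEffort ?? opts.modelDefault // explicit choice wins, else default
	}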

src/api/providers/chutes.ts

Lines changed: 42 additions & 6 deletions
@@ -3,10 +3,12 @@ import { Anthropic } from "@anthropic-ai/sdk"
 import OpenAI from "openai"
 
 import type { ApiHandlerOptions } from "../../shared/api"
+import { getModelMaxOutputTokens } from "../../shared/api"
 import { XmlMatcher } from "../../utils/xml-matcher"
 import { convertToR1Format } from "../transform/r1-format"
 import { convertToOpenAiMessages } from "../transform/openai-format"
 import { ApiStream } from "../transform/stream"
+import { getModelParams } from "../transform/model-params"
 
 import { BaseOpenAiCompatibleProvider } from "./base-openai-compatible-provider"
 
@@ -27,12 +29,18 @@ export class ChutesHandler extends BaseOpenAiCompatibleProvider<ChutesModelId> {
 		systemPrompt: string,
 		messages: Anthropic.Messages.MessageParam[],
 	): OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming {
-		const {
-			id: model,
-			info: { maxTokens: max_tokens },
-		} = this.getModel()
+		const { id: model, info, reasoning } = this.getModel()
 
-		const temperature = this.options.modelTemperature ?? this.getModel().info.temperature
+		const temperature = this.options.modelTemperature ?? info.temperature
+
+		// Use centralized cap logic for max_tokens
+		const max_tokens =
+			getModelMaxOutputTokens({
+				modelId: model,
+				model: info,
+				settings: this.options,
+				format: "openai",
+			}) ?? undefined
 
 		return {
 			model,
@@ -41,6 +49,7 @@ export class ChutesHandler extends BaseOpenAiCompatibleProvider<ChutesModelId> {
 			messages: [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)],
 			stream: true,
 			stream_options: { include_usage: true },
+			...(reasoning && reasoning),
 		}
 	}
 
@@ -85,19 +94,46 @@ export class ChutesHandler extends BaseOpenAiCompatibleProvider<ChutesModelId> {
 				yield processedChunk
 			}
 		} else {
-			yield* super.createMessage(systemPrompt, messages)
+			// For non-DeepSeek models, we need to handle reasoning effort
+			const stream = await this.client.chat.completions.create(this.getCompletionParams(systemPrompt, messages))
+
+			for await (const chunk of stream) {
+				const delta = chunk.choices[0]?.delta
+
+				if (delta?.content) {
+					yield {
+						type: "text",
+						text: delta.content,
+					}
+				}
+
+				if (chunk.usage) {
+					yield {
+						type: "usage",
+						inputTokens: chunk.usage.prompt_tokens || 0,
+						outputTokens: chunk.usage.completion_tokens || 0,
+					}
+				}
+			}
 		}
 	}
 
 	override getModel() {
 		const model = super.getModel()
		const isDeepSeekR1 = model.id.includes("DeepSeek-R1")
+		const params = getModelParams({
+			format: "openai",
+			modelId: model.id,
+			model: model.info,
+			settings: this.options,
+		})
 		return {
 			...model,
 			info: {
 				...model.info,
 				temperature: isDeepSeekR1 ? DEEP_SEEK_DEFAULT_TEMPERATURE : this.defaultTemperature,
 			},
+			...params,
 		}
 	}
 }
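
With getModelParams computing `reasoning` from the model info and settings, and getCompletionParams spreading it into the request, the streaming payload for a reasoning-capable model ends up shaped roughly like this (values illustrative; max_tokens is whatever getModelMaxOutputTokens resolves):

	{
		model: "deepseek-ai/DeepSeek-R1",
		max_tokens: 32768, // assumed resolution
		temperature: 0.6, // DEEP_SEEK_DEFAULT_TEMPERATURE for R1 models
		messages: [{ role: "system", content: systemPrompt }, /* converted messages */],
		stream: true,
		stream_options: { include_usage: true },
		reasoning_effort: "medium", // contributed by the `reasoning` spread
	}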

webview-ui/src/components/settings/providers/Chutes.tsx

Lines changed: 8 additions & 0 deletions
@@ -5,8 +5,10 @@ import type { ProviderSettings } from "@roo-code/types"
 
 import { useAppTranslation } from "@src/i18n/TranslationContext"
 import { VSCodeButtonLink } from "@src/components/common/VSCodeButtonLink"
+import { useSelectedModel } from "@src/components/ui/hooks/useSelectedModel"
 
 import { inputEventTransform } from "../transforms"
+import { ThinkingBudget } from "../ThinkingBudget"
 
 type ChutesProps = {
 	apiConfiguration: ProviderSettings
@@ -15,6 +17,7 @@ type ChutesProps = {
 
 export const Chutes = ({ apiConfiguration, setApiConfigurationField }: ChutesProps) => {
 	const { t } = useAppTranslation()
+	const { info: modelInfo } = useSelectedModel(apiConfiguration)
 
 	const handleInputChange = useCallback(
 		<K extends keyof ProviderSettings, E>(
@@ -45,6 +48,11 @@ export const Chutes = ({ apiConfiguration, setApiConfigurationField }: ChutesPro
 					{t("settings:providers.getChutesApiKey")}
 				</VSCodeButtonLink>
 			)}
+			<ThinkingBudget
+				apiConfiguration={apiConfiguration}
+				setApiConfigurationField={setApiConfigurationField}
+				modelInfo={modelInfo}
+			/>
 		</>
 	)
 }
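
useSelectedModel supplies the selected model's info, and ThinkingBudget receives it as modelInfo, which is presumably how the reasoning-effort control stays hidden for models without supportsReasoningEffort. A sketch of the guard one would expect inside that component (assumed behavior; the real logic lives in ../ThinkingBudget):

	// Hypothetical early return: render nothing unless the model opts in.
	if (!modelInfo?.supportsReasoningEffort) {
		return null
	}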
