Skip to content

Commit fa1e7b4

Browse files
authored
Re-work support for reasoning models, including "hybrid" reasoning models (#3870)
1 parent 7206da9 commit fa1e7b4

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

62 files changed: +2606 lines added, −1071 lines removed

evals/packages/types/src/roo-code.ts

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -335,12 +335,14 @@ export type ProviderSettingsEntry = z.infer<typeof providerSettingsEntrySchema>
335335

336336
const genericProviderSettingsSchema = z.object({
337337
includeMaxTokens: z.boolean().optional(),
338-
reasoningEffort: reasoningEffortsSchema.optional(),
339338
diffEnabled: z.boolean().optional(),
340339
fuzzyMatchThreshold: z.number().optional(),
341340
modelTemperature: z.number().nullish(),
342341
rateLimitSeconds: z.number().optional(),
343-
// Claude 3.7 Sonnet Thinking
342+
343+
// Model reasoning.
344+
enableReasoningEffort: z.boolean().optional(),
345+
reasoningEffort: reasoningEffortsSchema.optional(),
344346
modelMaxTokens: z.number().optional(),
345347
modelMaxThinkingTokens: z.number().optional(),
346348
})

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
"clean": "turbo clean --log-order grouped --output-logs new-only && rimraf dist out bin .vite-port .turbo",
1515
"build": "pnpm --filter roo-cline vsix",
1616
"build:nightly": "pnpm --filter @roo-code/vscode-nightly vsix",
17+
"generate-types": "pnpm --filter roo-cline generate-types",
1718
"changeset:version": "cp CHANGELOG.md src/CHANGELOG.md && changeset version && cp -vf src/CHANGELOG.md .",
1819
"knip": "pnpm --filter @roo-code/build build && knip --include files",
1920
"update-contributors": "node scripts/update-contributors.js"

src/api/__tests__/index.test.ts

Lines changed: 0 additions & 257 deletions
This file was deleted.

src/api/index.ts

Lines changed: 1 addition & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
import { Anthropic } from "@anthropic-ai/sdk"
2-
import { BetaThinkingConfigParam } from "@anthropic-ai/sdk/resources/beta/messages/index.mjs"
32

4-
import { ProviderSettings, ModelInfo, ApiHandlerOptions } from "../shared/api"
5-
import { ANTHROPIC_DEFAULT_MAX_TOKENS } from "./providers/constants"
3+
import { ProviderSettings, ModelInfo } from "../shared/api"
64
import { GlamaHandler } from "./providers/glama"
75
import { AnthropicHandler } from "./providers/anthropic"
86
import { AwsBedrockHandler } from "./providers/bedrock"
@@ -101,45 +99,3 @@ export function buildApiHandler(configuration: ProviderSettings): ApiHandler {
10199
return new AnthropicHandler(options)
102100
}
103101
}
104-
105-
export function getModelParams({
106-
options,
107-
model,
108-
defaultMaxTokens,
109-
defaultTemperature = 0,
110-
defaultReasoningEffort,
111-
}: {
112-
options: ApiHandlerOptions
113-
model: ModelInfo
114-
defaultMaxTokens?: number
115-
defaultTemperature?: number
116-
defaultReasoningEffort?: "low" | "medium" | "high"
117-
}) {
118-
const {
119-
modelMaxTokens: customMaxTokens,
120-
modelMaxThinkingTokens: customMaxThinkingTokens,
121-
modelTemperature: customTemperature,
122-
reasoningEffort: customReasoningEffort,
123-
} = options
124-
125-
let maxTokens = model.maxTokens ?? defaultMaxTokens
126-
let thinking: BetaThinkingConfigParam | undefined = undefined
127-
let temperature = customTemperature ?? defaultTemperature
128-
const reasoningEffort = customReasoningEffort ?? defaultReasoningEffort
129-
130-
if (model.thinking) {
131-
// Only honor `customMaxTokens` for thinking models.
132-
maxTokens = customMaxTokens ?? maxTokens
133-
134-
// Clamp the thinking budget to be at most 80% of max tokens and at
135-
// least 1024 tokens.
136-
const maxBudgetTokens = Math.floor((maxTokens || ANTHROPIC_DEFAULT_MAX_TOKENS) * 0.8)
137-
const budgetTokens = Math.max(Math.min(customMaxThinkingTokens ?? maxBudgetTokens, maxBudgetTokens), 1024)
138-
thinking = { type: "enabled", budget_tokens: budgetTokens }
139-
140-
// Anthropic "Thinking" models require a temperature of 1.0.
141-
temperature = 1.0
142-
}
143-
144-
return { maxTokens, thinking, temperature, reasoningEffort }
145-
}

src/api/providers/__tests__/anthropic-vertex.test.ts

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -701,7 +701,7 @@ describe("VertexHandler", () => {
701701

702702
const result = handler.getModel()
703703
expect(result.maxTokens).toBe(32_768)
704-
expect(result.thinking).toEqual({ type: "enabled", budget_tokens: 16_384 })
704+
expect(result.reasoningBudget).toEqual(16_384)
705705
expect(result.temperature).toBe(1.0)
706706
})
707707

@@ -715,7 +715,7 @@ describe("VertexHandler", () => {
715715

716716
const result = handler.getModel()
717717
expect(result.maxTokens).toBe(8192)
718-
expect(result.thinking).toBeUndefined()
718+
expect(result.reasoningBudget).toBeUndefined()
719719
expect(result.temperature).toBe(0)
720720
})
721721
})
@@ -732,13 +732,9 @@ describe("VertexHandler", () => {
732732

733733
const modelInfo = thinkingHandler.getModel()
734734

735-
// Verify thinking configuration
736735
expect(modelInfo.id).toBe("claude-3-7-sonnet@20250219")
737-
expect(modelInfo.thinking).toBeDefined()
738-
const thinkingConfig = modelInfo.thinking as { type: "enabled"; budget_tokens: number }
739-
expect(thinkingConfig.type).toBe("enabled")
740-
expect(thinkingConfig.budget_tokens).toBe(4096)
741-
expect(modelInfo.temperature).toBe(1.0) // Thinking requires temperature 1.0
736+
expect(modelInfo.reasoningBudget).toBe(4096)
737+
expect(modelInfo.temperature).toBe(1.0) // Thinking requires temperature 1.0.
742738
})
743739

744740
it("should calculate thinking budget correctly", () => {
@@ -751,7 +747,7 @@ describe("VertexHandler", () => {
751747
modelMaxThinkingTokens: 5000,
752748
})
753749

754-
expect((handlerWithBudget.getModel().thinking as any).budget_tokens).toBe(5000)
750+
expect(handlerWithBudget.getModel().reasoningBudget).toBe(5000)
755751

756752
// Test with default thinking budget (80% of max tokens)
757753
const handlerWithDefaultBudget = new AnthropicVertexHandler({
@@ -761,7 +757,7 @@ describe("VertexHandler", () => {
761757
modelMaxTokens: 10000,
762758
})
763759

764-
expect((handlerWithDefaultBudget.getModel().thinking as any).budget_tokens).toBe(8000) // 80% of 10000
760+
expect(handlerWithDefaultBudget.getModel().reasoningBudget).toBe(8000) // 80% of 10000
765761

766762
// Test with minimum thinking budget (should be at least 1024)
767763
const handlerWithSmallMaxTokens = new AnthropicVertexHandler({
@@ -771,7 +767,7 @@ describe("VertexHandler", () => {
771767
modelMaxTokens: 1000, // This would result in 800 tokens for thinking, but minimum is 1024
772768
})
773769

774-
expect((handlerWithSmallMaxTokens.getModel().thinking as any).budget_tokens).toBe(1024)
770+
expect(handlerWithSmallMaxTokens.getModel().reasoningBudget).toBe(1024)
775771
})
776772

777773
it("should pass thinking configuration to API", async () => {

0 commit comments

Comments (0)