13 changes: 8 additions & 5 deletions evals/packages/types/src/roo-code-defaults.ts
@@ -20,18 +20,21 @@ export const rooCodeDefaults: RooCodeSettings = {
// thinking: false,
// },

modelTemperature: null,
// reasoningEffort: "high",

pinnedApiConfigs: {},
lastShownAnnouncementId: "mar-20-2025-3-10",
lastShownAnnouncementId: "apr-04-2025-boomerang",

autoApprovalEnabled: true,
alwaysAllowReadOnly: true,
alwaysAllowReadOnlyOutsideWorkspace: false,
alwaysAllowWrite: true,
alwaysAllowWriteOutsideWorkspace: false,
writeDelayMs: 200,
writeDelayMs: 1000,
alwaysAllowBrowser: true,
alwaysApproveResubmit: true,
requestDelaySeconds: 5,
requestDelaySeconds: 10,
alwaysAllowMcp: true,
alwaysAllowModeSwitch: true,
alwaysAllowSubtasks: true,
@@ -40,8 +43,8 @@ export const rooCodeDefaults: RooCodeSettings = {

browserToolEnabled: false,
browserViewportSize: "900x600",
screenshotQuality: 38,
remoteBrowserEnabled: true,
screenshotQuality: 75,
remoteBrowserEnabled: false,

enableCheckpoints: false,
checkpointStorage: "task",
13 changes: 9 additions & 4 deletions evals/packages/types/src/roo-code.ts
@@ -96,7 +96,7 @@ export type TelemetrySetting = z.infer<typeof telemetrySettingsSchema>
*/

export const modelInfoSchema = z.object({
maxTokens: z.number().optional(),
maxTokens: z.number().nullish(),
contextWindow: z.number(),
supportsImages: z.boolean().optional(),
supportsComputerUse: z.boolean().optional(),
@@ -373,11 +373,14 @@ export const providerSettingsSchema = z.object({
requestyApiKey: z.string().optional(),
requestyModelId: z.string().optional(),
requestyModelInfo: modelInfoSchema.optional(),
// Generic
// Claude 3.7 Sonnet Thinking
modelMaxTokens: z.number().optional(), // Currently only used by Anthropic hybrid thinking models.
modelMaxThinkingTokens: z.number().optional(), // Currently only used by Anthropic hybrid thinking models.
modelTemperature: z.number().nullish(),
// Generic
includeMaxTokens: z.boolean().optional(),
modelTemperature: z.number().nullish(),
reasoningEffort: z.enum(["low", "medium", "high"]).optional(),
rateLimitSeconds: z.number().optional(),
// Fake AI
fakeAi: z.unknown().optional(),
})
@@ -457,11 +460,13 @@ const providerSettingsRecord: ProviderSettingsRecord = {
requestyModelId: undefined,
requestyModelInfo: undefined,
// Claude 3.7 Sonnet Thinking
modelTemperature: undefined,
modelMaxTokens: undefined,
modelMaxThinkingTokens: undefined,
// Generic
includeMaxTokens: undefined,
modelTemperature: undefined,
reasoningEffort: undefined,
rateLimitSeconds: undefined,
// Fake AI
fakeAi: undefined,
}
6 changes: 5 additions & 1 deletion src/api/index.ts
@@ -88,21 +88,25 @@ export function getModelParams({
model,
defaultMaxTokens,
defaultTemperature = 0,
defaultReasoningEffort,
}: {
options: ApiHandlerOptions
model: ModelInfo
defaultMaxTokens?: number
defaultTemperature?: number
defaultReasoningEffort?: "low" | "medium" | "high"
}) {
const {
modelMaxTokens: customMaxTokens,
modelMaxThinkingTokens: customMaxThinkingTokens,
modelTemperature: customTemperature,
reasoningEffort: customReasoningEffort,
} = options

let maxTokens = model.maxTokens ?? defaultMaxTokens
let thinking: BetaThinkingConfigParam | undefined = undefined
let temperature = customTemperature ?? defaultTemperature
const reasoningEffort = customReasoningEffort ?? defaultReasoningEffort

if (model.thinking) {
// Only honor `customMaxTokens` for thinking models.
@@ -118,5 +122,5 @@
temperature = 1.0
}

return { maxTokens, thinking, temperature }
return { maxTokens, thinking, temperature, reasoningEffort }
}
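
Note on the hunk above: `reasoningEffort` now rides along with the existing `maxTokens`, `thinking`, and `temperature` fallbacks. A minimal sketch of the resolution order, using illustrative option and model literals that are not part of this PR:

import { getModelParams } from "./index"

// Hypothetical inputs: a per-profile override of "high" and a provider default of "medium".
const options = { reasoningEffort: "high" as const }
const model = { contextWindow: 200_000, supportsPromptCache: false, supportsImages: false }

const params = getModelParams({ options, model, defaultReasoningEffort: "medium" })

// The user setting wins over the provider default, mirroring the temperature handling:
// params.reasoningEffort === "high"; with no override it falls back to "medium".
console.log(params.reasoningEffort)
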
17 changes: 16 additions & 1 deletion src/api/providers/openai.ts
@@ -82,6 +82,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
const urlHost = this._getUrlHost(modelUrl)
const deepseekReasoner = modelId.includes("deepseek-reasoner") || enabledR1Format
const ark = modelUrl.includes(".volces.com")

if (modelId.startsWith("o3-mini")) {
yield* this.handleO3FamilyMessage(modelId, systemPrompt, messages)
return
@@ -94,6 +95,7 @@
}

let convertedMessages

if (deepseekReasoner) {
convertedMessages = convertToR1Format([{ role: "user", content: systemPrompt }, ...messages])
} else if (ark || enabledLegacyFormat) {
@@ -112,16 +114,20 @@
],
}
}

convertedMessages = [systemMessage, ...convertToOpenAiMessages(messages)]

if (modelInfo.supportsPromptCache) {
// Note: the following logic is copied from openrouter:
// Add cache_control to the last two user messages
// (note: this works because we only ever add one user message at a time, but if we added multiple we'd need to mark the user message before the last assistant message)
const lastTwoUserMessages = convertedMessages.filter((msg) => msg.role === "user").slice(-2)

lastTwoUserMessages.forEach((msg) => {
if (typeof msg.content === "string") {
msg.content = [{ type: "text", text: msg.content }]
}

if (Array.isArray(msg.content)) {
// NOTE: this is fine since env details will always be added at the end. but if it weren't there, and the user added a image_url type message, it would pop a text part before it and then move it after to the end.
let lastTextPart = msg.content.filter((part) => part.type === "text").pop()
@@ -130,6 +136,7 @@
lastTextPart = { type: "text", text: "..." }
msg.content.push(lastTextPart)
}

// @ts-ignore-next-line
lastTextPart["cache_control"] = { type: "ephemeral" }
}
@@ -145,7 +152,9 @@
messages: convertedMessages,
stream: true as const,
...(isGrokXAI ? {} : { stream_options: { include_usage: true } }),
reasoning_effort: modelId === "grok-3-mini-beta" ? "high" : this.getModel().info.reasoningEffort,
}

if (this.options.includeMaxTokens) {
requestOptions.max_tokens = modelInfo.maxTokens
}
@@ -185,6 +194,7 @@
lastUsage = chunk.usage
}
}

for (const chunk of matcher.final()) {
yield chunk
}
@@ -217,6 +227,7 @@
type: "text",
text: response.choices[0]?.message.content || "",
}

yield this.processUsageMetrics(response.usage, modelInfo)
}
}
@@ -241,6 +252,7 @@
async completePrompt(prompt: string): Promise<string> {
try {
const isAzureAiInference = this._isAzureAiInference(this.options.openAiBaseUrl)

const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = {
model: this.getModel().id,
messages: [{ role: "user", content: prompt }],
@@ -250,11 +262,13 @@
requestOptions,
isAzureAiInference ? { path: AZURE_AI_INFERENCE_PATH } : {},
)

return response.choices[0]?.message.content || ""
} catch (error) {
if (error instanceof Error) {
throw new Error(`OpenAI completion error: ${error.message}`)
}

throw error
}
}
@@ -281,7 +295,7 @@
],
stream: true,
...(isGrokXAI ? {} : { stream_options: { include_usage: true } }),
reasoning_effort: this.getModel().info.reasoningEffort,
reasoning_effort: modelId === "grok-3-mini-beta" ? "high" : this.getModel().info.reasoningEffort,
},
methodIsAzureAiInference ? { path: AZURE_AI_INFERENCE_PATH } : {},
)
@@ -333,6 +347,7 @@
}
}
}

private _getUrlHost(baseUrl?: string): string {
try {
return new URL(baseUrl ?? "").host
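
For context, the streaming path in this handler now forwards a reasoning effort to the OpenAI SDK, with grok-3-mini-beta pinned to "high". A rough sketch of the resulting call, using placeholder model and prompt values rather than anything from this PR:

import OpenAI from "openai"

async function sketch() {
	const client = new OpenAI({ apiKey: process.env.OPENAI_API_KEY })

	const stream = await client.chat.completions.create({
		model: "o3-mini", // placeholder; grok-3-mini-beta would be forced to "high"
		messages: [{ role: "user", content: "Summarize the change." }],
		stream: true,
		stream_options: { include_usage: true },
		reasoning_effort: "medium", // left unset when neither the model info nor an override provides one
	})

	for await (const chunk of stream) {
		process.stdout.write(chunk.choices[0]?.delta?.content ?? "")
	}
}
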
15 changes: 12 additions & 3 deletions src/api/providers/openrouter.ts
@@ -1,8 +1,7 @@
import { Anthropic } from "@anthropic-ai/sdk"
import { BetaThinkingConfigParam } from "@anthropic-ai/sdk/resources/beta"
import axios, { AxiosRequestConfig } from "axios"
import axios from "axios"
import OpenAI from "openai"
import delay from "delay"

import { ApiHandlerOptions, ModelInfo, openRouterDefaultModelId, openRouterDefaultModelInfo } from "../../shared/api"
import { parseApiPrice } from "../../utils/cost"
@@ -22,6 +21,12 @@ type OpenRouterChatCompletionParams = OpenAI.Chat.ChatCompletionCreateParams & {
transforms?: string[]
include_reasoning?: boolean
thinking?: BetaThinkingConfigParam
// https://openrouter.ai/docs/use-cases/reasoning-tokens
reasoning?: {
effort?: "high" | "medium" | "low"
max_tokens?: number
exclude?: boolean
}
}

export class OpenRouterHandler extends BaseProvider implements SingleCompletionHandler {
@@ -42,7 +47,7 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
systemPrompt: string,
messages: Anthropic.Messages.MessageParam[],
): AsyncGenerator<ApiStreamChunk> {
let { id: modelId, maxTokens, thinking, temperature, topP } = this.getModel()
let { id: modelId, maxTokens, thinking, temperature, topP, reasoningEffort } = this.getModel()

// Convert Anthropic messages to OpenAI format.
let openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
@@ -70,13 +75,16 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
},
],
}

// Add cache_control to the last two user messages
// (note: this works because we only ever add one user message at a time, but if we added multiple we'd need to mark the user message before the last assistant message)
const lastTwoUserMessages = openAiMessages.filter((msg) => msg.role === "user").slice(-2)

lastTwoUserMessages.forEach((msg) => {
if (typeof msg.content === "string") {
msg.content = [{ type: "text", text: msg.content }]
}

if (Array.isArray(msg.content)) {
// NOTE: this is fine since env details will always be added at the end. but if it weren't there, and the user added a image_url type message, it would pop a text part before it and then move it after to the end.
let lastTextPart = msg.content.filter((part) => part.type === "text").pop()
@@ -113,6 +121,7 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
}),
// This way, the transforms field will only be included in the parameters when openRouterUseMiddleOutTransform is true.
...((this.options.openRouterUseMiddleOutTransform ?? true) && { transforms: ["middle-out"] }),
...(reasoningEffort && { reasoning: { effort: reasoningEffort } }),
}

const stream = await this.client.chat.completions.create(completionParams)
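
The `reasoning` block added to the completion params follows the shape documented at https://openrouter.ai/docs/use-cases/reasoning-tokens; only `effort` is populated by this handler. A rough sketch of the equivalent raw payload, with an illustrative model id:

// Sketch only: the handler builds this through the OpenAI SDK client rather than fetch.
const body = {
	model: "anthropic/claude-3.7-sonnet",
	messages: [{ role: "user", content: "Explain the tradeoffs." }],
	stream: true,
	reasoning: { effort: "high" }, // max_tokens and exclude are accepted by OpenRouter but left unset here
}

const response = await fetch("https://openrouter.ai/api/v1/chat/completions", {
	method: "POST",
	headers: {
		Authorization: `Bearer ${process.env.OPENROUTER_API_KEY}`,
		"Content-Type": "application/json",
	},
	body: JSON.stringify(body),
})
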
3 changes: 2 additions & 1 deletion src/exports/roo-code.d.ts
@@ -175,10 +175,11 @@ type ProviderSettings = {
cachableFields?: string[] | undefined
} | null)
| undefined
modelTemperature?: (number | null) | undefined
modelMaxTokens?: number | undefined
modelMaxThinkingTokens?: number | undefined
includeMaxTokens?: boolean | undefined
modelTemperature?: (number | null) | undefined
reasoningEffort?: ("low" | "medium" | "high") | undefined
rateLimitSeconds?: number | undefined
fakeAi?: unknown | undefined
}
3 changes: 2 additions & 1 deletion src/exports/types.ts
@@ -176,10 +176,11 @@ type ProviderSettings = {
cachableFields?: string[] | undefined
} | null)
| undefined
modelTemperature?: (number | null) | undefined
modelMaxTokens?: number | undefined
modelMaxThinkingTokens?: number | undefined
includeMaxTokens?: boolean | undefined
modelTemperature?: (number | null) | undefined
reasoningEffort?: ("low" | "medium" | "high") | undefined
rateLimitSeconds?: number | undefined
fakeAi?: unknown | undefined
}
18 changes: 15 additions & 3 deletions src/schemas/index.ts
@@ -95,6 +95,16 @@ export const telemetrySettingsSchema = z.enum(telemetrySettings)

export type TelemetrySetting = z.infer<typeof telemetrySettingsSchema>

/**
* ReasoningEffort
*/

export const reasoningEfforts = ["low", "medium", "high"] as const

export const reasoningEffortsSchema = z.enum(reasoningEfforts)

export type ReasoningEffort = z.infer<typeof reasoningEffortsSchema>

/**
* ModelInfo
*/
@@ -110,7 +120,7 @@ export const modelInfoSchema = z.object({
cacheWritesPrice: z.number().optional(),
cacheReadsPrice: z.number().optional(),
description: z.string().optional(),
reasoningEffort: z.enum(["low", "medium", "high"]).optional(),
reasoningEffort: reasoningEffortsSchema.optional(),
thinking: z.boolean().optional(),
minTokensPerCachePoint: z.number().optional(),
maxCachePoints: z.number().optional(),
@@ -383,11 +393,12 @@ export const providerSettingsSchema = z.object({
requestyModelId: z.string().optional(),
requestyModelInfo: modelInfoSchema.nullish(),
// Claude 3.7 Sonnet Thinking
modelTemperature: z.number().nullish(),
modelMaxTokens: z.number().optional(),
modelMaxThinkingTokens: z.number().optional(),
// Generic
includeMaxTokens: z.boolean().optional(),
modelTemperature: z.number().nullish(),
reasoningEffort: reasoningEffortsSchema.optional(),
rateLimitSeconds: z.number().optional(),
// Fake AI
fakeAi: z.unknown().optional(),
@@ -470,11 +481,12 @@ const providerSettingsRecord: ProviderSettingsRecord = {
requestyModelId: undefined,
requestyModelInfo: undefined,
// Claude 3.7 Sonnet Thinking
modelTemperature: undefined,
modelMaxTokens: undefined,
modelMaxThinkingTokens: undefined,
// Generic
includeMaxTokens: undefined,
modelTemperature: undefined,
reasoningEffort: undefined,
rateLimitSeconds: undefined,
// Fake AI
fakeAi: undefined,
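
A small sketch of how the new enum behaves under zod, mirroring the schema additions above (sample values are illustrative):

import { z } from "zod"

// Mirrors the exported schema in src/schemas/index.ts.
const reasoningEfforts = ["low", "medium", "high"] as const
const reasoningEffortsSchema = z.enum(reasoningEfforts)
type ReasoningEffort = z.infer<typeof reasoningEffortsSchema> // "low" | "medium" | "high"

reasoningEffortsSchema.parse("medium") // ok
const fromSettings: ReasoningEffort | undefined = reasoningEffortsSchema.optional().parse(undefined) // ok; the settings field is optional
reasoningEffortsSchema.safeParse("maximum").success // false; invalid values are rejected at the boundary
console.log(fromSettings)
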