26 changes: 26 additions & 0 deletions packages/types/src/providers/gemini.ts
@@ -104,6 +104,32 @@ export const geminiModels = {
},
],
},
"gemini-2.5-pro-preview-06-05": {
maxTokens: 65_535,
contextWindow: 1_048_576,
supportsImages: true,
supportsPromptCache: true,
inputPrice: 2.5, // This is the pricing for prompts above 200k tokens.
outputPrice: 15,
cacheReadsPrice: 0.625,
cacheWritesPrice: 4.5,
maxThinkingTokens: 32_768,
supportsReasoningBudget: true,
tiers: [
{
contextWindow: 200_000,
inputPrice: 1.25,
outputPrice: 10,
cacheReadsPrice: 0.31,
},
{
contextWindow: Infinity,
inputPrice: 2.5,
outputPrice: 15,
cacheReadsPrice: 0.625,
},
],
},
"gemini-2.0-flash-001": {
maxTokens: 8192,
contextWindow: 1_048_576,
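For context on the new gemini-2.5-pro-preview-06-05 entry: the `tiers` array expresses prompt-size-based pricing, and the top-level `inputPrice`/`outputPrice` act as the above-200k catch-all rates (hence the inline comment). A minimal sketch of how a caller might resolve the tiered input price follows; the `Tier` type and helper name are illustrative assumptions, not code from this PR.

// Illustrative sketch only, not part of this PR.
type Tier = { contextWindow: number; inputPrice?: number; outputPrice?: number; cacheReadsPrice?: number }

// Pick the first tier whose window is large enough for the prompt; fall back to the model's base price.
function resolveInputPrice(promptTokens: number, basePrice: number, tiers?: readonly Tier[]): number {
	const tier = tiers?.find((t) => promptTokens <= t.contextWindow)
	return tier?.inputPrice ?? basePrice
}

// With the tiers above: 150_000 prompt tokens resolve to 1.25, 400_000 to 2.5.
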
20 changes: 15 additions & 5 deletions packages/types/src/providers/openrouter.ts
@@ -60,16 +60,26 @@ export const OPEN_ROUTER_COMPUTER_USE_MODELS = new Set([
"anthropic/claude-opus-4",
])

// When we first launched these models we didn't have support for
// enabling/disabling the reasoning budget for hybrid models. Now that we
// do support this we should give users the option to enable/disable it
// whenever possible. However these particular (virtual) model ids with the
// `:thinking` suffix always require the reasoning budget to be enabled, so
// for backwards compatibility we should still require it.
// We should *not* be adding new models to this set.
export const OPEN_ROUTER_REQUIRED_REASONING_BUDGET_MODELS = new Set([
"anthropic/claude-3.7-sonnet:thinking",
"google/gemini-2.5-flash-preview-05-20:thinking",
])

export const OPEN_ROUTER_REASONING_BUDGET_MODELS = new Set([
"anthropic/claude-3.7-sonnet:beta",
"anthropic/claude-3.7-sonnet:thinking",
"anthropic/claude-opus-4",
"anthropic/claude-sonnet-4",
"google/gemini-2.5-pro-preview",
"google/gemini-2.5-flash-preview-05-20",
"google/gemini-2.5-flash-preview-05-20:thinking",
])

export const OPEN_ROUTER_REQUIRED_REASONING_BUDGET_MODELS = new Set([
// Also include the models that require the reasoning budget to be enabled
// even though `OPEN_ROUTER_REQUIRED_REASONING_BUDGET_MODELS` takes precedence.
"anthropic/claude-3.7-sonnet:thinking",
"google/gemini-2.5-flash-preview-05-20:thinking",
])
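
Taken together, the two sets answer different questions: membership in `OPEN_ROUTER_REASONING_BUDGET_MODELS` means the reasoning budget can be toggled, while membership in `OPEN_ROUTER_REQUIRED_REASONING_BUDGET_MODELS` means the legacy `:thinking` id must keep it enabled. A minimal sketch of how a caller might consult them; the helper names are hypothetical, not part of this PR.

// Hypothetical helpers for illustration only.
const supportsReasoningBudget = (modelId: string): boolean => OPEN_ROUTER_REASONING_BUDGET_MODELS.has(modelId)

const requiresReasoningBudget = (modelId: string): boolean => OPEN_ROUTER_REQUIRED_REASONING_BUDGET_MODELS.has(modelId)

// e.g. "google/gemini-2.5-flash-preview-05-20" supports a toggleable budget,
// while "google/gemini-2.5-flash-preview-05-20:thinking" always requires it.
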
11 changes: 11 additions & 0 deletions packages/types/src/providers/vertex.ts
@@ -60,6 +60,16 @@ export const vertexModels = {
inputPrice: 2.5,
outputPrice: 15,
},
"gemini-2.5-pro-preview-06-05": {
maxTokens: 65_535,
contextWindow: 1_048_576,
supportsImages: true,
supportsPromptCache: true,
inputPrice: 2.5,
outputPrice: 15,
maxThinkingTokens: 32_768,
supportsReasoningBudget: true,
},
"gemini-2.5-pro-exp-03-25": {
maxTokens: 65_535,
contextWindow: 1_048_576,
@@ -217,6 +227,7 @@ export const vertexModels = {
} as const satisfies Record<string, ModelInfo>

export const VERTEX_REGIONS = [
{ value: "global", label: "global" },
{ value: "us-east5", label: "us-east5" },
{ value: "us-central1", label: "us-central1" },
{ value: "europe-west1", label: "europe-west1" },
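The new "global" entry becomes selectable as a Vertex region. A minimal sketch of how a region value like this might be handed to the @google/genai client follows; the constructor options and project ID are assumptions for illustration, not code from this PR.

import { GoogleGenAI } from "@google/genai"

// "global" routes requests to the global Vertex AI endpoint rather than a
// single regional one; replace the project ID with your own.
const client = new GoogleGenAI({
	vertexai: true,
	project: "my-gcp-project",
	location: "global",
})
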
6 changes: 4 additions & 2 deletions src/api/providers/fetchers/__tests__/openrouter.spec.ts
@@ -185,10 +185,11 @@ describe("OpenRouter API", () => {

expect(endpoints).toEqual({
Google: {
maxTokens: 0,
maxTokens: 65535,
contextWindow: 1048576,
supportsImages: true,
supportsPromptCache: true,
supportsReasoningBudget: true,
inputPrice: 1.25,
outputPrice: 10,
cacheWritesPrice: 1.625,
@@ -198,10 +199,11 @@
supportedParameters: undefined,
},
"Google AI Studio": {
maxTokens: 0,
maxTokens: 65536,
contextWindow: 1048576,
supportsImages: true,
supportsPromptCache: true,
supportsReasoningBudget: true,
inputPrice: 1.25,
outputPrice: 10,
cacheWritesPrice: 1.625,
64 changes: 35 additions & 29 deletions src/api/providers/gemini.ts
@@ -14,6 +14,7 @@ import { safeJsonParse } from "../../shared/safeJsonParse"

import { convertAnthropicContentToGemini, convertAnthropicMessageToGemini } from "../transform/gemini-format"
import type { ApiStream } from "../transform/stream"
import { getModelParams } from "../transform/model-params"

import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
import { BaseProvider } from "./base-provider"
@@ -62,15 +63,15 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
messages: Anthropic.Messages.MessageParam[],
metadata?: ApiHandlerCreateMessageMetadata,
): ApiStream {
const { id: model, thinkingConfig, maxOutputTokens, info } = this.getModel()
const { id: model, info, reasoning: thinkingConfig, maxTokens } = this.getModel()

const contents = messages.map(convertAnthropicMessageToGemini)

const config: GenerateContentConfig = {
systemInstruction,
httpOptions: this.options.googleGeminiBaseUrl ? { baseUrl: this.options.googleGeminiBaseUrl } : undefined,
thinkingConfig,
maxOutputTokens,
maxOutputTokens: this.options.modelMaxTokens ?? maxTokens ?? undefined,
temperature: this.options.modelTemperature ?? 0,
}

@@ -81,7 +82,28 @@
let lastUsageMetadata: GenerateContentResponseUsageMetadata | undefined

for await (const chunk of result) {
if (chunk.text) {
// Process candidates and their parts to separate thoughts from content
if (chunk.candidates && chunk.candidates.length > 0) {
const candidate = chunk.candidates[0]
if (candidate.content && candidate.content.parts) {
for (const part of candidate.content.parts) {
if (part.thought) {
// This is a thinking/reasoning part
if (part.text) {
yield { type: "reasoning", text: part.text }
}
} else {
// This is regular content
if (part.text) {
yield { type: "text", text: part.text }
}
}
}
}
}

// Fallback to the original text property if no candidates structure
else if (chunk.text) {
yield { type: "text", text: chunk.text }
}

@@ -108,32 +130,16 @@
}

override getModel() {
let id = this.options.apiModelId ?? geminiDefaultModelId
let info: ModelInfo = geminiModels[id as GeminiModelId]

if (id?.endsWith(":thinking")) {
id = id.slice(0, -":thinking".length)

if (geminiModels[id as GeminiModelId]) {
info = geminiModels[id as GeminiModelId]

return {
id,
info,
thinkingConfig: this.options.modelMaxThinkingTokens
? { thinkingBudget: this.options.modelMaxThinkingTokens }
: undefined,
maxOutputTokens: this.options.modelMaxTokens ?? info.maxTokens ?? undefined,
}
}
}

if (!info) {
id = geminiDefaultModelId
info = geminiModels[geminiDefaultModelId]
}

return { id, info }
const modelId = this.options.apiModelId
let id = modelId && modelId in geminiModels ? (modelId as GeminiModelId) : geminiDefaultModelId
const info: ModelInfo = geminiModels[id]
const params = getModelParams({ format: "gemini", modelId: id, model: info, settings: this.options })

// The `:thinking` suffix indicates that the model is a "Hybrid"
// reasoning model and that reasoning is required to be enabled.
// The actual model ID honored by Gemini's API does not have this
// suffix.
return { id: id.endsWith(":thinking") ? id.replace(":thinking", "") : id, info, ...params }
}

async completePrompt(prompt: string): Promise<string> {
9 changes: 9 additions & 0 deletions src/api/providers/openrouter.ts
@@ -74,6 +74,15 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH

let { id: modelId, maxTokens, temperature, topP, reasoning } = model

// OpenRouter sends reasoning tokens by default for Gemini 2.5 Pro
// Preview even if you don't request them. This is not the default for
// other providers (including Gemini), so we need to explicitly disable
// it. We should generalize this using the logic in `getModelParams`, but
// this is easier for now.
if (modelId === "google/gemini-2.5-pro-preview" && typeof reasoning === "undefined") {
reasoning = { exclude: true }
}

// Convert Anthropic messages to OpenAI format.
let openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
{ role: "system", content: systemPrompt },
38 changes: 12 additions & 26 deletions src/api/providers/vertex.ts
@@ -2,6 +2,8 @@ import { type ModelInfo, type VertexModelId, vertexDefaultModelId, vertexModels

import type { ApiHandlerOptions } from "../../shared/api"

import { getModelParams } from "../transform/model-params"

import { GeminiHandler } from "./gemini"
import { SingleCompletionHandler } from "../index"

@@ -11,31 +13,15 @@ export class VertexHandler extends GeminiHandler implements SingleCompletionHand
}

override getModel() {
let id = this.options.apiModelId ?? vertexDefaultModelId
let info: ModelInfo = vertexModels[id as VertexModelId]

if (id?.endsWith(":thinking")) {
id = id.slice(0, -":thinking".length) as VertexModelId

if (vertexModels[id as VertexModelId]) {
info = vertexModels[id as VertexModelId]

return {
id,
info,
thinkingConfig: this.options.modelMaxThinkingTokens
? { thinkingBudget: this.options.modelMaxThinkingTokens }
: undefined,
maxOutputTokens: this.options.modelMaxTokens ?? info.maxTokens ?? undefined,
}
}
}

if (!info) {
id = vertexDefaultModelId
info = vertexModels[vertexDefaultModelId]
}

return { id, info }
const modelId = this.options.apiModelId
let id = modelId && modelId in vertexModels ? (modelId as VertexModelId) : vertexDefaultModelId
const info: ModelInfo = vertexModels[id]
const params = getModelParams({ format: "gemini", modelId: id, model: info, settings: this.options })

// The `:thinking` suffix indicates that the model is a "Hybrid"
// reasoning model and that reasoning is required to be enabled.
// The actual model ID honored by Gemini's API does not have this
// suffix.
return { id: id.endsWith(":thinking") ? id.replace(":thinking", "") : id, info, ...params }
}
}
30 changes: 23 additions & 7 deletions src/api/transform/model-params.ts
@@ -5,13 +5,17 @@ import { shouldUseReasoningBudget, shouldUseReasoningEffort } from "../../shared
import {
type AnthropicReasoningParams,
type OpenAiReasoningParams,
type GeminiReasoningParams,
type OpenRouterReasoningParams,
getAnthropicReasoning,
getOpenAiReasoning,
getGeminiReasoning,
getOpenRouterReasoning,
} from "./reasoning"

type GetModelParamsOptions<T extends "openai" | "anthropic" | "openrouter"> = {
type Format = "anthropic" | "openai" | "gemini" | "openrouter"

type GetModelParamsOptions<T extends Format> = {
format: T
modelId: string
model: ModelInfo
@@ -26,34 +30,40 @@ type BaseModelParams = {
reasoningBudget: number | undefined
}

type AnthropicModelParams = {
format: "anthropic"
reasoning: AnthropicReasoningParams | undefined
} & BaseModelParams

type OpenAiModelParams = {
format: "openai"
reasoning: OpenAiReasoningParams | undefined
} & BaseModelParams

type AnthropicModelParams = {
format: "anthropic"
reasoning: AnthropicReasoningParams | undefined
type GeminiModelParams = {
format: "gemini"
reasoning: GeminiReasoningParams | undefined
} & BaseModelParams

type OpenRouterModelParams = {
format: "openrouter"
reasoning: OpenRouterReasoningParams | undefined
} & BaseModelParams

export type ModelParams = OpenAiModelParams | AnthropicModelParams | OpenRouterModelParams
export type ModelParams = AnthropicModelParams | OpenAiModelParams | GeminiModelParams | OpenRouterModelParams

// Function overloads for specific return types
export function getModelParams(options: GetModelParamsOptions<"openai">): OpenAiModelParams
export function getModelParams(options: GetModelParamsOptions<"anthropic">): AnthropicModelParams
export function getModelParams(options: GetModelParamsOptions<"openai">): OpenAiModelParams
export function getModelParams(options: GetModelParamsOptions<"gemini">): GeminiModelParams
export function getModelParams(options: GetModelParamsOptions<"openrouter">): OpenRouterModelParams
export function getModelParams({
format,
modelId,
model,
settings,
defaultTemperature = 0,
}: GetModelParamsOptions<"openai" | "anthropic" | "openrouter">): ModelParams {
}: GetModelParamsOptions<Format>): ModelParams {
const {
modelMaxTokens: customMaxTokens,
modelMaxThinkingTokens: customMaxThinkingTokens,
@@ -121,6 +131,12 @@ export function getModelParams({
...params,
reasoning: getOpenAiReasoning({ model, reasoningBudget, reasoningEffort, settings }),
}
} else if (format === "gemini") {
return {
format,
...params,
reasoning: getGeminiReasoning({ model, reasoningBudget, reasoningEffort, settings }),
}
} else {
// Special case for o1-pro, which doesn't support temperature.
// Note that OpenRouter's `supported_parameters` field includes
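As a usage sketch, the Gemini and Vertex handlers above call the new "gemini" overload roughly as follows; the settings values are invented for illustration and are not defaults from this PR.

// Illustrative call; settings values are examples only.
const modelId = "gemini-2.5-pro-preview-06-05"
const info = geminiModels[modelId]

const params = getModelParams({
	format: "gemini",
	modelId,
	model: info,
	settings: { modelMaxTokens: 65_535, modelMaxThinkingTokens: 8_192 },
})

// When shouldUseReasoningBudget() opts in for this model/settings pair,
// params.reasoning is a GeminiReasoningParams value such as
// { thinkingBudget: 8192, includeThoughts: true }, which the handlers
// spread into the request config as `thinkingConfig`.
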
12 changes: 12 additions & 0 deletions src/api/transform/reasoning.ts
@@ -1,5 +1,6 @@
import { BetaThinkingConfigParam } from "@anthropic-ai/sdk/resources/beta"
import OpenAI from "openai"
import type { GenerateContentConfig } from "@google/genai"

import type { ModelInfo, ProviderSettings } from "@roo-code/types"

@@ -17,6 +18,8 @@ export type AnthropicReasoningParams = BetaThinkingConfigParam

export type OpenAiReasoningParams = { reasoning_effort: OpenAI.Chat.ChatCompletionCreateParams["reasoning_effort"] }

export type GeminiReasoningParams = GenerateContentConfig["thinkingConfig"]

export type GetModelReasoningOptions = {
model: ModelInfo
reasoningBudget: number | undefined
@@ -49,3 +52,12 @@ export const getOpenAiReasoning = ({
settings,
}: GetModelReasoningOptions): OpenAiReasoningParams | undefined =>
shouldUseReasoningEffort({ model, settings }) ? { reasoning_effort: reasoningEffort } : undefined

export const getGeminiReasoning = ({
model,
reasoningBudget,
settings,
}: GetModelReasoningOptions): GeminiReasoningParams | undefined =>
shouldUseReasoningBudget({ model, settings })
? { thinkingBudget: reasoningBudget!, includeThoughts: true }
: undefined
3 changes: 1 addition & 2 deletions webview-ui/src/components/settings/ModelInfoView.tsx
@@ -73,8 +73,7 @@ export const ModelInfoView = ({
),
apiProvider === "gemini" && (
<span className="italic">
{selectedModelId === "gemini-2.5-pro-preview-03-25" ||
selectedModelId === "gemini-2.5-pro-preview-05-06"
{selectedModelId.includes("pro-preview")
? t("settings:modelInfo.gemini.billingEstimate")
: t("settings:modelInfo.gemini.freeRequests", {
count: selectedModelId && selectedModelId.includes("flash") ? 15 : 2,