4 changes: 4 additions & 0 deletions packages/types/src/provider-settings.ts
@@ -181,6 +181,10 @@ const baseProviderSettingsSchema = z.object({

// Model verbosity.
verbosity: verbosityLevelsSchema.optional(),

// Generic large-input tier toggle, applied across providers that define tiers.
// When enabled, Roo selects the highest-contextWindow tier (e.g. "over 200k" / "1M") when one is available.
largeInputTierEnabled: z.boolean().optional(),
})

// Several of the providers share common model config properties.
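
As a quick illustration of the new flag, here is a minimal settings object that would satisfy the updated schema (a sketch; `verbosity` is the neighboring optional field from the same schema):

```ts
// Hypothetical provider settings exercising the new optional flag.
const providerSettings = {
	verbosity: "medium",
	largeInputTierEnabled: true, // opt into the highest-contextWindow tier (e.g. 1M) where one exists
}
```
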
104 changes: 104 additions & 0 deletions packages/types/src/providers/vertex.ts
@@ -175,6 +175,17 @@ export const vertexModels = {
cacheReadsPrice: 0.3,
supportsReasoningBudget: true,
},
"claude-sonnet-4@20250514[1m]": {
maxTokens: 8192,
contextWindow: 1_000_000,
supportsImages: true,
supportsPromptCache: true,
inputPrice: 6.0,
outputPrice: 22.5,
cacheWritesPrice: 7.5,
cacheReadsPrice: 0.6,
supportsReasoningBudget: true,
},
"claude-sonnet-4-5@20250929": {
maxTokens: 8192,
contextWindow: 200_000,
@@ -187,6 +198,17 @@ export const vertexModels = {
cacheReadsPrice: 0.3,
supportsReasoningBudget: true,
},
"claude-sonnet-4-5@20250929[1m]": {
maxTokens: 8192,
contextWindow: 1_000_000,
supportsImages: true,
supportsPromptCache: true,
inputPrice: 6.0,
outputPrice: 22.5,
cacheWritesPrice: 7.5,
cacheReadsPrice: 0.6,
supportsReasoningBudget: true,
},
"claude-haiku-4-5@20251001": {
maxTokens: 8192,
contextWindow: 200_000,
@@ -408,3 +430,85 @@ export const VERTEX_REGIONS = [
{ value: "me-central1", label: "me-central1" },
{ value: "africa-south1", label: "africa-south1" },
]

// Regional pricing constants for Vertex Claude Sonnet 4.5
export const VERTEX_SONNET_45_REGIONAL_PRICING_REGIONS = ["us-east5", "europe-west1", "asia-southeast1"] as const

export type VertexRegionalPricingRegion = (typeof VERTEX_SONNET_45_REGIONAL_PRICING_REGIONS)[number] | "global"

/**
* getVertexAdjustedModelInfo
*
* Centralizes Vertex Claude Sonnet pricing and 1M context adjustments.
* - Applies region-aware pricing for claude-sonnet-4-5@20250929
* - Applies global pricing for claude-sonnet-4@20250514
* - Enables 1M context window when either:
* • Model id ends with "[1m]" OR
* • largeInputTierEnabled is true
*/
export function getVertexAdjustedModelInfo(
id: string,
base: ModelInfo | undefined,
opts?: { region?: string; largeInputTierEnabled?: boolean },
): ModelInfo | undefined {
if (!base) return undefined

const isSonnet45 = id.startsWith("claude-sonnet-4-5@20250929")
const isSonnet4 = id.startsWith("claude-sonnet-4@20250514")

// If not a Sonnet 4/4.5 model, return base info as-is.
if (!isSonnet45 && !isSonnet4) return base

const region = opts?.region ?? "global"
const is1m = id.endsWith("[1m]") || opts?.largeInputTierEnabled === true

const regionalSet = new Set<string>(VERTEX_SONNET_45_REGIONAL_PRICING_REGIONS)
const useRegionalPricing = regionalSet.has(region)

if (isSonnet45) {
if (is1m) {
// Over 200k (1M tier) with regional pricing
return {
...base,
contextWindow: 1_000_000,
inputPrice: useRegionalPricing ? 6.6 : 6.0,
outputPrice: useRegionalPricing ? 24.75 : 22.5,
cacheWritesPrice: useRegionalPricing ? 8.25 : 7.5,
cacheReadsPrice: useRegionalPricing ? 0.66 : 0.6,
}
}

// Under 200k with regional pricing
return {
...base,
contextWindow: 200_000,
inputPrice: useRegionalPricing ? 3.3 : 3.0,
outputPrice: useRegionalPricing ? 16.5 : 15.0,
cacheWritesPrice: useRegionalPricing ? 4.13 : 3.75,
cacheReadsPrice: useRegionalPricing ? 0.33 : 0.3,
}
}

// Sonnet 4 (global pricing only)
if (is1m) {
// Over 200k (1M tier)
return {
...base,
contextWindow: 1_000_000,
inputPrice: 6.0,
outputPrice: 22.5,
cacheWritesPrice: 7.5,
cacheReadsPrice: 0.6,
}
}

// Under 200k
return {
...base,
contextWindow: 200_000,
inputPrice: 3.0,
outputPrice: 15.0,
cacheWritesPrice: 3.75,
cacheReadsPrice: 0.3,
}
}
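
For reviewers, a minimal usage sketch of the helper above. It assumes `vertexModels` and `getVertexAdjustedModelInfo` are both re-exported from the package root; the prices cited come from the tables in this diff:

```ts
import { vertexModels, getVertexAdjustedModelInfo } from "@roo-code/types"

const base = vertexModels["claude-sonnet-4-5@20250929"]

// Regional-pricing region, under 200k: inputPrice 3.0 -> 3.3.
const regional = getVertexAdjustedModelInfo("claude-sonnet-4-5@20250929", base, {
	region: "us-east5",
})

// Settings toggle without a [1m] id, region defaulting to "global":
// 1M window at global 1M pricing (inputPrice 6.0).
const oneMillion = getVertexAdjustedModelInfo("claude-sonnet-4-5@20250929", base, {
	largeInputTierEnabled: true,
})
```
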
30 changes: 30 additions & 0 deletions src/api/providers/__tests__/anthropic-vertex.spec.ts
@@ -691,6 +691,36 @@ describe("VertexHandler", () => {
expect(modelInfo.info.contextWindow).toBe(200_000)
})

it("should return 1M context window for Claude Sonnet 4 [1m] variant", () => {
handler = new AnthropicVertexHandler({
apiModelId: "claude-sonnet-4@20250514[1m]",
vertexProjectId: "test-project",
vertexRegion: "us-central1",
})

const modelInfo = handler.getModel()
// The provider strips the [1m] suffix when sending to API
expect(modelInfo.id).toBe("claude-sonnet-4@20250514")
expect(modelInfo.info).toBeDefined()
expect(modelInfo.info.maxTokens).toBe(8192)
expect(modelInfo.info.contextWindow).toBe(1_000_000)
})

it("should return 1M context window for Claude Sonnet 4.5 [1m] variant", () => {
handler = new AnthropicVertexHandler({
apiModelId: "claude-sonnet-4-5@20250929[1m]",
vertexProjectId: "test-project",
vertexRegion: "us-central1",
})

const modelInfo = handler.getModel()
// The provider strips the [1m] suffix when sending to API
expect(modelInfo.id).toBe("claude-sonnet-4-5@20250929")
expect(modelInfo.info).toBeDefined()
expect(modelInfo.info.maxTokens).toBe(8192)
expect(modelInfo.info.contextWindow).toBe(1_000_000)
})

it("honors custom maxTokens for thinking models", () => {
const handler = new AnthropicVertexHandler({
apiKey: "test-api-key",
41 changes: 35 additions & 6 deletions src/api/providers/anthropic-vertex.ts
@@ -87,16 +87,32 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple
model: id,
max_tokens: maxTokens ?? ANTHROPIC_DEFAULT_MAX_TOKENS,
temperature,
thinking,
// Cache the system prompt if caching is enabled.
system: supportsPromptCache
? [{ text: systemPrompt, type: "text" as const, cache_control: { type: "ephemeral" } }]
: systemPrompt,
messages: supportsPromptCache ? addCacheBreakpoints(messages) : messages,
stream: true,
}
// Only set thinking if defined to avoid adding an explicit undefined property
if (thinking) {
;(params as any).thinking = thinking
}

const stream = await this.client.messages.create(params)
// Enable 1M context beta when using [1m] variants or when explicitly enabled via settings for Sonnet 4/4.5
const use1m =
this.options.apiModelId?.endsWith("[1m]") === true ||
((id === "claude-sonnet-4@20250514" || id === "claude-sonnet-4-5@20250929") &&
this.options.anthropicBeta1MContext === true)
Comment on lines +102 to +106 (Contributor Author):

The code checks this.options.anthropicBeta1MContext which doesn't exist in the Vertex provider settings schema. The vertexSchema in provider-settings.ts (line 238) doesn't include anthropicBeta1MContext - that property only exists in anthropicSchema (line 199). This creates a critical discrepancy: when users enable largeInputTierEnabled for Vertex Sonnet 4/4.5 models, the frontend will charge 1M pricing but the backend won't send the required anthropic-beta: context-1m-2025-08-07 header, resulting in users being charged for 1M context while only receiving 200K. The check should use this.options.largeInputTierEnabled instead to match the frontend logic in useSelectedModel.ts line 253.
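
A minimal sketch of the direction this comment suggests, assuming `largeInputTierEnabled` is added to the Vertex schema and threaded through to `this.options`:

```ts
// Gate the 1M beta header on the same setting the frontend reads,
// instead of the Anthropic-only anthropicBeta1MContext option.
const use1m =
	this.options.apiModelId?.endsWith("[1m]") === true ||
	((id === "claude-sonnet-4@20250514" || id === "claude-sonnet-4-5@20250929") &&
		this.options.largeInputTierEnabled === true)
```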


const stream = use1m
? await this.client.messages.create(params, { headers: { "anthropic-beta": "context-1m-2025-08-07" } })
: await this.client.messages.create(params)

for await (const chunk of stream) {
switch (chunk.type) {
@@ -171,8 +187,10 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple
// The `:thinking` suffix indicates that the model is a "Hybrid"
// reasoning model and that reasoning is required to be enabled.
// The actual model ID honored by Anthropic's API does not have this
// suffix.
return { id: id.endsWith(":thinking") ? id.replace(":thinking", "") : id, info, ...params }
// suffix. Additionally, strip the optional [1m] suffix used to
// denote the 1M context beta variant in Roo's model list.
const normalizedId = id.replace(":thinking", "").replace("[1m]", "")
return { id: normalizedId, info, ...params }
}

async completePrompt(prompt: string) {
@@ -189,7 +207,6 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple
model: id,
max_tokens: maxTokens,
temperature,
thinking,
messages: [
{
role: "user",
@@ -200,8 +217,20 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple
],
stream: false,
}
// Only set thinking if defined to avoid adding an explicit undefined property
if (thinking) {
;(params as any).thinking = thinking
}

// Enable 1M context beta when using [1m] variants or when explicitly enabled via settings for Sonnet 4/4.5
const use1m =
this.options.apiModelId?.endsWith("[1m]") === true ||
((id === "claude-sonnet-4@20250514" || id === "claude-sonnet-4-5@20250929") &&
this.options.anthropicBeta1MContext === true)

const response = await this.client.messages.create(params)
const response = use1m
? await this.client.messages.create(params, { headers: { "anthropic-beta": "context-1m-2025-08-07" } })
: await this.client.messages.create(params)
const content = response.content[0]

if (content.type === "text") {
64 changes: 58 additions & 6 deletions src/shared/cost.ts
@@ -1,18 +1,66 @@
import type { ModelInfo } from "@roo-code/types"

/**
* Determine effective per-million prices for this request based on model tiers.
* If tiers are defined, pick the first tier whose contextWindow >= tierBasisTokens.
* Fall back to the last tier when every tier is below the observed token count.
*/
function selectTierPrices(
modelInfo: ModelInfo,
tierBasisTokens: number,
): {
inputPrice: number
outputPrice: number
cacheReadsPrice: number
} {
let inputPrice = modelInfo.inputPrice ?? 0
let outputPrice = modelInfo.outputPrice ?? 0
let cacheReadsPrice = modelInfo.cacheReadsPrice ?? 0

const tiers = modelInfo.tiers
if (Array.isArray(tiers) && tiers.length > 0) {
// If tiers are "service tiers" (e.g., OpenAI flex/priority), they will have a name.
// Do NOT auto-select by tokens in that case; pricing is chosen explicitly by the provider path.
const hasNamedTiers = tiers.some((t) => typeof (t as any).name === "string" && (t as any).name.length > 0)

if (!hasNamedTiers) {
// Choose the smallest tier that can accommodate the request's input size.
const chosen =
tiers.find(
(t) =>
tierBasisTokens <=
(t.contextWindow === Infinity ? Number.POSITIVE_INFINITY : (t.contextWindow as number)),
) || tiers[tiers.length - 1]!

inputPrice = chosen.inputPrice ?? inputPrice
outputPrice = chosen.outputPrice ?? outputPrice
cacheReadsPrice = chosen.cacheReadsPrice ?? cacheReadsPrice
}
}

return { inputPrice, outputPrice, cacheReadsPrice }
}

function calculateApiCostInternal(
modelInfo: ModelInfo,
inputTokens: number,
outputTokens: number,
cacheCreationInputTokens: number,
cacheReadInputTokens: number,
// Use total input tokens (before cache deductions) to determine tier selection
tierBasisTokens: number,
): number {
const cacheWritesCost = ((modelInfo.cacheWritesPrice || 0) / 1_000_000) * cacheCreationInputTokens
const cacheReadsCost = ((modelInfo.cacheReadsPrice || 0) / 1_000_000) * cacheReadInputTokens
const baseInputCost = ((modelInfo.inputPrice || 0) / 1_000_000) * inputTokens
const outputCost = ((modelInfo.outputPrice || 0) / 1_000_000) * outputTokens
const totalCost = cacheWritesCost + cacheReadsCost + baseInputCost + outputCost
return totalCost
const { inputPrice, outputPrice, cacheReadsPrice } = selectTierPrices(modelInfo, tierBasisTokens)

const cacheWritesPrice = modelInfo.cacheWritesPrice || 0
const cacheWritesCost = (cacheWritesPrice / 1_000_000) * cacheCreationInputTokens
const cacheReadsCost = (cacheReadsPrice / 1_000_000) * cacheReadInputTokens
const baseInputCost = (inputPrice / 1_000_000) * inputTokens
const outputCost = (outputPrice / 1_000_000) * outputTokens

return cacheWritesCost + cacheReadsCost + baseInputCost + outputCost
}
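
A worked example of the tier selection above, using a hypothetical model with unnamed, token-based tiers (numbers chosen to mirror the Vertex Sonnet pricing in this PR):

```ts
const tieredModel: ModelInfo = {
	maxTokens: 8192,
	contextWindow: 1_000_000,
	supportsPromptCache: true,
	inputPrice: 3.0,
	outputPrice: 15.0,
	tiers: [
		{ contextWindow: 200_000, inputPrice: 3.0, outputPrice: 15.0 },
		{ contextWindow: 1_000_000, inputPrice: 6.0, outputPrice: 22.5 },
	],
}

// 150k basis tokens fit the 200k tier: input cost = 150_000 * 3.0 / 1_000_000 = $0.45.
// 250k basis tokens exceed 200k, so the 1M tier applies: 250_000 * 6.0 / 1_000_000 = $1.50.
```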

// For Anthropic compliant usage, the input tokens count does NOT include the
@@ -30,6 +78,8 @@ export function calculateApiCostAnthropic(
outputTokens,
cacheCreationInputTokens || 0,
cacheReadInputTokens || 0,
// Tier basis for Anthropic protocol = actual input tokens (no cache included)
inputTokens,
)
}

@@ -51,6 +101,8 @@ export function calculateApiCostOpenAI(
outputTokens,
cacheCreationInputTokensNum,
cacheReadInputTokensNum,
// Tier basis for OpenAI protocol = total input tokens before subtracting cache
inputTokens,
)
}
