4 changes: 4 additions & 0 deletions packages/types/src/provider-settings.ts
@@ -181,6 +181,10 @@ const baseProviderSettingsSchema = z.object({

// Model verbosity.
verbosity: verbosityLevelsSchema.optional(),

// Generic large-input tier toggle, applied across providers that define tiers.
// When enabled, Roo selects the highest-contextWindow tier (e.g. "over 200k" / "1M") when one is available.
largeInputTierEnabled: z.boolean().optional(),
})

// Several of the providers share common model config properties.
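
As a quick illustration of the new flag, here is a minimal settings object that would satisfy the updated schema (a sketch; `verbosity` is the neighboring optional field from the same schema):

```ts
// Hypothetical provider settings exercising the new optional flag.
const providerSettings = {
	verbosity: "medium",
	largeInputTierEnabled: true, // opt into the highest-contextWindow tier (e.g. 1M) where one exists
}
```
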
104 changes: 104 additions & 0 deletions packages/types/src/providers/vertex.ts
@@ -175,6 +175,17 @@ export const vertexModels = {
cacheReadsPrice: 0.3,
supportsReasoningBudget: true,
},
"claude-sonnet-4@20250514[1m]": {
maxTokens: 8192,
contextWindow: 1_000_000,
supportsImages: true,
supportsPromptCache: true,
inputPrice: 6.0,
outputPrice: 22.5,
cacheWritesPrice: 7.5,
cacheReadsPrice: 0.6,
supportsReasoningBudget: true,
},
"claude-sonnet-4-5@20250929": {
maxTokens: 8192,
contextWindow: 200_000,
@@ -187,6 +198,17 @@ export const vertexModels = {
cacheReadsPrice: 0.3,
supportsReasoningBudget: true,
},
"claude-sonnet-4-5@20250929[1m]": {
maxTokens: 8192,
contextWindow: 1_000_000,
supportsImages: true,
supportsPromptCache: true,
inputPrice: 6.0,
outputPrice: 22.5,
cacheWritesPrice: 7.5,
cacheReadsPrice: 0.6,
supportsReasoningBudget: true,
},
"claude-haiku-4-5@20251001": {
maxTokens: 8192,
contextWindow: 200_000,
@@ -408,3 +430,85 @@ export const VERTEX_REGIONS = [
{ value: "me-central1", label: "me-central1" },
{ value: "africa-south1", label: "africa-south1" },
]

// Regional pricing constants for Vertex Claude Sonnet 4.5
export const VERTEX_SONNET_45_REGIONAL_PRICING_REGIONS = ["us-east5", "europe-west1", "asia-southeast1"] as const

export type VertexRegionalPricingRegion = (typeof VERTEX_SONNET_45_REGIONAL_PRICING_REGIONS)[number] | "global"

/**
* getVertexAdjustedModelInfo
*
* Centralizes Vertex Claude Sonnet pricing and 1M context adjustments.
* - Applies region-aware pricing for claude-sonnet-4-5@20250929
* - Applies global pricing for claude-sonnet-4@20250514
* - Enables 1M context window when either:
* • Model id ends with "[1m]" OR
* • largeInputTierEnabled is true
*/
export function getVertexAdjustedModelInfo(
id: string,
base: ModelInfo | undefined,
opts?: { region?: string; largeInputTierEnabled?: boolean },
): ModelInfo | undefined {
if (!base) return undefined

const isSonnet45 = id.startsWith("claude-sonnet-4-5@20250929")
const isSonnet4 = id.startsWith("claude-sonnet-4@20250514")

// If not a Sonnet 4/4.5 model, return base info as-is.
if (!isSonnet45 && !isSonnet4) return base

const region = opts?.region ?? "global"
const is1m = id.endsWith("[1m]") || opts?.largeInputTierEnabled === true

const regionalSet = new Set<string>(VERTEX_SONNET_45_REGIONAL_PRICING_REGIONS)
const useRegionalPricing = regionalSet.has(region)

if (isSonnet45) {
if (is1m) {
// Over 200k (1M tier) with regional pricing
return {
...base,
contextWindow: 1_000_000,
inputPrice: useRegionalPricing ? 6.6 : 6.0,
outputPrice: useRegionalPricing ? 24.75 : 22.5,
cacheWritesPrice: useRegionalPricing ? 8.25 : 7.5,
cacheReadsPrice: useRegionalPricing ? 0.66 : 0.6,
}
}

// Under 200k with regional pricing
return {
...base,
contextWindow: 200_000,
inputPrice: useRegionalPricing ? 3.3 : 3.0,
outputPrice: useRegionalPricing ? 16.5 : 15.0,
cacheWritesPrice: useRegionalPricing ? 4.13 : 3.75,
cacheReadsPrice: useRegionalPricing ? 0.33 : 0.3,
}
}

// Sonnet 4 (global pricing only)
if (is1m) {
// Over 200k (1M tier)
return {
...base,
contextWindow: 1_000_000,
inputPrice: 6.0,
outputPrice: 22.5,
cacheWritesPrice: 7.5,
cacheReadsPrice: 0.6,
}
}

// Under 200k
return {
...base,
contextWindow: 200_000,
inputPrice: 3.0,
outputPrice: 15.0,
cacheWritesPrice: 3.75,
cacheReadsPrice: 0.3,
}
}
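
For reviewers, a minimal usage sketch of the helper above. It assumes `vertexModels` and `getVertexAdjustedModelInfo` are both re-exported from the package root; the prices cited come from the tables in this diff:

```ts
import { vertexModels, getVertexAdjustedModelInfo } from "@roo-code/types"

const base = vertexModels["claude-sonnet-4-5@20250929"]

// Regional-pricing region, under 200k: inputPrice 3.0 -> 3.3.
const regional = getVertexAdjustedModelInfo("claude-sonnet-4-5@20250929", base, {
	region: "us-east5",
})

// Settings toggle without a [1m] id, region defaulting to "global":
// 1M window at global 1M pricing (inputPrice 6.0).
const oneMillion = getVertexAdjustedModelInfo("claude-sonnet-4-5@20250929", base, {
	largeInputTierEnabled: true,
})
```
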
30 changes: 30 additions & 0 deletions src/api/providers/__tests__/anthropic-vertex.spec.ts
@@ -691,6 +691,36 @@ describe("VertexHandler", () => {
expect(modelInfo.info.contextWindow).toBe(200_000)
})

it("should return 1M context window for Claude Sonnet 4 [1m] variant", () => {
handler = new AnthropicVertexHandler({
apiModelId: "claude-sonnet-4@20250514[1m]",
vertexProjectId: "test-project",
vertexRegion: "us-central1",
})

const modelInfo = handler.getModel()
// The provider strips the [1m] suffix when sending to API
expect(modelInfo.id).toBe("claude-sonnet-4@20250514")
expect(modelInfo.info).toBeDefined()
expect(modelInfo.info.maxTokens).toBe(8192)
expect(modelInfo.info.contextWindow).toBe(1_000_000)
})

it("should return 1M context window for Claude Sonnet 4.5 [1m] variant", () => {
handler = new AnthropicVertexHandler({
apiModelId: "claude-sonnet-4-5@20250929[1m]",
vertexProjectId: "test-project",
vertexRegion: "us-central1",
})

const modelInfo = handler.getModel()
// The provider strips the [1m] suffix when sending to API
expect(modelInfo.id).toBe("claude-sonnet-4-5@20250929")
expect(modelInfo.info).toBeDefined()
expect(modelInfo.info.maxTokens).toBe(8192)
expect(modelInfo.info.contextWindow).toBe(1_000_000)
})

it("honors custom maxTokens for thinking models", () => {
const handler = new AnthropicVertexHandler({
apiKey: "test-api-key",
41 changes: 35 additions & 6 deletions src/api/providers/anthropic-vertex.ts
@@ -87,16 +87,32 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple
model: id,
max_tokens: maxTokens ?? ANTHROPIC_DEFAULT_MAX_TOKENS,
temperature,
thinking,
// Cache the system prompt if caching is enabled.
system: supportsPromptCache
? [{ text: systemPrompt, type: "text" as const, cache_control: { type: "ephemeral" } }]
: systemPrompt,
messages: supportsPromptCache ? addCacheBreakpoints(messages) : messages,
stream: true,
}
// Only set thinking if defined to avoid adding an explicit undefined property
if (thinking) {
;(params as any).thinking = thinking
}

const stream = await this.client.messages.create(params)
// Enable 1M context beta when using [1m] variants or when explicitly enabled via settings for Sonnet 4/4.5
const use1m =
this.options.apiModelId?.endsWith("[1m]") === true ||
((id === "claude-sonnet-4@20250514" || id === "claude-sonnet-4-5@20250929") &&
this.options.anthropicBeta1MContext === true)
Comment on lines +102 to +106 (Contributor Author):

The code checks this.options.anthropicBeta1MContext which doesn't exist in the Vertex provider settings schema. The vertexSchema in provider-settings.ts (line 238) doesn't include anthropicBeta1MContext - that property only exists in anthropicSchema (line 199). This creates a critical discrepancy: when users enable largeInputTierEnabled for Vertex Sonnet 4/4.5 models, the frontend will charge 1M pricing but the backend won't send the required anthropic-beta: context-1m-2025-08-07 header, resulting in users being charged for 1M context while only receiving 200K. The check should use this.options.largeInputTierEnabled instead to match the frontend logic in useSelectedModel.ts line 253.
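
A minimal sketch of the direction this comment suggests, assuming `largeInputTierEnabled` is added to the Vertex schema and threaded through to `this.options`:

```ts
// Gate the 1M beta header on the same setting the frontend reads,
// instead of the Anthropic-only anthropicBeta1MContext option.
const use1m =
	this.options.apiModelId?.endsWith("[1m]") === true ||
	((id === "claude-sonnet-4@20250514" || id === "claude-sonnet-4-5@20250929") &&
		this.options.largeInputTierEnabled === true)
```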


const stream = use1m
? await this.client.messages.create(params, { headers: { "anthropic-beta": "context-1m-2025-08-07" } })
: await this.client.messages.create(params)

for await (const chunk of stream) {
switch (chunk.type) {
@@ -171,8 +187,10 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple
// The `:thinking` suffix indicates that the model is a "Hybrid"
// reasoning model and that reasoning is required to be enabled.
// The actual model ID honored by Anthropic's API does not have this
// suffix.
return { id: id.endsWith(":thinking") ? id.replace(":thinking", "") : id, info, ...params }
// suffix. Additionally, strip the optional [1m] suffix used to
// denote the 1M context beta variant in Roo's model list.
const normalizedId = id.replace(":thinking", "").replace("[1m]", "")
return { id: normalizedId, info, ...params }
}

async completePrompt(prompt: string) {
@@ -189,7 +207,6 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple
model: id,
max_tokens: maxTokens,
temperature,
thinking,
messages: [
{
role: "user",
@@ -200,8 +217,20 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple
],
stream: false,
}
// Only set thinking if defined to avoid adding an explicit undefined property
if (thinking) {
;(params as any).thinking = thinking
}

// Enable 1M context beta when using [1m] variants or when explicitly enabled via settings for Sonnet 4/4.5
const use1m =
this.options.apiModelId?.endsWith("[1m]") === true ||
((id === "claude-sonnet-4@20250514" || id === "claude-sonnet-4-5@20250929") &&
this.options.anthropicBeta1MContext === true)

const response = await this.client.messages.create(params)
const response = use1m
? await this.client.messages.create(params, { headers: { "anthropic-beta": "context-1m-2025-08-07" } })
: await this.client.messages.create(params)
const content = response.content[0]

if (content.type === "text") {
64 changes: 58 additions & 6 deletions src/shared/cost.ts
@@ -1,18 +1,66 @@
import type { ModelInfo } from "@roo-code/types"

/**
* Determine effective per-million prices for this request based on model tiers.
* If tiers are defined, pick the first tier whose contextWindow >= tierBasisTokens.
* Fall back to the last tier when every tier is below the observed token count.
*/
function selectTierPrices(
modelInfo: ModelInfo,
tierBasisTokens: number,
): {
inputPrice: number
outputPrice: number
cacheReadsPrice: number
} {
let inputPrice = modelInfo.inputPrice ?? 0
let outputPrice = modelInfo.outputPrice ?? 0
let cacheReadsPrice = modelInfo.cacheReadsPrice ?? 0

const tiers = modelInfo.tiers
if (Array.isArray(tiers) && tiers.length > 0) {
// If tiers are "service tiers" (e.g., OpenAI flex/priority), they will have a name.
// Do NOT auto-select by tokens in that case; pricing is chosen explicitly by the provider path.
const hasNamedTiers = tiers.some((t) => typeof (t as any).name === "string" && (t as any).name.length > 0)

if (!hasNamedTiers) {
// Choose the smallest tier that can accommodate the request's input size.
const chosen =
tiers.find(
(t) =>
tierBasisTokens <=
(t.contextWindow === Infinity ? Number.POSITIVE_INFINITY : (t.contextWindow as number)),
) || tiers[tiers.length - 1]!

inputPrice = chosen.inputPrice ?? inputPrice
outputPrice = chosen.outputPrice ?? outputPrice
cacheReadsPrice = chosen.cacheReadsPrice ?? cacheReadsPrice
}
}

return { inputPrice, outputPrice, cacheReadsPrice }
}

function calculateApiCostInternal(
modelInfo: ModelInfo,
inputTokens: number,
outputTokens: number,
cacheCreationInputTokens: number,
cacheReadInputTokens: number,
// Use total input tokens (before cache deductions) to determine tier selection
tierBasisTokens: number,
): number {
const cacheWritesCost = ((modelInfo.cacheWritesPrice || 0) / 1_000_000) * cacheCreationInputTokens
const cacheReadsCost = ((modelInfo.cacheReadsPrice || 0) / 1_000_000) * cacheReadInputTokens
const baseInputCost = ((modelInfo.inputPrice || 0) / 1_000_000) * inputTokens
const outputCost = ((modelInfo.outputPrice || 0) / 1_000_000) * outputTokens
const totalCost = cacheWritesCost + cacheReadsCost + baseInputCost + outputCost
return totalCost
const { inputPrice, outputPrice, cacheReadsPrice } = selectTierPrices(modelInfo, tierBasisTokens)

const cacheWritesPrice = modelInfo.cacheWritesPrice || 0
const cacheWritesCost = (cacheWritesPrice / 1_000_000) * cacheCreationInputTokens
const cacheReadsCost = (cacheReadsPrice / 1_000_000) * cacheReadInputTokens
const baseInputCost = (inputPrice / 1_000_000) * inputTokens
const outputCost = (outputPrice / 1_000_000) * outputTokens

return cacheWritesCost + cacheReadsCost + baseInputCost + outputCost
}
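
A worked example of the tier selection above, using a hypothetical model with unnamed, token-based tiers (numbers chosen to mirror the Vertex Sonnet pricing in this PR):

```ts
const tieredModel: ModelInfo = {
	maxTokens: 8192,
	contextWindow: 1_000_000,
	supportsPromptCache: true,
	inputPrice: 3.0,
	outputPrice: 15.0,
	tiers: [
		{ contextWindow: 200_000, inputPrice: 3.0, outputPrice: 15.0 },
		{ contextWindow: 1_000_000, inputPrice: 6.0, outputPrice: 22.5 },
	],
}

// 150k basis tokens fit the 200k tier: input cost = 150_000 * 3.0 / 1_000_000 = $0.45.
// 250k basis tokens exceed 200k, so the 1M tier applies: 250_000 * 6.0 / 1_000_000 = $1.50.
```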

// For Anthropic compliant usage, the input tokens count does NOT include the
@@ -30,6 +78,8 @@ export function calculateApiCostAnthropic(
outputTokens,
cacheCreationInputTokens || 0,
cacheReadInputTokens || 0,
// Tier basis for Anthropic protocol = actual input tokens (no cache included)
inputTokens,
)
}

@@ -51,6 +101,8 @@ export function calculateApiCostOpenAI(
outputTokens,
cacheCreationInputTokensNum,
cacheReadInputTokensNum,
// Tier basis for OpenAI protocol = total input tokens before subtracting cache
inputTokens,
)
}
