Skip to content
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions packages/types/src/providers/vertex.ts
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,18 @@ export const vertexModels = {
cacheReadsPrice: 0.3,
supportsReasoningBudget: true,
},
"claude-sonnet-4@20250514[1m]": {
maxTokens: 8192,
contextWindow: 1_000_000,
supportsImages: true,
supportsComputerUse: true,
supportsPromptCache: true,
inputPrice: 3.0,
outputPrice: 15.0,
cacheWritesPrice: 3.75,
cacheReadsPrice: 0.3,
supportsReasoningBudget: true,
},
"claude-sonnet-4-5@20250929": {
maxTokens: 8192,
contextWindow: 200_000,
Expand All @@ -187,6 +199,18 @@ export const vertexModels = {
cacheReadsPrice: 0.3,
supportsReasoningBudget: true,
},
"claude-sonnet-4-5@20250929[1m]": {
maxTokens: 8192,
contextWindow: 1_000_000,
supportsImages: true,
supportsComputerUse: true,
supportsPromptCache: true,
inputPrice: 3.0,
outputPrice: 15.0,
cacheWritesPrice: 3.75,
cacheReadsPrice: 0.3,
supportsReasoningBudget: true,
},
"claude-haiku-4-5@20251001": {
maxTokens: 8192,
contextWindow: 200_000,
Expand Down
30 changes: 30 additions & 0 deletions src/api/providers/__tests__/anthropic-vertex.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -691,6 +691,36 @@ describe("VertexHandler", () => {
expect(modelInfo.info.contextWindow).toBe(200_000)
})

it("should return 1M context window for Claude Sonnet 4 [1m] variant", () => {
    // Select the 1M-context entry by its "[1m]"-suffixed key.
    handler = new AnthropicVertexHandler({
        apiModelId: "claude-sonnet-4@20250514[1m]",
        vertexProjectId: "test-project",
        vertexRegion: "us-central1",
    })

    const { id: resolvedId, info: resolvedInfo } = handler.getModel()

    // The id handed to the API must not carry the "[1m]" marker.
    expect(resolvedId).toBe("claude-sonnet-4@20250514")

    // The model info must still come from the 1M-context entry.
    expect(resolvedInfo).toBeDefined()
    expect(resolvedInfo.maxTokens).toBe(8192)
    expect(resolvedInfo.contextWindow).toBe(1_000_000)
})

it("should return 1M context window for Claude Sonnet 4.5 [1m] variant", () => {
    // Select the 1M-context entry by its "[1m]"-suffixed key.
    handler = new AnthropicVertexHandler({
        apiModelId: "claude-sonnet-4-5@20250929[1m]",
        vertexProjectId: "test-project",
        vertexRegion: "us-central1",
    })

    const { id: resolvedId, info: resolvedInfo } = handler.getModel()

    // The id handed to the API must not carry the "[1m]" marker.
    expect(resolvedId).toBe("claude-sonnet-4-5@20250929")

    // The model info must still come from the 1M-context entry.
    expect(resolvedInfo).toBeDefined()
    expect(resolvedInfo.maxTokens).toBe(8192)
    expect(resolvedInfo.contextWindow).toBe(1_000_000)
})

it("honors custom maxTokens for thinking models", () => {
const handler = new AnthropicVertexHandler({
apiKey: "test-api-key",
Expand Down
35 changes: 29 additions & 6 deletions src/api/providers/anthropic-vertex.ts
Original file line number Diff line number Diff line change
Expand Up @@ -87,16 +87,29 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple
model: id,
max_tokens: maxTokens ?? ANTHROPIC_DEFAULT_MAX_TOKENS,
temperature,
thinking,
// Cache the system prompt if caching is enabled.
system: supportsPromptCache
? [{ text: systemPrompt, type: "text" as const, cache_control: { type: "ephemeral" } }]
: systemPrompt,
messages: supportsPromptCache ? addCacheBreakpoints(messages) : messages,
stream: true,
}
// Only set thinking if defined to avoid adding an explicit undefined property
if (thinking) {
;(params as any).thinking = thinking
}

const stream = await this.client.messages.create(params)
// Enable 1M context beta when using [1m] variants
const use1m = this.options.apiModelId?.endsWith("[1m]") === true

let stream
if (use1m) {
stream = await this.client.messages.create(params, {
headers: { "anthropic-beta": "context-1m-2025-08-07" },
})
} else {
stream = await this.client.messages.create(params)
}

for await (const chunk of stream) {
switch (chunk.type) {
Expand Down Expand Up @@ -171,8 +184,10 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple
// The `:thinking` suffix indicates that the model is a "Hybrid"
// reasoning model and that reasoning is required to be enabled.
// The actual model ID honored by Anthropic's API does not have this
// suffix.
return { id: id.endsWith(":thinking") ? id.replace(":thinking", "") : id, info, ...params }
// suffix. Additionally, strip the optional [1m] suffix used to
// denote the 1M context beta variant in Roo's model list.
const normalizedId = id.replace(":thinking", "").replace("[1m]", "")
return { id: normalizedId, info, ...params }
}

async completePrompt(prompt: string) {
Expand All @@ -189,7 +204,6 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple
model: id,
max_tokens: maxTokens,
temperature,
thinking,
messages: [
{
role: "user",
Expand All @@ -200,8 +214,17 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple
],
stream: false,
}
// Only set thinking if defined to avoid adding an explicit undefined property
if (thinking) {
;(params as any).thinking = thinking
}

// Enable 1M context beta when using [1m] variants
const use1m = this.options.apiModelId?.endsWith("[1m]") === true

const response = await this.client.messages.create(params)
const response = use1m
? await this.client.messages.create(params, { headers: { "anthropic-beta": "context-1m-2025-08-07" } })
: await this.client.messages.create(params)
const content = response.content[0]

if (content.type === "text") {
Expand Down
Loading