-
Notifications
You must be signed in to change notification settings - Fork 2.4k
feat: enable Claude 4.5 global inference profile support for AWS Bedrock #8542
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,229 @@ | ||
| // npx vitest run src/api/providers/__tests__/bedrock-global-inference.spec.ts | ||
|
|
||
| import { AwsBedrockHandler } from "../bedrock" | ||
| import { BedrockRuntimeClient, ConverseStreamCommand } from "@aws-sdk/client-bedrock-runtime" | ||
| import { logger } from "../../../utils/logging" | ||
| import type { ProviderSettings } from "@roo-code/types" | ||
|
|
||
| // Mock AWS SDK modules | ||
| vitest.mock("@aws-sdk/client-bedrock-runtime", () => { | ||
| const mockSend = vi.fn().mockResolvedValue({ | ||
| stream: (async function* () { | ||
| yield { | ||
| contentBlockStart: { | ||
| start: { text: "Test response" }, | ||
| }, | ||
| } | ||
| yield { | ||
| contentBlockDelta: { | ||
| delta: { text: " from Claude" }, | ||
| }, | ||
| } | ||
| yield { | ||
| messageStop: {}, | ||
| } | ||
| })(), | ||
| }) | ||
|
|
||
| return { | ||
| BedrockRuntimeClient: vi.fn().mockImplementation(() => ({ | ||
| send: mockSend, | ||
| })), | ||
| ConverseStreamCommand: vi.fn(), | ||
| ConverseCommand: vi.fn(), | ||
| } | ||
| }) | ||
|
|
||
| vitest.mock("../../../utils/logging") | ||
|
|
||
| describe("AwsBedrockHandler - Global Inference Profile Support", () => { | ||
| let handler: AwsBedrockHandler | ||
| let mockSend: any | ||
|
|
||
| beforeEach(() => { | ||
| vi.clearAllMocks() | ||
| mockSend = vi.fn().mockResolvedValue({ | ||
| stream: (async function* () { | ||
| yield { | ||
| contentBlockStart: { | ||
| start: { text: "Test response" }, | ||
| }, | ||
| } | ||
| yield { | ||
| contentBlockDelta: { | ||
| delta: { text: " from Claude" }, | ||
| }, | ||
| } | ||
| yield { | ||
| messageStop: {}, | ||
| } | ||
| })(), | ||
| }) | ||
| ;(BedrockRuntimeClient as any).mockImplementation(() => ({ | ||
| send: mockSend, | ||
| })) | ||
| }) | ||
|
|
||
| describe("Global Inference Profile ARN Support", () => { | ||
| it("should detect Claude Sonnet 4.5 global inference profile ARN", () => { | ||
| const options: ProviderSettings = { | ||
| apiProvider: "bedrock", | ||
| awsRegion: "us-east-1", | ||
| awsCustomArn: | ||
| "arn:aws:bedrock:us-east-1:148761681080:inference-profile/global.anthropic.claude-sonnet-4-5-20250929-v1:0", | ||
| awsAccessKey: "test-key", | ||
| awsSecretKey: "test-secret", | ||
| } | ||
|
|
||
| handler = new AwsBedrockHandler(options) | ||
| const model = handler.getModel() | ||
|
|
||
| // Should recognize the ARN and provide appropriate model info | ||
| expect(model.id).toBe( | ||
| "arn:aws:bedrock:us-east-1:148761681080:inference-profile/global.anthropic.claude-sonnet-4-5-20250929-v1:0", | ||
| ) | ||
| expect(model.info).toBeDefined() | ||
| expect(model.info.supportsReasoningBudget).toBe(true) | ||
| expect(model.info.supportsPromptCache).toBe(true) | ||
| expect(model.info.supportsImages).toBe(true) | ||
| }) | ||
|
|
||
| it("should enable 1M context for global inference profile when awsBedrock1MContext is true", async () => { | ||
| const options: ProviderSettings = { | ||
| apiProvider: "bedrock", | ||
| awsRegion: "us-east-1", | ||
| awsCustomArn: | ||
| "arn:aws:bedrock:us-east-1:148761681080:inference-profile/global.anthropic.claude-sonnet-4-5-20250929-v1:0", | ||
| awsBedrock1MContext: true, | ||
| awsAccessKey: "test-key", | ||
| awsSecretKey: "test-secret", | ||
| } | ||
|
|
||
| handler = new AwsBedrockHandler(options) | ||
|
|
||
| const messages = [{ role: "user" as const, content: "Test message" }] | ||
| const stream = handler.createMessage("System prompt", messages) | ||
|
|
||
| // Consume the stream | ||
| const chunks = [] | ||
| for await (const chunk of stream) { | ||
| chunks.push(chunk) | ||
| } | ||
|
|
||
| // Check that the command was called | ||
| expect(mockSend).toHaveBeenCalled() | ||
| expect(ConverseStreamCommand).toHaveBeenCalled() | ||
|
|
||
| // Get the payload from the ConverseStreamCommand constructor | ||
| const commandPayload = (ConverseStreamCommand as any).mock.calls[0][0] | ||
| expect(commandPayload).toBeDefined() | ||
| expect(commandPayload.additionalModelRequestFields).toBeDefined() | ||
| expect(commandPayload.additionalModelRequestFields.anthropic_beta).toContain("context-1m-2025-08-07") | ||
| }) | ||
|
|
||
| it("should enable thinking/reasoning for global inference profile", async () => { | ||
| const options: ProviderSettings = { | ||
| apiProvider: "bedrock", | ||
| awsRegion: "us-east-1", | ||
| awsCustomArn: | ||
| "arn:aws:bedrock:us-east-1:148761681080:inference-profile/global.anthropic.claude-sonnet-4-5-20250929-v1:0", | ||
| enableReasoningEffort: true, | ||
| awsAccessKey: "test-key", | ||
| awsSecretKey: "test-secret", | ||
| } | ||
|
|
||
| handler = new AwsBedrockHandler(options) | ||
|
|
||
| const messages = [{ role: "user" as const, content: "Test message" }] | ||
| const metadata = { | ||
| taskId: "test-task-id", | ||
| thinking: { | ||
| enabled: true, | ||
| maxThinkingTokens: 8192, | ||
| }, | ||
| } | ||
|
|
||
| const stream = handler.createMessage("System prompt", messages, metadata) | ||
|
|
||
| // Consume the stream | ||
| const chunks = [] | ||
| for await (const chunk of stream) { | ||
| chunks.push(chunk) | ||
| } | ||
|
|
||
| // Check that thinking was enabled | ||
| expect(logger.info).toHaveBeenCalledWith( | ||
| expect.stringContaining("Extended thinking enabled"), | ||
| expect.objectContaining({ | ||
| ctx: "bedrock", | ||
| thinking: expect.objectContaining({ | ||
| type: "enabled", | ||
| budget_tokens: 8192, | ||
| }), | ||
| }), | ||
| ) | ||
| }) | ||
|
|
||
| it("should handle various Claude 4.5 ARN patterns", () => { | ||
| const testCases = [ | ||
| "arn:aws:bedrock:us-east-1:148761681080:inference-profile/global.anthropic.claude-sonnet-4-5-20250929-v1:0", | ||
| "arn:aws:bedrock:eu-west-1:123456789012:inference-profile/anthropic.claude-sonnet-4-5-20250929-v1:0", | ||
| "arn:aws:bedrock:ap-southeast-1:987654321098:foundation-model/anthropic.claude-sonnet-4.5-v1:0", | ||
| ] | ||
|
|
||
| testCases.forEach((arn) => { | ||
| const options: ProviderSettings = { | ||
| apiProvider: "bedrock", | ||
| awsRegion: "us-east-1", | ||
| awsCustomArn: arn, | ||
| awsAccessKey: "test-key", | ||
| awsSecretKey: "test-secret", | ||
| } | ||
|
|
||
| handler = new AwsBedrockHandler(options) | ||
| const model = handler.getModel() | ||
|
|
||
| expect(model.info.supportsReasoningBudget).toBe(true) | ||
| expect(model.info.supportsPromptCache).toBe(true) | ||
| }) | ||
| }) | ||
|
|
||
| it("should not enable thinking for non-Claude-4.5 custom ARNs", () => { | ||
| const options: ProviderSettings = { | ||
| apiProvider: "bedrock", | ||
| awsRegion: "us-east-1", | ||
| awsCustomArn: | ||
| "arn:aws:bedrock:us-east-1:123456789012:foundation-model/anthropic.claude-3-haiku-20240307-v1:0", | ||
| awsAccessKey: "test-key", | ||
| awsSecretKey: "test-secret", | ||
| } | ||
|
|
||
| handler = new AwsBedrockHandler(options) | ||
| const model = handler.getModel() | ||
|
|
||
| // Should not have reasoning budget support for non-Claude-4.5 models | ||
| expect(model.info.supportsReasoningBudget).toBeFalsy() | ||
| }) | ||
| }) | ||
|
|
||
| describe("ARN Parsing with Global Inference Profile", () => { | ||
| it("should correctly parse global inference profile ARN", () => { | ||
| const handler = new AwsBedrockHandler({ | ||
| apiProvider: "bedrock", | ||
| awsRegion: "us-east-1", | ||
| awsAccessKey: "test-key", | ||
| awsSecretKey: "test-secret", | ||
| }) | ||
|
|
||
| const parseArn = (handler as any).parseArn.bind(handler) | ||
| const result = parseArn( | ||
| "arn:aws:bedrock:us-east-1:148761681080:inference-profile/global.anthropic.claude-sonnet-4-5-20250929-v1:0", | ||
| ) | ||
|
|
||
| expect(result.isValid).toBe(true) | ||
| expect(result.region).toBe("us-east-1") | ||
| expect(result.modelType).toBe("inference-profile") | ||
| expect(result.modelId).toContain("anthropic.claude-sonnet-4-5-20250929") | ||
| }) | ||
| }) | ||
| }) | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -252,9 +252,34 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH | |
| } | ||
|
|
||
| // Helper to guess model info from custom modelId string if not in bedrockModels | ||
| private guessModelInfoFromId(modelId: string): Partial<ModelInfo> { | ||
| private guessModelInfoFromId(modelId: string | undefined): Partial<ModelInfo> { | ||
| // Handle undefined or empty modelId | ||
| if (!modelId) { | ||
| return { | ||
| maxTokens: BEDROCK_MAX_TOKENS, | ||
| contextWindow: BEDROCK_DEFAULT_CONTEXT, | ||
| supportsImages: false, | ||
| supportsPromptCache: false, | ||
| } | ||
| } | ||
| // Define a mapping for model ID patterns and their configurations | ||
| const modelConfigMap: Record<string, Partial<ModelInfo>> = { | ||
| // Claude 4.5 Sonnet models (including global inference profile) | ||
| "claude-sonnet-4-5": { | ||
| maxTokens: 8192, | ||
| contextWindow: 200_000, | ||
| supportsImages: true, | ||
| supportsPromptCache: true, | ||
| supportsReasoningBudget: true, | ||
| }, | ||
| // Claude 4 Sonnet models | ||
| "claude-sonnet-4": { | ||
| maxTokens: 8192, | ||
| contextWindow: 200_000, | ||
| supportsImages: true, | ||
| supportsPromptCache: true, | ||
| supportsReasoningBudget: true, | ||
| }, | ||
| "claude-4": { | ||
| maxTokens: 8192, | ||
| contextWindow: 200_000, | ||
|
|
@@ -266,6 +291,7 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH | |
| contextWindow: 200_000, | ||
| supportsImages: true, | ||
| supportsPromptCache: true, | ||
| supportsReasoningBudget: true, | ||
| }, | ||
| "claude-3-5": { | ||
| maxTokens: 8192, | ||
|
|
@@ -376,8 +402,10 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH | |
| // Check if 1M context is enabled for Claude Sonnet 4 | ||
| // Use parseBaseModelId to handle cross-region inference prefixes | ||
| const baseModelId = this.parseBaseModelId(modelConfig.id) | ||
| const is1MContextEnabled = | ||
| BEDROCK_1M_CONTEXT_MODEL_IDS.includes(baseModelId as any) && this.options.awsBedrock1MContext | ||
| // Check if it's a known model ID or if it's a custom ARN that matches Claude 4.5 pattern | ||
| const isEligibleFor1MContext = | ||
| BEDROCK_1M_CONTEXT_MODEL_IDS.includes(baseModelId as any) || this.isClaudeSonnet45Model(modelConfig.id) | ||
| const is1MContextEnabled = isEligibleFor1MContext && this.options.awsBedrock1MContext | ||
|
|
||
|
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. [P2] 1M context is enabled via anthropic_beta for custom Claude 4.5 ARNs, but getModel() still reports a 200k contextWindow for these cases. This can desync pricing/limits and any logic relying on info.contextWindow. Consider also setting info.contextWindow to 1_000_000 when awsBedrock1MContext is true for 4.5 custom ARNs to keep behavior consistent across UI, cost model, and runtime. |
||
| // Add anthropic_beta for 1M context to additionalModelRequestFields | ||
| if (is1MContextEnabled) { | ||
|
|
@@ -889,6 +917,18 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH | |
| return modelId | ||
| } | ||
|
|
||
| // Helper method to check if a model ID represents a Claude Sonnet 4.5 model | ||
| private isClaudeSonnet45Model(modelId: string): boolean { | ||
| const id = modelId.toLowerCase() | ||
| // Check for various Claude 4.5 patterns including global inference profile | ||
| return ( | ||
| id.includes("claude-sonnet-4-5") || | ||
| id.includes("claude-sonnet-4.5") || | ||
| // Specific check for the global inference profile ARN mentioned in the issue | ||
| id.includes("global.anthropic.claude-sonnet-4-5-20250929") | ||
|
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. [P3] The pattern check hardcodes the date suffix "20250929". Since the broader checks already match 'claude-sonnet-4-5'/'claude-sonnet-4.5', this specific fragment is brittle and redundant. Consider removing or relaxing it to avoid unexpected mismatches when AWS updates the ARN version. |
||
| ) | ||
| } | ||
|
|
||
| //Prompt Router responses come back in a different sequence and the model used is in the response and must be fetched by name | ||
| getModelById(modelId: string, modelType?: string): { id: BedrockModelId | string; info: ModelInfo } { | ||
| // Try to find the model in bedrockModels | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -192,12 +192,44 @@ function getSelectedModel({ | |
| const id = apiConfiguration.apiModelId ?? bedrockDefaultModelId | ||
| const baseInfo = bedrockModels[id as keyof typeof bedrockModels] | ||
|
|
||
| // Helper function to check if a model ID or ARN represents a Claude Sonnet 4.5 model | ||
|
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. [P2] Claude 4.5 detection logic is duplicated here and in settings/providers/Bedrock.tsx, and again in the backend. This increases drift risk. Recommend extracting a shared helper (e.g., a small util in a shared package) and reusing it across frontend/backend so feature gating (thinking + 1M) stays consistent. |
||
/**
 * Reports whether a Bedrock model ID or ARN refers to a Claude Sonnet 4.5 model.
 *
 * Matching is a case-insensitive substring test that accepts both the
 * hyphenated ("claude-sonnet-4-5") and dotted ("claude-sonnet-4.5") spellings.
 * Global inference profile ARNs such as
 * "…:inference-profile/global.anthropic.claude-sonnet-4-5-20250929-v1:0"
 * already contain the hyphenated marker, so no date-pinned pattern is needed.
 *
 * @param modelId - Model ID or ARN to inspect; an empty value never matches.
 * @returns `true` when either spelling occurs anywhere in `modelId`.
 */
const isClaudeSonnet45Model = (modelId: string): boolean => {
	if (!modelId) return false
	const lowerId = modelId.toLowerCase()
	return lowerId.includes("claude-sonnet-4-5") || lowerId.includes("claude-sonnet-4.5")
}
|
|
||
| // Special case for custom ARN. | ||
| if (id === "custom-arn") { | ||
| return { | ||
| id, | ||
| info: { maxTokens: 5000, contextWindow: 128_000, supportsPromptCache: false, supportsImages: true }, | ||
| const customArn = apiConfiguration.awsCustomArn || "" | ||
| const isClaudeSonnet45 = isClaudeSonnet45Model(customArn) | ||
|
|
||
| // Base info for custom ARNs | ||
| let info: ModelInfo = { | ||
| maxTokens: 5000, | ||
| contextWindow: 128_000, | ||
| supportsPromptCache: false, | ||
| supportsImages: true, | ||
| } | ||
|
|
||
| // If it's a Claude Sonnet 4.5 model, add thinking support and better defaults | ||
| if (isClaudeSonnet45) { | ||
| info = { | ||
| maxTokens: 8192, | ||
| contextWindow: apiConfiguration.awsBedrock1MContext ? 1_000_000 : 200_000, | ||
| supportsImages: true, | ||
| supportsPromptCache: true, | ||
| supportsReasoningBudget: true, | ||
| supportsComputerUse: true, | ||
| } | ||
| } | ||
|
|
||
| return { id, info } | ||
| } | ||
|
|
||
| // Apply 1M context for Claude Sonnet 4 / 4.5 when enabled | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[P1] The logger module is mocked without a factory, but the test asserts on logger.info. To ensure logger.info is a spy, explicitly mock the module exports with a concrete logger object.