Skip to content

Commit 4236faa

Browse files
authored
feat(bedrock-content-generator): use inference profiles for on-demand Claude models (#10711)

- Add getInvokeId(region) and pass region into invokeCommand so InvokeModel uses inference profile IDs for Sonnet 4.x, 3.5 Haiku, 3 Haiku, and v3 Sonnet.
- Keep backward compatibility: the stored model id is unchanged; v2.1, Instant, Llama, and Mistral still use the foundation model ID.
- Treat models that define getInvokeId as in-region even when absent from ListFoundationModels, relying on the invoke check for availability.

Made with: Cursor
1 parent 0b55f21 commit 4236faa

File tree

4 files changed

+107
-23
lines changed

4 files changed

+107
-23
lines changed

apps/bedrock-content-generator/src/components/config/model/Model.spec.tsx

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,8 @@ describe('Display Model', () => {
6060
getModelAvailability: vi
6161
.fn()
6262
.mockResolvedValueOnce('AVAILABLE')
63-
.mockResolvedValueOnce('NOT_IN_ACCOUNT'),
63+
.mockResolvedValueOnce('NOT_IN_ACCOUNT')
64+
.mockResolvedValue('AVAILABLE'),
6465
};
6566
});
6667
const { getByText, unmount, findByText } = render(
@@ -80,9 +81,7 @@ describe('Display Model', () => {
8081
expect(getByText('Anthropic Claude v2.1')).toBeTruthy();
8182
});
8283
await waitFor(() => {
83-
expect(
84-
getByText(/The models .*, Meta Llama 2 70B, .* are not available in the abc region./)
85-
).toBeTruthy();
84+
expect(getByText(/Meta Llama 2 70B.*are not available in the abc region\./)).toBeTruthy();
8685
});
8786
await waitFor(() => {
8887
expect(

apps/bedrock-content-generator/src/components/config/model/Model.tsx

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,9 @@ const Model = ({ credentials, credentialsValid, model, modelValid, region, dispa
7676
ai.getModels().then((allModels) => {
7777
const modelsWithRegionAvailability: ModelWithAvailability[] = featuredModels.map(
7878
(featuredModel) => {
79-
const isInRegion = allModels.some((m) => m.modelId === featuredModel.id);
79+
const isInFoundationList = allModels.some((m) => m.modelId === featuredModel.id);
80+
// Models that use inference profiles may not appear in ListFoundationModels; still run availability check (invoke with profile ID).
81+
const isInRegion = isInFoundationList || !!featuredModel.getInvokeId;
8082

8183
return {
8284
...featuredModel,

apps/bedrock-content-generator/src/configs/aws/featuredModels.ts

Lines changed: 93 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,22 @@
11
import { InvokeModelCommandInput } from '@aws-sdk/client-bedrock-runtime';
22

3+
/** Maps AWS region to geography prefix for inference profile IDs (us/eu/global). */
4+
export function getInferenceProfilePrefix(region: string): 'us' | 'eu' | 'global' {
5+
if (region.startsWith('eu-')) return 'eu';
6+
if (region.startsWith('us-') || region.startsWith('ca-')) return 'us';
7+
return 'global';
8+
}
9+
310
export interface BedrockModel {
411
id: string;
512
name: string;
13+
/** When set, used as modelId for InvokeModel (inference profile ID); otherwise id is used. Backward compat: existing models omit this. */
14+
getInvokeId?: (region: string) => string;
615
invokeCommand: (
716
systemPrompt: string,
817
prompt: string,
9-
maxTokens?: number
18+
maxTokens?: number,
19+
region?: string
1020
) => InvokeModelCommandInput;
1121
// eslint-disable-next-line @typescript-eslint/no-explicit-any
1222
parseResponse: (response: any) => string;
@@ -31,13 +41,21 @@ interface ContentBlockStartMsg {
3141
class ClaudeModel implements BedrockModel {
3242
id: string;
3343
name: string;
44+
getInvokeId?: (region: string) => string;
3445

35-
constructor(id: string, name: string) {
46+
constructor(id: string, name: string, getInvokeId?: (region: string) => string) {
3647
this.id = id;
3748
this.name = name;
49+
this.getInvokeId = getInvokeId;
3850
}
3951

40-
invokeCommand(systemPrompt: string, prompt: string, maxTokens?: number): InvokeModelCommandInput {
52+
invokeCommand(
53+
systemPrompt: string,
54+
prompt: string,
55+
maxTokens?: number,
56+
region?: string
57+
): InvokeModelCommandInput {
58+
const modelId = this.getInvokeId && region !== undefined ? this.getInvokeId(region) : this.id;
4159
const messages = [
4260
{
4361
role: 'user',
@@ -51,7 +69,7 @@ class ClaudeModel implements BedrockModel {
5169
];
5270

5371
return {
54-
modelId: this.id,
72+
modelId,
5573
contentType: 'application/json',
5674

5775
body: JSON.stringify({
@@ -79,7 +97,13 @@ class LlamaModel implements BedrockModel {
7997
this.id = id;
8098
this.name = name;
8199
}
82-
invokeCommand(systemPrompt: string, prompt: string, maxTokens?: number): InvokeModelCommandInput {
100+
invokeCommand(
101+
systemPrompt: string,
102+
prompt: string,
103+
maxTokens?: number,
104+
region?: string
105+
): InvokeModelCommandInput {
106+
void region; // optional for interface compat; Llama uses direct model ID only
83107
const completePrompt = `
84108
${systemPrompt}
85109
@@ -111,7 +135,13 @@ class MistralModel implements BedrockModel {
111135
this.name = name;
112136
}
113137

114-
invokeCommand(systemPrompt: string, prompt: string, maxTokens?: number): InvokeModelCommandInput {
138+
invokeCommand(
139+
systemPrompt: string,
140+
prompt: string,
141+
maxTokens?: number,
142+
region?: string
143+
): InvokeModelCommandInput {
144+
void region; // optional for interface compat; Mistral uses direct model ID only
115145
const completePrompt = `<s>[INST] ${systemPrompt} [/INST]
116146
[INST] ${prompt} [/INST]`;
117147

@@ -130,19 +160,67 @@ class MistralModel implements BedrockModel {
130160
}
131161
}
132162

133-
/** Default/recommended model for new installs (must be first in featuredModels). */
163+
/** Inference profile IDs for models that require them (on-demand no longer supports raw model ID). See AWS docs: inference-profiles-use, inference-profiles-support. */
164+
function inferenceProfileId(region: string, profileIdByPrefix: Record<string, string>): string {
165+
const prefix = getInferenceProfilePrefix(region);
166+
return profileIdByPrefix[prefix] ?? profileIdByPrefix['us'] ?? profileIdByPrefix['global'];
167+
}
168+
134169
export const defaultModelId = 'anthropic.claude-sonnet-4-6';
135170
export const defaultModelDisplayName = 'Anthropic Claude Sonnet 4.6';
136171

137172
export const featuredModels: BedrockModel[] = [
138-
// Modern Claude models (recommended)
139-
new ClaudeModel(defaultModelId, defaultModelDisplayName),
140-
new ClaudeModel('anthropic.claude-sonnet-4-5-20250929-v1:0', 'Anthropic Claude Sonnet 4.5'),
141-
new ClaudeModel('anthropic.claude-sonnet-4-20250514-v1:0', 'Anthropic Claude Sonnet 4'),
142-
new ClaudeModel('anthropic.claude-3-5-haiku-20241022-v1:0', 'Anthropic Claude 3.5 Haiku'),
143-
new ClaudeModel('anthropic.claude-3-haiku-20240307-v1:0', 'Anthropic Claude 3 Haiku'),
144-
// Existing models (kept for backward compatibility)
145-
new ClaudeModel('anthropic.claude-3-sonnet-20240229-v1:0', 'Anthropic Claude v3 Sonnet'),
173+
// Modern Claude models (use inference profiles; raw model ID causes ValidationException on-demand)
174+
new ClaudeModel(
175+
defaultModelId,
176+
defaultModelDisplayName,
177+
() => 'global.anthropic.claude-sonnet-4-6'
178+
),
179+
new ClaudeModel(
180+
'anthropic.claude-sonnet-4-5-20250929-v1:0',
181+
'Anthropic Claude Sonnet 4.5',
182+
(region) =>
183+
inferenceProfileId(region, {
184+
global: 'global.anthropic.claude-sonnet-4-5-20250929-v1:0',
185+
us: 'us.anthropic.claude-sonnet-4-5-20250929-v1:0',
186+
eu: 'eu.anthropic.claude-sonnet-4-5-20250929-v1:0',
187+
})
188+
),
189+
new ClaudeModel(
190+
'anthropic.claude-sonnet-4-20250514-v1:0',
191+
'Anthropic Claude Sonnet 4',
192+
(region) =>
193+
inferenceProfileId(region, {
194+
global: 'global.anthropic.claude-sonnet-4-20250514-v1:0',
195+
us: 'us.anthropic.claude-sonnet-4-20250514-v1:0',
196+
eu: 'eu.anthropic.claude-sonnet-4-20250514-v1:0',
197+
})
198+
),
199+
new ClaudeModel(
200+
'anthropic.claude-3-5-haiku-20241022-v1:0',
201+
'Anthropic Claude 3.5 Haiku',
202+
(region) =>
203+
inferenceProfileId(region, {
204+
us: 'us.anthropic.claude-3-5-haiku-20241022-v1:0',
205+
eu: 'eu.anthropic.claude-3-5-haiku-20241022-v1:0',
206+
})
207+
),
208+
new ClaudeModel('anthropic.claude-3-haiku-20240307-v1:0', 'Anthropic Claude 3 Haiku', (region) =>
209+
inferenceProfileId(region, {
210+
us: 'us.anthropic.claude-3-haiku-20240307-v1:0',
211+
eu: 'eu.anthropic.claude-3-haiku-20240307-v1:0',
212+
})
213+
),
214+
// Existing models that require inference profiles (v3 Sonnet) or direct model ID (v2.1, Instant)
215+
new ClaudeModel(
216+
'anthropic.claude-3-sonnet-20240229-v1:0',
217+
'Anthropic Claude v3 Sonnet',
218+
(region) =>
219+
inferenceProfileId(region, {
220+
us: 'us.anthropic.claude-3-sonnet-20240229-v1:0',
221+
eu: 'eu.anthropic.claude-3-sonnet-20240229-v1:0',
222+
})
223+
),
146224
new ClaudeModel('anthropic.claude-v2:1', 'Anthropic Claude v2.1'),
147225
new ClaudeModel('anthropic.claude-instant-v1', 'Anthropic Claude Instant v1.2'),
148226
new LlamaModel('meta.llama2-70b-chat-v1', 'Meta Llama 2 70B'),

apps/bedrock-content-generator/src/utils/aiApi/index.ts

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,11 @@ class AI {
1515
private bedrockClient: BedrockClient;
1616
private bedrockRuntimeClient: BedrockRuntimeClient;
1717

18+
private region: string;
19+
1820
constructor(accessKeyID: string, secretAccessKey: string, region: string, model?: BedrockModel) {
1921
this.decoder = new TextDecoder('utf-8');
22+
this.region = region;
2023

2124
const config = {
2225
region,
@@ -40,9 +43,10 @@ class AI {
4043
prompt: string
4144
): Promise<AsyncGenerator<string, void, unknown> | undefined> => {
4245
const model = this.model!;
43-
console.log(`modelId: ${model.id}`);
46+
const invokeInput = model.invokeCommand(systemPrompt, prompt, 2048, this.region);
47+
console.log(`modelId: ${invokeInput.modelId}`);
4448
const stream = await this.bedrockRuntimeClient.send(
45-
new InvokeModelWithResponseStreamCommand(model.invokeCommand(systemPrompt, prompt, 2048))
49+
new InvokeModelWithResponseStreamCommand(invokeInput)
4650
);
4751

4852
if (!stream.body) return;
@@ -74,7 +78,8 @@ class AI {
7478
) => {
7579
try {
7680
console.log(model);
77-
await this.bedrockRuntimeClient.send(new InvokeModelCommand(model.invokeCommand('', '', 1)));
81+
const invokeInput = model.invokeCommand('', '', 1, this.region);
82+
await this.bedrockRuntimeClient.send(new InvokeModelCommand(invokeInput));
7883
} catch (e: unknown) {
7984
if (!(e instanceof Error)) {
8085
return Error('An unexpected error has occurred');

0 commit comments

Comments
 (0)