Skip to content

Commit 4236faa

Browse files
authored
feat(bedrock-content-generator): use inference profiles for on-demand Claude models (#10711)

- Add getInvokeId(region) and pass region into invokeCommand so InvokeModel uses inference profile IDs for Sonnet 4.x, 3.5 Haiku, 3 Haiku, and v3 Sonnet.
- Keep backward compatibility: the stored model id is unchanged; v2.1, Instant, Llama, and Mistral still use the foundation model ID.
- Treat models that define getInvokeId as in-region even when absent from ListFoundationModels, relying on the invoke check for availability.

Made with: Cursor
1 parent 0b55f21 commit 4236faa

File tree

4 files changed

+107
-23
lines changed

4 files changed

+107
-23
lines changed

apps/bedrock-content-generator/src/components/config/model/Model.spec.tsx

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,8 @@ describe('Display Model', () => {
6060
getModelAvailability: vi
6161
.fn()
6262
.mockResolvedValueOnce('AVAILABLE')
63-
.mockResolvedValueOnce('NOT_IN_ACCOUNT'),
63+
.mockResolvedValueOnce('NOT_IN_ACCOUNT')
64+
.mockResolvedValue('AVAILABLE'),
6465
};
6566
});
6667
const { getByText, unmount, findByText } = render(
@@ -80,9 +81,7 @@ describe('Display Model', () => {
8081
expect(getByText('Anthropic Claude v2.1')).toBeTruthy();
8182
});
8283
await waitFor(() => {
83-
expect(
84-
getByText(/The models .*, Meta Llama 2 70B, .* are not available in the abc region./)
85-
).toBeTruthy();
84+
expect(getByText(/Meta Llama 2 70B.*are not available in the abc region\./)).toBeTruthy();
8685
});
8786
await waitFor(() => {
8887
expect(

apps/bedrock-content-generator/src/components/config/model/Model.tsx

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,9 @@ const Model = ({ credentials, credentialsValid, model, modelValid, region, dispa
7676
ai.getModels().then((allModels) => {
7777
const modelsWithRegionAvailability: ModelWithAvailability[] = featuredModels.map(
7878
(featuredModel) => {
79-
const isInRegion = allModels.some((m) => m.modelId === featuredModel.id);
79+
const isInFoundationList = allModels.some((m) => m.modelId === featuredModel.id);
80+
// Models that use inference profiles may not appear in ListFoundationModels; still run availability check (invoke with profile ID).
81+
const isInRegion = isInFoundationList || !!featuredModel.getInvokeId;
8082

8183
return {
8284
...featuredModel,

apps/bedrock-content-generator/src/configs/aws/featuredModels.ts

Lines changed: 93 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,22 @@
11
import { InvokeModelCommandInput } from '@aws-sdk/client-bedrock-runtime';
22

3+
/** Maps AWS region to geography prefix for inference profile IDs (us/eu/global). */
4+
export function getInferenceProfilePrefix(region: string): 'us' | 'eu' | 'global' {
5+
if (region.startsWith('eu-')) return 'eu';
6+
if (region.startsWith('us-') || region.startsWith('ca-')) return 'us';
7+
return 'global';
8+
}
9+
310
export interface BedrockModel {
411
id: string;
512
name: string;
13+
/** When set, used as modelId for InvokeModel (inference profile ID); otherwise id is used. Backward compat: existing models omit this. */
14+
getInvokeId?: (region: string) => string;
615
invokeCommand: (
716
systemPrompt: string,
817
prompt: string,
9-
maxTokens?: number
18+
maxTokens?: number,
19+
region?: string
1020
) => InvokeModelCommandInput;
1121
// eslint-disable-next-line @typescript-eslint/no-explicit-any
1222
parseResponse: (response: any) => string;
@@ -31,13 +41,21 @@ interface ContentBlockStartMsg {
3141
class ClaudeModel implements BedrockModel {
3242
id: string;
3343
name: string;
44+
getInvokeId?: (region: string) => string;
3445

35-
constructor(id: string, name: string) {
46+
constructor(id: string, name: string, getInvokeId?: (region: string) => string) {
3647
this.id = id;
3748
this.name = name;
49+
this.getInvokeId = getInvokeId;
3850
}
3951

40-
invokeCommand(systemPrompt: string, prompt: string, maxTokens?: number): InvokeModelCommandInput {
52+
invokeCommand(
53+
systemPrompt: string,
54+
prompt: string,
55+
maxTokens?: number,
56+
region?: string
57+
): InvokeModelCommandInput {
58+
const modelId = this.getInvokeId && region !== undefined ? this.getInvokeId(region) : this.id;
4159
const messages = [
4260
{
4361
role: 'user',
@@ -51,7 +69,7 @@ class ClaudeModel implements BedrockModel {
5169
];
5270

5371
return {
54-
modelId: this.id,
72+
modelId,
5573
contentType: 'application/json',
5674

5775
body: JSON.stringify({
@@ -79,7 +97,13 @@ class LlamaModel implements BedrockModel {
7997
this.id = id;
8098
this.name = name;
8199
}
82-
invokeCommand(systemPrompt: string, prompt: string, maxTokens?: number): InvokeModelCommandInput {
100+
invokeCommand(
101+
systemPrompt: string,
102+
prompt: string,
103+
maxTokens?: number,
104+
region?: string
105+
): InvokeModelCommandInput {
106+
void region; // optional for interface compat; Llama uses direct model ID only
83107
const completePrompt = `
84108
${systemPrompt}
85109
@@ -111,7 +135,13 @@ class MistralModel implements BedrockModel {
111135
this.name = name;
112136
}
113137

114-
invokeCommand(systemPrompt: string, prompt: string, maxTokens?: number): InvokeModelCommandInput {
138+
invokeCommand(
139+
systemPrompt: string,
140+
prompt: string,
141+
maxTokens?: number,
142+
region?: string
143+
): InvokeModelCommandInput {
144+
void region; // optional for interface compat; Mistral uses direct model ID only
115145
const completePrompt = `<s>[INST] ${systemPrompt} [/INST]
116146
[INST] ${prompt} [/INST]`;
117147

@@ -130,19 +160,67 @@ class MistralModel implements BedrockModel {
130160
}
131161
}
132162

133-
/** Default/recommended model for new installs (must be first in featuredModels). */
163+
/** Inference profile IDs for models that require them (on-demand no longer supports raw model ID). See AWS docs: inference-profiles-use, inference-profiles-support. */
164+
function inferenceProfileId(region: string, profileIdByPrefix: Record<string, string>): string {
165+
const prefix = getInferenceProfilePrefix(region);
166+
return profileIdByPrefix[prefix] ?? profileIdByPrefix['us'] ?? profileIdByPrefix['global'];
167+
}
168+
134169
export const defaultModelId = 'anthropic.claude-sonnet-4-6';
135170
export const defaultModelDisplayName = 'Anthropic Claude Sonnet 4.6';
136171

137172
export const featuredModels: BedrockModel[] = [
138-
// Modern Claude models (recommended)
139-
new ClaudeModel(defaultModelId, defaultModelDisplayName),
140-
new ClaudeModel('anthropic.claude-sonnet-4-5-20250929-v1:0', 'Anthropic Claude Sonnet 4.5'),
141-
new ClaudeModel('anthropic.claude-sonnet-4-20250514-v1:0', 'Anthropic Claude Sonnet 4'),
142-
new ClaudeModel('anthropic.claude-3-5-haiku-20241022-v1:0', 'Anthropic Claude 3.5 Haiku'),
143-
new ClaudeModel('anthropic.claude-3-haiku-20240307-v1:0', 'Anthropic Claude 3 Haiku'),
144-
// Existing models (kept for backward compatibility)
145-
new ClaudeModel('anthropic.claude-3-sonnet-20240229-v1:0', 'Anthropic Claude v3 Sonnet'),
173+
// Modern Claude models (use inference profiles; raw model ID causes ValidationException on-demand)
174+
new ClaudeModel(
175+
defaultModelId,
176+
defaultModelDisplayName,
177+
() => 'global.anthropic.claude-sonnet-4-6'
178+
),
179+
new ClaudeModel(
180+
'anthropic.claude-sonnet-4-5-20250929-v1:0',
181+
'Anthropic Claude Sonnet 4.5',
182+
(region) =>
183+
inferenceProfileId(region, {
184+
global: 'global.anthropic.claude-sonnet-4-5-20250929-v1:0',
185+
us: 'us.anthropic.claude-sonnet-4-5-20250929-v1:0',
186+
eu: 'eu.anthropic.claude-sonnet-4-5-20250929-v1:0',
187+
})
188+
),
189+
new ClaudeModel(
190+
'anthropic.claude-sonnet-4-20250514-v1:0',
191+
'Anthropic Claude Sonnet 4',
192+
(region) =>
193+
inferenceProfileId(region, {
194+
global: 'global.anthropic.claude-sonnet-4-20250514-v1:0',
195+
us: 'us.anthropic.claude-sonnet-4-20250514-v1:0',
196+
eu: 'eu.anthropic.claude-sonnet-4-20250514-v1:0',
197+
})
198+
),
199+
new ClaudeModel(
200+
'anthropic.claude-3-5-haiku-20241022-v1:0',
201+
'Anthropic Claude 3.5 Haiku',
202+
(region) =>
203+
inferenceProfileId(region, {
204+
us: 'us.anthropic.claude-3-5-haiku-20241022-v1:0',
205+
eu: 'eu.anthropic.claude-3-5-haiku-20241022-v1:0',
206+
})
207+
),
208+
new ClaudeModel('anthropic.claude-3-haiku-20240307-v1:0', 'Anthropic Claude 3 Haiku', (region) =>
209+
inferenceProfileId(region, {
210+
us: 'us.anthropic.claude-3-haiku-20240307-v1:0',
211+
eu: 'eu.anthropic.claude-3-haiku-20240307-v1:0',
212+
})
213+
),
214+
// Existing models that require inference profiles (v3 Sonnet) or direct model ID (v2.1, Instant)
215+
new ClaudeModel(
216+
'anthropic.claude-3-sonnet-20240229-v1:0',
217+
'Anthropic Claude v3 Sonnet',
218+
(region) =>
219+
inferenceProfileId(region, {
220+
us: 'us.anthropic.claude-3-sonnet-20240229-v1:0',
221+
eu: 'eu.anthropic.claude-3-sonnet-20240229-v1:0',
222+
})
223+
),
146224
new ClaudeModel('anthropic.claude-v2:1', 'Anthropic Claude v2.1'),
147225
new ClaudeModel('anthropic.claude-instant-v1', 'Anthropic Claude Instant v1.2'),
148226
new LlamaModel('meta.llama2-70b-chat-v1', 'Meta Llama 2 70B'),

apps/bedrock-content-generator/src/utils/aiApi/index.ts

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,11 @@ class AI {
1515
private bedrockClient: BedrockClient;
1616
private bedrockRuntimeClient: BedrockRuntimeClient;
1717

18+
private region: string;
19+
1820
constructor(accessKeyID: string, secretAccessKey: string, region: string, model?: BedrockModel) {
1921
this.decoder = new TextDecoder('utf-8');
22+
this.region = region;
2023

2124
const config = {
2225
region,
@@ -40,9 +43,10 @@ class AI {
4043
prompt: string
4144
): Promise<AsyncGenerator<string, void, unknown> | undefined> => {
4245
const model = this.model!;
43-
console.log(`modelId: ${model.id}`);
46+
const invokeInput = model.invokeCommand(systemPrompt, prompt, 2048, this.region);
47+
console.log(`modelId: ${invokeInput.modelId}`);
4448
const stream = await this.bedrockRuntimeClient.send(
45-
new InvokeModelWithResponseStreamCommand(model.invokeCommand(systemPrompt, prompt, 2048))
49+
new InvokeModelWithResponseStreamCommand(invokeInput)
4650
);
4751

4852
if (!stream.body) return;
@@ -74,7 +78,8 @@ class AI {
7478
) => {
7579
try {
7680
console.log(model);
77-
await this.bedrockRuntimeClient.send(new InvokeModelCommand(model.invokeCommand('', '', 1)));
81+
const invokeInput = model.invokeCommand('', '', 1, this.region);
82+
await this.bedrockRuntimeClient.send(new InvokeModelCommand(invokeInput));
7883
} catch (e: unknown) {
7984
if (!(e instanceof Error)) {
8085
return Error('An unexpected error has occurred');

0 commit comments

Comments
 (0)