Skip to content

Commit 0a22005

Browse files
ybalbert001 (Yuanbo Li) and gemini-code-assist[bot]
authored
FixBug: Inference profile isn't working for bedrock embedding model (#2144)
* FixBug: Inference profile isn't working for bedrock embedding model

* Update models/bedrock/models/text_embedding/text_embedding.py

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>

* Fix logic issue

---------

Co-authored-by: Yuanbo Li <ybalbert@amazon.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
1 parent a11dc8b commit 0a22005

File tree

2 files changed

+8
-7
lines changed

2 files changed

+8
-7
lines changed

models/bedrock/manifest.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
version: 0.0.52
1+
version: 0.0.53
22
type: plugin
33
author: langgenius
44
name: bedrock

models/bedrock/models/text_embedding/text_embedding.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -62,10 +62,10 @@ def _invoke(
6262
if inference_profile_id:
6363
# Get the full ARN from the profile ID
6464
profile_info = get_inference_profile_info(inference_profile_id, credentials)
65-
model_id = profile_info.get("inferenceProfileArn")
66-
if not model_id:
65+
model_package_arn = profile_info.get("inferenceProfileArn")
66+
if not model_package_arn:
6767
raise InvokeError(f"Could not get ARN for inference profile {inference_profile_id}")
68-
logger.info(f"Using inference profile ARN: {model_id}")
68+
logger.info(f"Using inference profile ARN: {model_package_arn}")
6969

7070
# Determine model prefix from underlying models
7171
underlying_models = profile_info.get("models", [])
@@ -80,6 +80,7 @@ def _invoke(
8080
raise InvokeError(f"No underlying models found in inference profile")
8181
else:
8282
# Traditional model - use model directly
83+
model_package_arn = model
8384
model_prefix = model.split(".")[0]
8485

8586
bedrock_runtime = get_bedrock_client("bedrock-runtime", credentials)
@@ -102,7 +103,7 @@ def _invoke(
102103
}
103104
}
104105
}
105-
response_body = self._invoke_bedrock_embedding(model_id, bedrock_runtime, body)
106+
response_body = self._invoke_bedrock_embedding(model_package_arn, bedrock_runtime, body)
106107
embedding_data = response_body.get("embeddings", [{}])[0]
107108
embeddings.extend([embedding_data.get("embedding")])
108109
token_usage += len(text.split())
@@ -120,7 +121,7 @@ def _invoke(
120121
body = {
121122
"inputText": text,
122123
}
123-
response_body = self._invoke_bedrock_embedding(model_id, bedrock_runtime, body)
124+
response_body = self._invoke_bedrock_embedding(model_package_arn, bedrock_runtime, body)
124125
embeddings.extend([response_body.get("embedding")])
125126
token_usage += response_body.get("inputTextTokenCount")
126127
logger.warning(f"Total Tokens: {token_usage}")
@@ -138,7 +139,7 @@ def _invoke(
138139
"texts": [text],
139140
"input_type": input_type,
140141
}
141-
response_body = self._invoke_bedrock_embedding(model_id, bedrock_runtime, body)
142+
response_body = self._invoke_bedrock_embedding(model_package_arn, bedrock_runtime, body)
142143
embeddings.extend(response_body.get("embeddings"))
143144
token_usage += len(text)
144145
result = TextEmbeddingResult(

0 commit comments

Comments (0)