Merge pull request #14687 from timelfrink/fix/issue-14685-bedrock-titan-v2-encoding-format

krrishdholakia · web-flow · commit 9238e1d3bf82 · 2025-09-18T17:54:42.000-07:00
Fix: Bedrock Titan V2 encoding_format parameter support
diff --git a/docs/my-website/docs/providers/bedrock.md b/docs/my-website/docs/providers/bedrock.md
@@ -1822,6 +1822,59 @@ Here's an example of using a bedrock model with LiteLLM. For a complete list, re
 | Mixtral 8x7B Instruct      | `completion(model='bedrock/mistral.mixtral-8x7b-instruct-v0:1', messages=messages)`   | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']`, `os.environ['AWS_REGION_NAME']` |
 
 
+## Bedrock Embedding
+
+### API keys
+These can be set as environment variables or passed as **params to litellm.embedding()**
+```python
+import os
+os.environ["AWS_ACCESS_KEY_ID"] = ""        # Access key
+os.environ["AWS_SECRET_ACCESS_KEY"] = ""    # Secret access key
+os.environ["AWS_REGION_NAME"] = ""           # us-east-1, us-east-2, us-west-1, us-west-2
+```
+
+### Usage
+```python
+from litellm import embedding
+response = embedding(
+    model="bedrock/amazon.titan-embed-text-v1",
+    input=["good morning from litellm"],
+)
+print(response)
+```
+
+#### Titan V2 - encoding_format support
+```python
+from litellm import embedding
+# Float format (default)
+response = embedding(
+    model="bedrock/amazon.titan-embed-text-v2:0",
+    input=["good morning from litellm"],
+    encoding_format="float"  # Returns float array
+)
+
+# Binary format
+response = embedding(
+    model="bedrock/amazon.titan-embed-text-v2:0",
+    input=["good morning from litellm"],
+    encoding_format="base64"  # Mapped to Bedrock embeddingTypes=["binary"]; returns the binary embedding
+)
+```
+
+## Supported AWS Bedrock Embedding Models
+
+| Model Name           | Usage                               | Supported Additional OpenAI params |
+|----------------------|---------------------------------------------|-----|
+| Titan Embeddings V2 | `embedding(model="bedrock/amazon.titan-embed-text-v2:0", input=input)` | `dimensions`, `encoding_format` |
+| Titan Embeddings - V1 | `embedding(model="bedrock/amazon.titan-embed-text-v1", input=input)` | [here](https://github.com/BerriAI/litellm/blob/f5905e100068e7a4d61441d7453d7cf5609c2121/litellm/llms/bedrock/embed/amazon_titan_g1_transformation.py#L53) |
+| Titan Multimodal Embeddings | `embedding(model="bedrock/amazon.titan-embed-image-v1", input=input)` | [here](https://github.com/BerriAI/litellm/blob/f5905e100068e7a4d61441d7453d7cf5609c2121/litellm/llms/bedrock/embed/amazon_titan_multimodal_transformation.py#L28) |
+| Cohere Embeddings - English | `embedding(model="bedrock/cohere.embed-english-v3", input=input)` | [here](https://github.com/BerriAI/litellm/blob/f5905e100068e7a4d61441d7453d7cf5609c2121/litellm/llms/bedrock/embed/cohere_transformation.py#L18) |
+| Cohere Embeddings - Multilingual | `embedding(model="bedrock/cohere.embed-multilingual-v3", input=input)` | [here](https://github.com/BerriAI/litellm/blob/f5905e100068e7a4d61441d7453d7cf5609c2121/litellm/llms/bedrock/embed/cohere_transformation.py#L18) |
+
+### Advanced - [Drop Unsupported Params](https://docs.litellm.ai/docs/completion/drop_params#openai-proxy-usage)
+
+### Advanced - [Pass model/provider-specific Params](https://docs.litellm.ai/docs/completion/provider_specific_params#proxy-usage)
+
 ## Image Generation
 Use this for stable diffusion, and amazon nova canvas on bedrock
 
diff --git a/litellm/llms/bedrock/embed/amazon_titan_v2_transformation.py b/litellm/llms/bedrock/embed/amazon_titan_v2_transformation.py
@@ -10,7 +10,7 @@
 """
 
 import types
-from typing import List, Optional
+from typing import List, Optional, Union
 
 from litellm.types.llms.bedrock import (
     AmazonTitanV2EmbeddingRequest,
@@ -30,9 +30,7 @@ class AmazonTitanV2Config:
     normalize: Optional[bool] = None
     dimensions: Optional[int] = None
 
-    def __init__(
-        self, normalize: Optional[bool] = None, dimensions: Optional[int] = None
-    ) -> None:
+    def __init__(self, normalize: Optional[bool] = None, dimensions: Optional[int] = None) -> None:
         locals_ = locals().copy()
         for key, value in locals_.items():
             if key != "self" and value is not None:
@@ -57,32 +55,56 @@ def get_config(cls):
         }
 
     def get_supported_openai_params(self) -> List[str]:
-        return ["dimensions"]
+        return ["dimensions", "encoding_format"]
 
-    def map_openai_params(
-        self, non_default_params: dict, optional_params: dict
-    ) -> dict:
+    def map_openai_params(self, non_default_params: dict, optional_params: dict) -> dict:
         for k, v in non_default_params.items():
             if k == "dimensions":
                 optional_params["dimensions"] = v
+            elif k == "encoding_format":
+                # Map OpenAI encoding_format to AWS embeddingTypes
+                if v == "float":
+                    optional_params["embeddingTypes"] = ["float"]
+                elif v == "base64":
+                    # base64 maps to binary format in AWS
+                    optional_params["embeddingTypes"] = ["binary"]
+                else:
+                    # For any other encoding format, default to float
+                    optional_params["embeddingTypes"] = ["float"]
         return optional_params
 
-    def _transform_request(
-        self, input: str, inference_params: dict
-    ) -> AmazonTitanV2EmbeddingRequest:
+    def _transform_request(self, input: str, inference_params: dict) -> AmazonTitanV2EmbeddingRequest:
         return AmazonTitanV2EmbeddingRequest(inputText=input, **inference_params)  # type: ignore
 
-    def _transform_response(
-        self, response_list: List[dict], model: str
-    ) -> EmbeddingResponse:
+    def _transform_response(self, response_list: List[dict], model: str) -> EmbeddingResponse:
         total_prompt_tokens = 0
 
         transformed_responses: List[Embedding] = []
         for index, response in enumerate(response_list):
             _parsed_response = AmazonTitanV2EmbeddingResponse(**response)  # type: ignore
+
+            # According to AWS docs, embeddingsByType is always present
+            # If binary was requested (encoding_format="base64"), use binary data
+            # Otherwise, use float data from embeddingsByType or fallback to embedding field
+            embedding_data: Union[List[float], List[int]]
+
+            if ("embeddingsByType" in _parsed_response and
+                "binary" in _parsed_response["embeddingsByType"]):
+                # Use binary data if available (for encoding_format="base64")
+                embedding_data = _parsed_response["embeddingsByType"]["binary"]
+            elif ("embeddingsByType" in _parsed_response and
+                  "float" in _parsed_response["embeddingsByType"]):
+                # Use float data from embeddingsByType
+                embedding_data = _parsed_response["embeddingsByType"]["float"]
+            elif "embedding" in _parsed_response:
+                # Fallback to legacy embedding field
+                embedding_data = _parsed_response["embedding"]
+            else:
+                raise ValueError(f"No embedding data found in response: {response}")
+
             transformed_responses.append(
                 Embedding(
-                    embedding=_parsed_response["embedding"],
+                    embedding=embedding_data,
                     index=index,
                     object="embedding",
                 )
diff --git a/litellm/types/llms/bedrock.py b/litellm/types/llms/bedrock.py
@@ -328,15 +328,22 @@ class CohereEmbeddingResponse(TypedDict):
     texts: List[str]
 
 
-class AmazonTitanV2EmbeddingRequest(TypedDict):
-    inputText: str
+class AmazonTitanV2EmbeddingRequest(TypedDict, total=False):
+    inputText: Required[str]
     dimensions: int
     normalize: bool
+    embeddingTypes: List[Literal["float", "binary"]]
 
 
-class AmazonTitanV2EmbeddingResponse(TypedDict):
-    embedding: List[float]
-    inputTextTokenCount: int
+class AmazonTitanV2EmbeddingsByType(TypedDict, total=False):
+    binary: List[int]  # Array of integers for binary format
+    float: List[float]  # Array of floats for float format
+
+
+class AmazonTitanV2EmbeddingResponse(TypedDict, total=False):
+    embedding: List[float]  # Legacy field - array of floats (backward compatibility)
+    embeddingsByType: AmazonTitanV2EmbeddingsByType  # New format per AWS schema
+    inputTextTokenCount: Required[int]  # Always present in AWS response
 
 
 class AmazonTitanG1EmbeddingRequest(TypedDict):
diff --git a/tests/test_litellm/llms/bedrock/embed/test_bedrock_embedding.py b/tests/test_litellm/llms/bedrock/embed/test_bedrock_embedding.py
@@ -2,11 +2,12 @@
 import os
 import sys
 from unittest.mock import Mock, patch
+
 import pytest
 
 sys.path.insert(0, os.path.abspath("../../../../.."))  # Adds the parent directory to the system path
 import litellm
-from litellm.llms.custom_httpx.http_handler import HTTPHandler, AsyncHTTPHandler
+from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
 
 # Mock responses for different embedding models
 titan_embedding_response = {
@@ -146,7 +147,7 @@ def test_bedrock_embedding_with_sigv4():
     """Test embedding falls back to SigV4 auth when no bearer token is provided"""
     litellm.set_verbose = True
     model = "bedrock/amazon.titan-embed-text-v1"
-    
+
     with patch("litellm.llms.bedrock.embed.embedding.BedrockEmbedding.embeddings") as mock_bedrock_embed:
         mock_embedding_response = litellm.EmbeddingResponse()
         mock_embedding_response.data = [{"embedding": [0.1, 0.2, 0.3]}]
@@ -159,4 +160,85 @@ def test_bedrock_embedding_with_sigv4():
         )
 
         assert isinstance(response, litellm.EmbeddingResponse)
-        mock_bedrock_embed.assert_called_once()
+        mock_bedrock_embed.assert_called_once()
+
+
+def test_bedrock_titan_v2_encoding_format_float():
+    """Test amazon.titan-embed-text-v2:0 with encoding_format=float parameter"""
+    litellm.set_verbose = True
+    client = HTTPHandler()
+    test_api_key = "test-bearer-token-12345"
+    model = "bedrock/amazon.titan-embed-text-v2:0"
+
+    # Mock response using the legacy top-level "embedding" field, float format (addressing issue #14680)
+    titan_v2_response = {
+        "embedding": [0.1, 0.2, 0.3],
+        "inputTextTokenCount": 10
+    }
+
+    with patch.object(client, "post") as mock_post:
+        mock_response = Mock()
+        mock_response.status_code = 200
+        mock_response.text = json.dumps(titan_v2_response)
+        mock_response.json = lambda: json.loads(mock_response.text)
+        mock_post.return_value = mock_response
+
+        response = litellm.embedding(
+            model=model,
+            input=test_input,
+            encoding_format="float",  # Previously raised UnsupportedParamsError; should map to embeddingTypes: ["float"]
+            client=client,
+            aws_region_name="us-east-1",
+            aws_bedrock_runtime_endpoint="https://bedrock-runtime.us-east-1.amazonaws.com",
+            api_key=test_api_key
+        )
+
+        assert isinstance(response, litellm.EmbeddingResponse)
+        assert isinstance(response.data[0]['embedding'], list)
+        assert len(response.data[0]['embedding']) == 3
+
+        # Verify that the request contains embeddingTypes: ["float"] instead of encoding_format
+        request_body = json.loads(mock_post.call_args.kwargs.get("data", "{}"))
+        assert "embeddingTypes" in request_body
+        assert request_body["embeddingTypes"] == ["float"]
+        assert "encoding_format" not in request_body
+
+
+def test_bedrock_titan_v2_encoding_format_base64():
+    """Test amazon.titan-embed-text-v2:0 with encoding_format=base64 parameter (maps to binary)"""
+    litellm.set_verbose = True
+    client = HTTPHandler()
+    test_api_key = "test-bearer-token-12345"
+    model = "bedrock/amazon.titan-embed-text-v2:0"
+
+    # Mock response with embeddingsByType for binary format
+    titan_v2_binary_response = {
+        "embeddingsByType": {
+            "binary": "YmluYXJ5X2VtYmVkZGluZ19kYXRh"  # base64 encoded binary data
+        },
+        "inputTextTokenCount": 10
+    }
+
+    with patch.object(client, "post") as mock_post:
+        mock_response = Mock()
+        mock_response.status_code = 200
+        mock_response.text = json.dumps(titan_v2_binary_response)
+        mock_response.json = lambda: json.loads(mock_response.text)
+        mock_post.return_value = mock_response
+
+        response = litellm.embedding(
+            model=model,
+            input=test_input,
+            encoding_format="base64",  # This should map to embeddingTypes: ["binary"]
+            client=client,
+            aws_region_name="us-east-1",
+            aws_bedrock_runtime_endpoint="https://bedrock-runtime.us-east-1.amazonaws.com",
+            api_key=test_api_key
+        )
+
+        assert isinstance(response, litellm.EmbeddingResponse)
+
+        # Verify that the request contains embeddingTypes: ["binary"] for base64 encoding
+        request_body = json.loads(mock_post.call_args.kwargs.get("data", "{}"))
+        assert "embeddingTypes" in request_body
+        assert request_body["embeddingTypes"] == ["binary"]