Commit ea4b274
Fix: Bedrock Application Inference Profile (AIP) is not streaming response with ChatBedrockConverse (#568)
### Description

This PR fixes the streaming issue in ChatBedrockConverse when the input model is an Application Inference Profile (AIP). The issue occurred because langchain-aws could not identify the foundation model behind an AIP (e.g., `arn:aws:bedrock:us-east-1:111111484058:application-inference-profile/c3myu2h6fllr`), and therefore could not set the streaming_support flag for the AIP correctly. As a result, the entire response was returned to the user as a whole rather than streamed back.

### Solution

We create a Bedrock client to call the Bedrock control plane [get_inference_profile API](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/bedrock/client/get_inference_profile.html) and parse the foundation model id from the response. However, the existing `set_disable_streaming` function works on the raw user input before the ChatBedrockConverse object is instantiated, so the logic for determining streaming_support is extracted into a common function, `_get_streaming_support`, which is invoked in both places: the original path that works on the raw user input, and the new path that works on the model id resolved by the get_inference_profile call (a sketch of this resolution step is shown below).

### Issue

#538

### Test

* Added new unit tests in libs/aws/tests/unit_tests/chat_models/test_bedrock_converse.py; all passed.
* Ran the integration tests for ChatBedrockConverse; all passed except one pre-existing failure. The main branch already fails this integration test, so it is not caused by this change:

```
FAILED tests/integration_tests/chat_models/test_bedrock_converse.py::test_structured_output_tool_choice_not_supported - assert 1 == 0
```

---------

Co-authored-by: Michael Chin <[email protected]>
1 parent bce4ed8 commit ea4b274
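As background for the Solution above, here is a minimal standalone sketch of the resolution step, calling the control plane API with boto3 directly rather than through the library's client helper. The AIP ARN is the placeholder example from the description, not a real profile:

```python
import boto3

# Ask the Bedrock control plane which foundation model backs the
# application inference profile, then derive the model id from its ARN.
bedrock = boto3.client("bedrock", region_name="us-east-1")

# Placeholder AIP ARN from the description above.
aip_arn = (
    "arn:aws:bedrock:us-east-1:111111484058:"
    "application-inference-profile/c3myu2h6fllr"
)

response = bedrock.get_inference_profile(inferenceProfileIdentifier=aip_arn)

# The response lists the backing model(s) as ARNs of the form
# arn:aws:bedrock:<region>::foundation-model/<provider>.<model-name>,
# so the foundation model id is the final path segment.
model_arn = response["models"][0]["modelArn"]
base_model_id = model_arn.split("/")[-1]
print(base_model_id)  # e.g. anthropic.claude-3-sonnet-20240229-v1:0
```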

File tree

2 files changed: +272 −25 lines changed

libs/aws/langchain_aws/chat_models/bedrock_converse.py

Lines changed: 83 additions & 25 deletions
```diff
@@ -312,6 +312,10 @@ class Joke(BaseModel):
     """ # noqa: E501

     client: Any = Field(default=None, exclude=True) #: :meta private:
+    """The bedrock runtime client for making data plane API calls"""
+
+    bedrock_client: Any = Field(default=None, exclude=True) #: :meta private:
+    """The bedrock client for making control plane API calls"""

     model_id: str = Field(alias="model")
     """Id of the model to call.
@@ -500,29 +504,15 @@ def build_extra(cls, values: dict[str, Any]) -> Any:
         }
         return values

-    @model_validator(mode="before")
     @classmethod
-    def set_disable_streaming(cls, values: Dict) -> Any:
-        model_id = values.get("model_id", values.get("model"))
-
-        # Extract provider from the model_id
-        # (e.g., "amazon", "anthropic", "ai21", "meta", "mistral")
-        if "provider" not in values:
-            if model_id.startswith("arn"):
-                raise ValueError(
-                    "Model provider should be supplied when passing a model ARN as model_id."
-                )
-            model_parts = model_id.split(".")
-            values["provider"] = (
-                model_parts[-2] if len(model_parts) > 1 else model_parts[0]
-            )
-
-        provider = values["provider"]
-
-        model_id_lower = values.get(
-            "base_model_id", values.get("base_model", model_id)
-        ).lower()
-
+    def _get_streaming_support(cls, provider: str, model_id_lower: str) -> Union[bool, str]:
+        """Determine streaming support for a given provider and model.
+
+        Returns:
+            True: Full streaming support
+            "no_tools": Streaming supported but not with tools
+            False: No streaming support
+        """
         # Determine if the model supports plain-text streaming (ConverseStream)
         # Here we check based on the updated AWS documentation.
         if (
@@ -550,7 +540,7 @@ def set_disable_streaming(cls, values: Dict) -> Any:
             # Cohere Command R models
             (provider == "cohere" and "command-r" in model_id_lower)
         ):
-            streaming_support = True
+            return True
         elif (
             # AI21 Jamba-Instruct model
             (provider == "ai21" and "jamba-instruct" in model_id_lower)
@@ -583,9 +573,34 @@ def set_disable_streaming(cls, values: Dict) -> Any:
             # Writer Palmyra models
             (provider == "writer" and "palmyra" in model_id_lower)
         ):
-            streaming_support = "no_tools"
+            return "no_tools"
         else:
-            streaming_support = False
+            return False
+
+    @model_validator(mode="before")
+    @classmethod
+    def set_disable_streaming(cls, values: Dict) -> Any:
+        model_id = values.get("model_id", values.get("model"))
+
+        # Extract provider from the model_id
+        # (e.g., "amazon", "anthropic", "ai21", "meta", "mistral")
+        if "provider" not in values:
+            if model_id.startswith("arn"):
+                raise ValueError(
+                    "Model provider should be supplied when passing a model ARN as model_id."
+                )
+            model_parts = model_id.split(".")
+            values["provider"] = (
+                model_parts[-2] if len(model_parts) > 1 else model_parts[0]
+            )
+
+        provider = values["provider"]
+
+        model_id_lower = values.get(
+            "base_model_id", values.get("base_model", model_id)
+        ).lower()
+
+        streaming_support = cls._get_streaming_support(provider, model_id_lower)

         # Set the disable_streaming flag accordingly:
         # - If streaming is supported (plain streaming),
@@ -606,6 +621,23 @@ def set_disable_streaming(cls, values: Dict) -> Any:
     @model_validator(mode="after")
     def validate_environment(self) -> Self:
         """Validate that AWS credentials to and python package exists in environment."""
+
+        # Create bedrock client for control plane API call
+        if self.bedrock_client is None:
+            self.bedrock_client = create_aws_client(
+                region_name=self.region_name,
+                credentials_profile_name=self.credentials_profile_name,
+                aws_access_key_id=self.aws_access_key_id,
+                aws_secret_access_key=self.aws_secret_access_key,
+                aws_session_token=self.aws_session_token,
+                endpoint_url=self.endpoint_url,
+                config=self.config,
+                service_name="bedrock",
+            )
+
+        # Handle streaming configuration for application inference profiles
+        if "application-inference-profile" in self.model_id:
+            self._configure_streaming_for_resolved_model()

         # As of 12/03/24:
         # only claude-3/4, mistral-large, and nova models support tool choice:
@@ -649,10 +681,36 @@ def validate_environment(self) -> Self:
                 "Provide a guardrail via `guardrail_config` or "
                 "disable `guard_last_turn_only`."
             )
+
         return self

     def _get_base_model(self) -> str:
+        # identify the base model id used in the application inference profile (AIP)
+        # Format: arn:aws:bedrock:us-east-1:<accountId>:application-inference-profile/<id>
+        if self.base_model_id is None and 'application-inference-profile' in self.model_id:
+            response = self.bedrock_client.get_inference_profile(
+                inferenceProfileIdentifier=self.model_id
+            )
+            if 'models' in response and len(response['models']) > 0:
+                model_arn = response['models'][0]['modelArn']
+                # Format: arn:aws:bedrock:region::foundation-model/provider.model-name
+                self.base_model_id = model_arn.split('/')[-1]
         return self.base_model_id if self.base_model_id else self.model_id
+
+    def _configure_streaming_for_resolved_model(self) -> None:
+        """Configure streaming support after resolving the base model for application inference profiles."""
+        base_model = self._get_base_model()
+        model_id_lower = base_model.lower()
+
+        streaming_support = self._get_streaming_support(self.provider, model_id_lower)
+
+        # Set the disable_streaming flag accordingly
+        if not streaming_support:
+            self.disable_streaming = True
+        elif streaming_support == "no_tools":
+            self.disable_streaming = "tool_calling"
+        else:
+            self.disable_streaming = False

     def _apply_guard_last_turn_only(self, messages: List[Dict[str, Any]]) -> None:
         for msg in reversed(messages):
```
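For context, here is a minimal usage sketch of the behavior this change enables, with a hypothetical AIP ARN; note that `provider` must still be supplied explicitly, since it cannot be parsed from an ARN:

```python
from langchain_aws import ChatBedrockConverse

# Hypothetical AIP ARN for illustration; substitute your own profile.
llm = ChatBedrockConverse(
    model="arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/my-profile",
    provider="anthropic",  # required when the model id is an ARN
    region_name="us-east-1",
)

# With this fix, disable_streaming is set from the resolved base model, so
# chunks arrive incrementally instead of as one final message.
for chunk in llm.stream("Tell me a short joke."):
    # For Converse models, chunk.content may be a list of content blocks.
    print(chunk.content)
```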

libs/aws/tests/unit_tests/chat_models/test_bedrock_converse.py

Lines changed: 189 additions & 0 deletions
```diff
@@ -1383,3 +1383,192 @@ def test_stream_guard_last_turn_only() -> None:
     assert bedrock_msgs[-1]["content"][0] == {
         "guardContent": {"text": {"text": "How are you?"}}
     }
+
+@mock.patch("langchain_aws.chat_models.bedrock_converse.create_aws_client")
+def test_bedrock_client_creation(mock_create_client: mock.Mock) -> None:
+    """Test that bedrock_client is created during validation."""
+    mock_bedrock_client = mock.Mock()
+    mock_runtime_client = mock.Mock()
+
+    def side_effect(service_name: str, **kwargs: Any) -> mock.Mock:
+        if service_name == "bedrock":
+            return mock_bedrock_client
+        elif service_name == "bedrock-runtime":
+            return mock_runtime_client
+        return mock.Mock()
+
+    mock_create_client.side_effect = side_effect
+
+    chat_model = ChatBedrockConverse(
+        model="anthropic.claude-3-sonnet-20240229-v1:0",
+        region_name="us-west-2"
+    )
+
+    assert chat_model.bedrock_client == mock_bedrock_client
+    assert chat_model.client == mock_runtime_client
+    assert mock_create_client.call_count == 2
+
+
+@mock.patch("langchain_aws.chat_models.bedrock_converse.create_aws_client")
+def test_get_base_model_with_application_inference_profile(mock_create_client: mock.Mock) -> None:
+    """Test _get_base_model method with application inference profile."""
+    mock_bedrock_client = mock.Mock()
+    mock_runtime_client = mock.Mock()
+
+    # Mock the get_inference_profile response
+    mock_bedrock_client.get_inference_profile.return_value = {
+        'models': [
+            {
+                'modelArn': 'arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-3-sonnet-20240229-v1:0'
+            }
+        ]
+    }
+
+    def side_effect(service_name: str, **kwargs: Any) -> mock.Mock:
+        if service_name == "bedrock":
+            return mock_bedrock_client
+        elif service_name == "bedrock-runtime":
+            return mock_runtime_client
+        return mock.Mock()
+
+    mock_create_client.side_effect = side_effect
+
+    chat_model = ChatBedrockConverse(
+        model="arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/test-profile",
+        region_name="us-west-2",
+        provider="anthropic"
+    )
+
+    base_model = chat_model._get_base_model()
+    assert base_model == "anthropic.claude-3-sonnet-20240229-v1:0"
+    mock_bedrock_client.get_inference_profile.assert_called_once_with(
+        inferenceProfileIdentifier="arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/test-profile"
+    )
+
+
+@mock.patch("langchain_aws.chat_models.bedrock_converse.create_aws_client")
+def test_get_base_model_without_application_inference_profile(mock_create_client: mock.Mock) -> None:
+    """Test _get_base_model method without application inference profile."""
+    mock_bedrock_client = mock.Mock()
+    mock_runtime_client = mock.Mock()
+
+    def side_effect(service_name: str, **kwargs: Any) -> mock.Mock:
+        if service_name == "bedrock":
+            return mock_bedrock_client
+        elif service_name == "bedrock-runtime":
+            return mock_runtime_client
+        return mock.Mock()
+
+    mock_create_client.side_effect = side_effect
+
+    chat_model = ChatBedrockConverse(
+        model="anthropic.claude-3-sonnet-20240229-v1:0",
+        region_name="us-west-2",
+        provider="anthropic"
+    )
+
+    base_model = chat_model._get_base_model()
+    assert base_model == "anthropic.claude-3-sonnet-20240229-v1:0"
+    mock_bedrock_client.get_inference_profile.assert_not_called()
+
+
+@mock.patch("langchain_aws.chat_models.bedrock_converse.create_aws_client")
+def test_configure_streaming_for_resolved_model(mock_create_client: mock.Mock) -> None:
+    """Test _configure_streaming_for_resolved_model method."""
+    mock_bedrock_client = mock.Mock()
+    mock_runtime_client = mock.Mock()
+
+    # Mock the get_inference_profile response for a model with full streaming support
+    mock_bedrock_client.get_inference_profile.return_value = {
+        'models': [
+            {
+                'modelArn': 'arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-3-sonnet-20240229-v1:0'
+            }
+        ]
+    }
+
+    def side_effect(service_name: str, **kwargs: Any) -> mock.Mock:
+        if service_name == "bedrock":
+            return mock_bedrock_client
+        elif service_name == "bedrock-runtime":
+            return mock_runtime_client
+        return mock.Mock()
+
+    mock_create_client.side_effect = side_effect
+
+    chat_model = ChatBedrockConverse(
+        model="arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/test-profile",
+        region_name="us-west-2",
+        provider="anthropic"
+    )
+
+    # The streaming should be configured based on the resolved model
+    assert chat_model.disable_streaming is False
+
+
+@mock.patch("langchain_aws.chat_models.bedrock_converse.create_aws_client")
+def test_configure_streaming_for_resolved_model_no_tools(mock_create_client: mock.Mock) -> None:
+    """Test _configure_streaming_for_resolved_model method with no-tools streaming."""
+    mock_bedrock_client = mock.Mock()
+    mock_runtime_client = mock.Mock()
+
+    # Mock the get_inference_profile response for a model with no-tools streaming support
+    mock_bedrock_client.get_inference_profile.return_value = {
+        'models': [
+            {
+                'modelArn': 'arn:aws:bedrock:us-east-1::foundation-model/amazon.titan-text-express-v1'
+            }
+        ]
+    }
+
+    def side_effect(service_name: str, **kwargs: Any) -> mock.Mock:
+        if service_name == "bedrock":
+            return mock_bedrock_client
+        elif service_name == "bedrock-runtime":
+            return mock_runtime_client
+        return mock.Mock()
+
+    mock_create_client.side_effect = side_effect
+
+    chat_model = ChatBedrockConverse(
+        model="arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/test-profile",
+        region_name="us-west-2",
+        provider="amazon"
+    )
+
+    # The streaming should be configured as "tool_calling" for no-tools models
+    assert chat_model.disable_streaming == "tool_calling"
+
+
+@mock.patch("langchain_aws.chat_models.bedrock_converse.create_aws_client")
+def test_configure_streaming_for_resolved_model_no_streaming(mock_create_client: mock.Mock) -> None:
+    """Test _configure_streaming_for_resolved_model method with no streaming support."""
+    mock_bedrock_client = mock.Mock()
+    mock_runtime_client = mock.Mock()
+
+    # Mock the get_inference_profile response for a model with no streaming support
+    mock_bedrock_client.get_inference_profile.return_value = {
+        'models': [
+            {
+                'modelArn': 'arn:aws:bedrock:us-east-1::foundation-model/stability.stable-image-core-v1:0'
+            }
+        ]
+    }
+
+    def side_effect(service_name: str, **kwargs: Any) -> mock.Mock:
+        if service_name == "bedrock":
+            return mock_bedrock_client
+        elif service_name == "bedrock-runtime":
+            return mock_runtime_client
+        return mock.Mock()
+
+    mock_create_client.side_effect = side_effect
+
+    chat_model = ChatBedrockConverse(
+        model="arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/test-profile",
+        region_name="us-west-2",
+        provider="stability"
+    )
+
+    # The streaming should be disabled for models with no streaming support
+    assert chat_model.disable_streaming is True
```
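Note that all of these tests patch `create_aws_client`, so no AWS credentials or network calls are needed: the mocked `get_inference_profile` response drives the same `_get_base_model` and `_configure_streaming_for_resolved_model` path exercised at runtime. To run just the new tests, a `pytest -k` selector such as `"inference_profile or bedrock_client or configure_streaming"` against libs/aws/tests/unit_tests/chat_models/test_bedrock_converse.py should pick them up.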
