fix(bedrock_guardrails.py): respect bedrock runtime endpoint when using guardrails

krrishdholakia · krrishdholakia · commit 1598d3e955e7 · 2025-09-17T18:00:58.000-07:00
Closes LIT-983
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
@@ -18,3 +18,15 @@ model_list:
 
 
 
+
+guardrails:
+  - guardrail_name: "intel-bedrock-guard-cfg"
+    litellm_params:
+      guardrail: bedrock
+      mode: [pre_call, post_call]
+      guardrailIdentifier: "1234"
+      guardrailVersion: "1"
+      aws_access_key_id: "os.environ/AWS_ACCESS_KEY_ID"
+      aws_secret_access_key: "os.environ/AWS_SECRET_ACCESS_KEY"
+      aws_bedrock_runtime_endpoint: "os.environ/AWS_BEDROCK_RUNTIME_ENDPOINT"
+      default_on: true
diff --git a/litellm/proxy/guardrails/guardrail_hooks/bedrock_guardrails.py b/litellm/proxy/guardrails/guardrail_hooks/bedrock_guardrails.py
@@ -15,7 +15,7 @@
 import json
 import sys
 from typing import Any, AsyncGenerator, List, Literal, Optional, Tuple, Union
-from litellm.secret_managers.main import get_secret_str
+
 import httpx
 from fastapi import HTTPException
 
@@ -32,6 +32,7 @@
     httpxSpecialProvider,
 )
 from litellm.proxy._types import UserAPIKeyAuth
+from litellm.secret_managers.main import get_secret_str
 from litellm.types.guardrails import GuardrailEventHooks
 from litellm.types.llms.openai import AllMessageValues
 from litellm.types.proxy.guardrails.guardrail_hooks.bedrock_guardrails import (
@@ -118,18 +119,17 @@ def __init__(
         """
         If True, will not raise an exception when the guardrail is blocked.
         """
-        
 
         # Set supported event hooks to include MCP hooks
-        if 'supported_event_hooks' not in kwargs:
-            kwargs['supported_event_hooks'] = [
+        if "supported_event_hooks" not in kwargs:
+            kwargs["supported_event_hooks"] = [
                 GuardrailEventHooks.pre_call,
                 GuardrailEventHooks.post_call,
                 GuardrailEventHooks.during_call,
                 GuardrailEventHooks.pre_mcp_call,
                 GuardrailEventHooks.during_mcp_call,
             ]
-        
+
         super().__init__(**kwargs)
         BaseAWSLLM.__init__(self)
 
@@ -138,9 +138,10 @@ def __init__(
             self.guardrailIdentifier,
             self.guardrailVersion,
         )
-    
 
-    def _create_bedrock_input_content_request(self, messages: Optional[List[AllMessageValues]]) -> BedrockRequest:
+    def _create_bedrock_input_content_request(
+        self, messages: Optional[List[AllMessageValues]]
+    ) -> BedrockRequest:
         """
         Create a bedrock request for the input content - the LLM request.
         """
@@ -149,8 +150,8 @@ def _create_bedrock_input_content_request(self, messages: Optional[List[AllMessa
         if messages is None:
             return bedrock_request
         for message in messages:
-            message_text_content: Optional[List[str]] = (
-                self.get_content_for_message(message=message)
+            message_text_content: Optional[List[str]] = self.get_content_for_message(
+                message=message
             )
             if message_text_content is None:
                 continue
@@ -163,7 +164,9 @@ def _create_bedrock_input_content_request(self, messages: Optional[List[AllMessa
         bedrock_request["content"] = bedrock_request_content
         return bedrock_request
 
-    def _create_bedrock_output_content_request(self, response: Union[Any, ModelResponse]) -> BedrockRequest:
+    def _create_bedrock_output_content_request(
+        self, response: Union[Any, ModelResponse]
+    ) -> BedrockRequest:
         """
         Create a bedrock request for the output content - the LLM response.
         """
@@ -199,9 +202,13 @@ def convert_to_bedrock_format(
         """
         bedrock_request: BedrockRequest = BedrockRequest(source=source)
         if source == "INPUT":
-            bedrock_request = self._create_bedrock_input_content_request(messages=messages)
+            bedrock_request = self._create_bedrock_input_content_request(
+                messages=messages
+            )
         elif source == "OUTPUT":
-            bedrock_request = self._create_bedrock_output_content_request(response=response)
+            bedrock_request = self._create_bedrock_output_content_request(
+                response=response
+            )
         return bedrock_request
 
     #### CALL HOOKS - proxy only ####
@@ -255,9 +262,19 @@ def _prepare_request(
         headers = {"Content-Type": "application/json"}
         if extra_headers is not None:
             headers = {"Content-Type": "application/json", **extra_headers}
-        api_base = f"https://bedrock-runtime.{aws_region_name}.amazonaws.com/guardrail/{self.guardrailIdentifier}/version/{self.guardrailVersion}/apply"
+
+        aws_bedrock_runtime_endpoint = self.optional_params.get(
+            "aws_bedrock_runtime_endpoint", None
+        )
+        _, proxy_endpoint_url = self.get_runtime_endpoint(
+            api_base=None,
+            aws_bedrock_runtime_endpoint=aws_bedrock_runtime_endpoint,
+            aws_region_name=aws_region_name,
+        )
+        proxy_endpoint_url = f"{proxy_endpoint_url}/guardrail/{self.guardrailIdentifier}/version/{self.guardrailVersion}/apply"
+        # NOTE: the base URL above replaces the previously hard-coded https://bedrock-runtime.{aws_region_name}.amazonaws.com host, so a configured aws_bedrock_runtime_endpoint is used for the guardrail apply call.
         encoded_data = json.dumps(data).encode("utf-8")
-        
+
         # first check api-key, if none, fall back to sigV4
         if api_key is not None:
             aws_bearer_token: Optional[str] = api_key
@@ -268,21 +285,31 @@ def _prepare_request(
             try:
                 from botocore.awsrequest import AWSRequest
             except ImportError:
-                raise ImportError("Missing boto3 to call bedrock. Run 'pip install boto3'.")
+                raise ImportError(
+                    "Missing boto3 to call bedrock. Run 'pip install boto3'."
+                )
             headers["Authorization"] = f"Bearer {aws_bearer_token}"
             request = AWSRequest(
-                method="POST", url=api_base, data=encoded_data, headers=headers
+                method="POST",
+                url=proxy_endpoint_url,
+                data=encoded_data,
+                headers=headers,
             )
         else:
             try:
                 from botocore.auth import SigV4Auth
                 from botocore.awsrequest import AWSRequest
             except ImportError:
-                raise ImportError("Missing boto3 to call bedrock. Run 'pip install boto3'.")
+                raise ImportError(
+                    "Missing boto3 to call bedrock. Run 'pip install boto3'."
+                )
 
             sigv4 = SigV4Auth(credentials, "bedrock", aws_region_name)
             request = AWSRequest(
-                method="POST", url=api_base, data=encoded_data, headers=headers
+                method="POST",
+                url=proxy_endpoint_url,
+                data=encoded_data,
+                headers=headers,
             )
             sigv4.add_auth(request)
             if (
@@ -294,20 +321,19 @@ def _prepare_request(
         return prepped_request
 
     async def make_bedrock_api_request(
-        self, 
+        self,
         source: Literal["INPUT", "OUTPUT"],
         messages: Optional[List[AllMessageValues]] = None,
         response: Optional[Union[Any, litellm.ModelResponse]] = None,
-        request_data: Optional[dict] = None
+        request_data: Optional[dict] = None,
     ) -> BedrockGuardrailResponse:
         from datetime import datetime
+
         start_time = datetime.now()
         credentials, aws_region_name = self._load_credentials()
         bedrock_request_data: dict = dict(
             self.convert_to_bedrock_format(
-                source=source, 
-                messages=messages, 
-                response=response
+                source=source, messages=messages, response=response
             )
         )
         bedrock_guardrail_response: BedrockGuardrailResponse = (
@@ -316,11 +342,13 @@ async def make_bedrock_api_request(
         api_key: Optional[str] = None
         if request_data:
             bedrock_request_data.update(
-                self.get_guardrail_dynamic_request_body_params(request_data=request_data)
+                self.get_guardrail_dynamic_request_body_params(
+                    request_data=request_data
+                )
             )
             if request_data.get("api_key") is not None:
                 api_key = request_data["api_key"]
-    
+
         prepared_request = self._prepare_request(
             credentials=credentials,
             data=bedrock_request_data,
@@ -346,7 +374,9 @@ async def make_bedrock_api_request(
         self.add_standard_logging_guardrail_information_to_request_data(
             guardrail_json_response=response.json(),
             request_data=request_data or {},
-            guardrail_status=self._get_bedrock_guardrail_response_status(response=response),
+            guardrail_status=self._get_bedrock_guardrail_response_status(
+                response=response
+            ),
             start_time=start_time.timestamp(),
             end_time=datetime.now().timestamp(),
             duration=(datetime.now() - start_time).total_seconds(),
@@ -372,16 +402,20 @@ async def make_bedrock_api_request(
             )
 
         return bedrock_guardrail_response
-    
-    def _get_bedrock_guardrail_response_status(self, response: httpx.Response) -> Literal["success", "failure"]:
+
+    def _get_bedrock_guardrail_response_status(
+        self, response: httpx.Response
+    ) -> Literal["success", "failure"]:
         """
         Get the status of the bedrock guardrail response.
         """
         if response.status_code == 200:
             return "success"
         return "failure"
 
-    def _get_http_exception_for_blocked_guardrail(self, response: BedrockGuardrailResponse) -> HTTPException:
+    def _get_http_exception_for_blocked_guardrail(
+        self, response: BedrockGuardrailResponse
+    ) -> HTTPException:
         """
         Get the HTTP exception for a blocked guardrail.
         """
@@ -393,17 +427,15 @@ def _get_http_exception_for_blocked_guardrail(self, response: BedrockGuardrailRe
             for output in outputs:
                 if output.get("text"):
                     bedrock_guardrail_output_text += output.get("text") or ""
-                
-        
+
         return HTTPException(
             status_code=400,
             detail={
-                "error": "Violated guardrail policy", 
+                "error": "Violated guardrail policy",
                 "bedrock_guardrail_response": bedrock_guardrail_output_text,
-            }
+            },
         )
 
-
     def _should_raise_guardrail_blocked_exception(
         self, response: BedrockGuardrailResponse
     ) -> bool:
@@ -416,7 +448,7 @@ def _should_raise_guardrail_blocked_exception(
         # if user opted into masking, return False. since we'll use the masked output from the guardrail
         if self.mask_request_content or self.mask_response_content:
             return False
-        
+
         if self.disable_exception_on_block is True:
             return False
 
@@ -631,9 +663,7 @@ async def async_post_call_success_hook(
         ########## 1. Make parallel Bedrock API requests ##########
         #########################################################
         output_content_bedrock = await self.make_bedrock_api_request(
-            source="OUTPUT", 
-            response=response,
-            request_data=data
+            source="OUTPUT", response=response, request_data=data
         )  # Only response
 
         #########################################################
@@ -729,16 +759,16 @@ async def async_post_call_streaming_iterator_hook(
             ###################################################################
             # Create tasks for parallel execution
             input_task = self.make_bedrock_api_request(
-                source="INPUT", messages=request_data.get("messages"), request_data=request_data
+                source="INPUT",
+                messages=request_data.get("messages"),
+                request_data=request_data,
             )  # Only input messages
             output_task = self.make_bedrock_api_request(
                 source="OUTPUT", response=assembled_model_response
             )  # Only response
 
             # Execute both requests in parallel
-            _, output_guardrail_response = await asyncio.gather(
-                input_task, output_task
-            )
+            _, output_guardrail_response = await asyncio.gather(input_task, output_task)
 
             #########################################################################
             ########## 2. Apply masking to response with output guardrail response ##########
@@ -891,7 +921,7 @@ def _apply_masking_to_response(
     ) -> None:
         """
         Apply masked content from bedrock guardrail to the response object.
-        
+
         Args:
             response: The response object to modify
             bedrock_guardrail_response: Response from Bedrock guardrail containing masked content
@@ -902,7 +932,9 @@ def _apply_masking_to_response(
         )
 
         if not masked_texts:
-            verbose_proxy_logger.debug("No masked outputs found, skipping response masking")
+            verbose_proxy_logger.debug(
+                "No masked outputs found, skipping response masking"
+            )
             return
 
         verbose_proxy_logger.debug(
@@ -922,13 +954,13 @@ def _apply_masking_to_model_response(
     ) -> None:
         """
         Apply masked texts to a ModelResponse object.
-        
+
         Args:
             response: The ModelResponse object to modify in-place
             masked_texts: List of masked text strings from guardrail
         """
         masking_index = 0
-        
+
         for choice in response.choices:
             if isinstance(choice, Choices):
                 # For chat completions