Skip to content

Commit 197e98b

Browse files
Merge pull request #14758 from TeddyAmkie/model-armor-restore
Resolve Model Armor gaps (PDFs, basic
2 parents 8628c26 + 9d18780 commit 197e98b

File tree

3 files changed

+289
-50
lines changed

3 files changed

+289
-50
lines changed

litellm/proxy/guardrails/guardrail_hooks/model_armor/model_armor.py

Lines changed: 115 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -88,11 +88,11 @@ def _get_api_endpoint(self) -> str:
8888
def _create_sanitize_request(
8989
self, content: str, source: Literal["user_prompt", "model_response"]
9090
) -> dict:
91-
"""Create request body for Model Armor API."""
91+
"""Create request body for Model Armor API with correct camelCase field names."""
9292
if source == "user_prompt":
93-
return {"user_prompt_data": {"text": content}}
93+
return {"userPromptData": {"text": content}}
9494
else:
95-
return {"model_response_data": {"text": content}}
95+
return {"modelResponseData": {"text": content}}
9696

9797
def _extract_content_from_response(
9898
self, response: Union[Any, ModelResponse]
@@ -119,11 +119,16 @@ def _extract_content_from_response(
119119

120120
async def make_model_armor_request(
121121
self,
122-
content: str,
123-
source: Literal["user_prompt", "model_response"],
122+
content: Optional[str] = None,
123+
source: Literal["user_prompt", "model_response"] = "user_prompt",
124124
request_data: Optional[dict] = None,
125+
file_bytes: Optional[bytes] = None,
126+
file_type: Optional[str] = None,
125127
) -> dict:
126-
"""Make request to Model Armor API."""
128+
"""
129+
Make request to Model Armor API. Supports both text and file prompt sanitization.
130+
If file_bytes and file_type are provided, file prompt sanitization is performed.
131+
"""
127132
# Get access token using VertexBase auth
128133
access_token, resolved_project_id = await self._ensure_access_token_async(
129134
credentials=self.credentials,
@@ -143,7 +148,14 @@ async def make_model_armor_request(
143148
url = f"{endpoint}/v1/projects/{self.project_id}/locations/{self.location}/templates/{self.template_id}:sanitizeModelResponse"
144149

145150
# Create request body
146-
body = self._create_sanitize_request(content, source)
151+
if file_bytes is not None and file_type is not None:
152+
body = self.sanitize_file_prompt(file_bytes, file_type, source)
153+
elif content is not None:
154+
body = self._create_sanitize_request(content, source)
155+
else:
156+
raise ValueError(
157+
"Either content or file_bytes and file_type must be provided."
158+
)
147159

148160
# Set headers
149161
headers = {
@@ -189,57 +201,110 @@ async def make_model_armor_request(
189201
return await json_response
190202
return json_response
191203

204+
def sanitize_file_prompt(
    self, file_bytes: bytes, file_type: str, source: str = "user_prompt"
) -> dict:
    """
    Helper to build the request body for file prompt sanitization for Model Armor.

    file_type should be one of: PLAINTEXT_UTF8, PDF, WORD_DOCUMENT,
    EXCEL_DOCUMENT, POWERPOINT_DOCUMENT, TXT, CSV.
    Returns the request body dict.
    """
    import base64

    # Model Armor expects file content as base64-encoded text.
    encoded = base64.b64encode(file_bytes).decode("utf-8")
    data_key = "userPromptData" if source == "user_prompt" else "modelResponseData"
    return {
        data_key: {
            "byteItem": {"byteDataType": file_type, "byteData": encoded}
        }
    }
227+
192228
def _should_block_content(self, armor_response: dict) -> bool:
193-
"""Check if Model Armor response indicates content should be blocked."""
194-
# Check the sanitizationResult from Model Armor API
229+
"""Check if Model Armor response indicates content should be blocked, including both inspectResult and deidentifyResult."""
195230
sanitization_result = armor_response.get("sanitizationResult", {})
196231
filter_results = sanitization_result.get("filterResults", {})
197232

198-
# Check blocking filters (these should cause the request to be blocked)
199-
# RAI (Responsible AI) filters
200-
rai_results = filter_results.get("rai", {}).get("raiFilterResult", {})
201-
if rai_results.get("matchState") == "MATCH_FOUND":
202-
return True
203-
204-
# Prompt injection and jailbreak filters
205-
pi_jailbreak = filter_results.get("piAndJailbreakFilterResult", {})
206-
if pi_jailbreak.get("matchState") == "MATCH_FOUND":
207-
return True
208-
209-
# Malicious URI filters
210-
malicious_uri = filter_results.get("maliciousUriFilterResult", {})
211-
if malicious_uri.get("matchState") == "MATCH_FOUND":
212-
return True
213-
214-
# CSAM filters
215-
csam = filter_results.get("csamFilterFilterResult", {})
216-
if csam.get("matchState") == "MATCH_FOUND":
217-
return True
218-
219-
# Virus scan filters
220-
virus_scan = filter_results.get("virusScanFilterResult", {})
221-
if virus_scan.get("matchState") == "MATCH_FOUND":
222-
return True
223-
233+
# filterResults can be a dict (named keys) or a list (array of filter result dicts)
234+
filter_result_items = []
235+
if isinstance(filter_results, dict):
236+
filter_result_items = [filter_results]
237+
elif isinstance(filter_results, list):
238+
filter_result_items = filter_results
239+
240+
for filt in filter_result_items:
241+
# Check RAI, PI/Jailbreak, Malicious URI, CSAM, Virus scan as before
242+
if filt.get("raiFilterResult", {}).get("matchState") == "MATCH_FOUND":
243+
return True
244+
if (
245+
filt.get("piAndJailbreakFilterResult", {}).get("matchState")
246+
== "MATCH_FOUND"
247+
):
248+
return True
249+
if (
250+
filt.get("maliciousUriFilterResult", {}).get("matchState")
251+
== "MATCH_FOUND"
252+
):
253+
return True
254+
if (
255+
filt.get("csamFilterFilterResult", {}).get("matchState")
256+
== "MATCH_FOUND"
257+
):
258+
return True
259+
if filt.get("virusScanFilterResult", {}).get("matchState") == "MATCH_FOUND":
260+
return True
261+
# Check sdpFilterResult for both inspectResult and deidentifyResult
262+
sdp = filt.get("sdpFilterResult")
263+
if sdp:
264+
if sdp.get("inspectResult", {}).get("matchState") == "MATCH_FOUND":
265+
return True
266+
if sdp.get("deidentifyResult", {}).get("matchState") == "MATCH_FOUND":
267+
return True
268+
# Fallback dict code removed; all cases handled above
224269
return False
225270

226271
def _get_sanitized_content(self, armor_response: dict) -> Optional[str]:
227-
"""Extract sanitized content from Model Armor response."""
228-
# Model Armor returns sanitized content in the sanitizationResult
229-
sanitization_result = armor_response.get("sanitizationResult", {})
230-
231-
# Check for sdp structure (for deidentification)
232-
filter_results = sanitization_result.get("filterResults", {})
233-
sdp = filter_results.get("sdp", {}).get("sdpFilterResult")
234-
235-
if sdp is not None:
236-
# Model Armor returns sanitized text under deidentifyResult in sdp
237-
deidentify_result = sdp.get("deidentifyResult", {})
238-
sanitized_text = deidentify_result.get("data", {}).get("text", "")
239-
if deidentify_result.get("matchState") == "MATCH_FOUND" and sanitized_text:
240-
return sanitized_text
272+
"""
273+
Get the sanitized content from a Model Armor response, if available.
274+
Looks for sanitized text in deidentifyResult, and falls back to root-level fields if not found.
275+
"""
276+
result = armor_response.get("sanitizationResult", {})
277+
filter_results = result.get("filterResults", {})
278+
279+
# filterResults can be a dict (single filter) or a list (multiple filters)
280+
filters = (
281+
[filter_results]
282+
if isinstance(filter_results, dict)
283+
else filter_results
284+
if isinstance(filter_results, list)
285+
else []
286+
)
241287

242-
# Fallback to checking root level
288+
# Prefer sanitized text from deidentifyResult if present
289+
for filter_entry in filters:
290+
sdp = filter_entry.get("sdpFilterResult")
291+
if sdp:
292+
deid = sdp.get("deidentifyResult", {})
293+
sanitized = deid.get("data", {}).get("text", "")
294+
# If Model Armor found something and returned a sanitized version, use it
295+
if deid.get("matchState") == "MATCH_FOUND" and sanitized:
296+
return sanitized
297+
298+
# If no deidentifyResult, optionally check for inspectResult (rare, but could have findings)
299+
for filter_entry in filters:
300+
sdp = filter_entry.get("sdpFilterResult")
301+
if sdp:
302+
inspect = sdp.get("inspectResult", {})
303+
# If Model Armor flagged something but didn't sanitize, return None
304+
if inspect.get("matchState") == "MATCH_FOUND":
305+
return None
306+
307+
# Fallback: if Model Armor put sanitized text at the root, use it
243308
return armor_response.get("sanitizedText") or armor_response.get("text")
244309

245310
def _process_response(
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
import sys
2+
import os
3+
import pytest
4+
from unittest.mock import AsyncMock
5+
from fastapi import HTTPException
6+
7+
sys.path.insert(0, os.path.abspath("../.."))
8+
9+
from litellm.proxy.guardrails.guardrail_hooks.model_armor.model_armor import ModelArmorGuardrail
10+
11+
def test_sanitize_file_prompt_builds_pdf_body():
    """sanitize_file_prompt must wrap PDF bytes as a base64 byteItem under userPromptData."""
    import base64

    armor = ModelArmorGuardrail(
        template_id="dummy-template",
        project_id="dummy-project",
        location="us-central1",
        credentials=None,
    )
    pdf_bytes = b"%PDF-1.4 some pdf content"

    body = armor.sanitize_file_prompt(pdf_bytes, "PDF", source="user_prompt")

    assert "userPromptData" in body
    byte_item = body["userPromptData"]["byteItem"]
    assert byte_item["byteDataType"] == "PDF"
    assert byte_item["byteData"] == base64.b64encode(pdf_bytes).decode("utf-8")
25+
26+
@pytest.mark.asyncio
async def test_make_model_armor_request_file_prompt():
    """make_model_armor_request should accept file bytes and return the armor response unchanged."""
    armor = ModelArmorGuardrail(
        template_id="dummy-template",
        project_id="dummy-project",
        location="us-central1",
        credentials=None,
    )
    file_bytes = b"My SSN is 123-45-6789."
    expected_response = {
        "sanitizationResult": {
            "filterResults": [
                {
                    "sdpFilterResult": {
                        "inspectResult": {
                            "executionState": "EXECUTION_SUCCESS",
                            "matchState": "MATCH_FOUND",
                            "findings": [
                                {
                                    "infoType": "US_SOCIAL_SECURITY_NUMBER",
                                    "likelihood": "LIKELY",
                                }
                            ],
                        },
                        "deidentifyResult": {
                            "executionState": "EXECUTION_SUCCESS",
                            "matchState": "MATCH_FOUND",
                            "data": {"text": "My SSN is [REDACTED]."},
                        },
                    }
                }
            ]
        }
    }

    class _StubResponse:
        """Minimal stand-in for the HTTP response object the guardrail reads."""

        def __init__(self, status_code, text, json_data):
            self.status_code = status_code
            self.text = text
            self._json = json_data

        def json(self):
            return self._json

    class _StubHandler:
        """Async HTTP handler stub returning a canned Model Armor response."""

        async def post(self, url, json, headers):
            return _StubResponse(200, str(expected_response), expected_response)

    armor.async_handler = _StubHandler()
    armor._ensure_access_token_async = AsyncMock(
        return_value=("dummy-token", "dummy-project")
    )

    result = await armor.make_model_armor_request(
        file_bytes=file_bytes,
        file_type="PLAINTEXT_UTF8",
        source="user_prompt",
    )

    deid = result["sanitizationResult"]["filterResults"][0]["sdpFilterResult"]["deidentifyResult"]
    assert deid["data"]["text"] == "My SSN is [REDACTED]."
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
import sys
2+
import os
3+
import pytest
4+
from unittest.mock import AsyncMock, patch
5+
from fastapi import HTTPException
6+
7+
sys.path.insert(0, os.path.abspath("../.."))
8+
9+
from litellm.proxy.guardrails.guardrail_hooks.model_armor.model_armor import ModelArmorGuardrail
10+
from litellm.proxy._types import UserAPIKeyAuth
11+
from litellm.caching.caching import DualCache
12+
13+
@pytest.mark.asyncio
async def test_model_armor_pre_call_hook_inspect_and_deidentify():
    """
    Test Model Armor guardrail pre-call hook for both inspectResult and deidentifyResult handling.
    """
    armor = ModelArmorGuardrail(
        template_id="dummy-template",
        project_id="dummy-project",
        location="us-central1",
        credentials=None,
    )
    stub_response = {
        "sanitizationResult": {
            "filterResults": [
                {
                    "sdpFilterResult": {
                        "inspectResult": {
                            "executionState": "EXECUTION_SUCCESS",
                            "matchState": "NO_MATCH_FOUND",
                            "findings": [],
                        },
                        "deidentifyResult": {
                            "executionState": "EXECUTION_SUCCESS",
                            "matchState": "MATCH_FOUND",
                            "data": {"text": "sanitized text here"},
                        },
                    }
                }
            ]
        }
    }
    mocked_request = AsyncMock(return_value=stub_response)
    with patch.object(armor, "make_model_armor_request", mocked_request):
        request_data = {
            "messages": [
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": "My SSN is 123-45-6789."},
            ],
            "model": "gpt-3.5-turbo",
            "metadata": {},
        }
        armor.mask_request_content = True
        with pytest.raises(HTTPException) as exc_info:
            await armor.async_pre_call_hook(
                user_api_key_dict=UserAPIKeyAuth(api_key="test_key"),
                cache=DualCache(),
                data=request_data,
                call_type="completion",
            )
        assert exc_info.value.status_code == 400
        assert "Content blocked by Model Armor" in str(exc_info.value.detail)
65+
66+
def test_model_armor_should_block_content():
    """_should_block_content must block on SDP inspect or deidentify matches, and pass otherwise."""
    armor = ModelArmorGuardrail(
        template_id="dummy-template",
        project_id="dummy-project",
        location="us-central1",
        credentials=None,
    )

    def sdp_response(sdp_result):
        # Wrap a single sdpFilterResult in the list-shaped filterResults envelope.
        return {
            "sanitizationResult": {"filterResults": [{"sdpFilterResult": sdp_result}]}
        }

    # Block on inspectResult
    assert armor._should_block_content(
        sdp_response({"inspectResult": {"matchState": "MATCH_FOUND"}})
    )
    # Block on deidentifyResult
    assert armor._should_block_content(
        sdp_response({"deidentifyResult": {"matchState": "MATCH_FOUND"}})
    )
    # No block if neither matched
    assert not armor._should_block_content(
        sdp_response(
            {
                "inspectResult": {"matchState": "NO_MATCH_FOUND"},
                "deidentifyResult": {"matchState": "NO_MATCH_FOUND"},
            }
        )
    )

0 commit comments

Comments
 (0)