
Commit 830896b

Merge pull request #74 from swith004/content_detection_response_validation
Add an isinstance validation check for ErrorResponse on new_result in content_analysis, and handle empty content responses from the detector
2 parents: 7e90171 + a47970c

5 files changed: +87 -3 lines changed
tests/generative_detectors/test_base.py

Lines changed: 36 additions & 1 deletion
@@ -21,7 +21,10 @@
 import pytest_asyncio

 # Local
-from vllm_detector_adapter.generative_detectors.base import ChatCompletionDetectionBase
+from vllm_detector_adapter.generative_detectors.base import (
+    ChatCompletionDetectionBase,
+    ErrorResponse,
+)
 from vllm_detector_adapter.protocol import (
     ContentsDetectionRequest,
     ContentsDetectionResponse,
@@ -204,3 +207,35 @@ def test_content_analysis_success(detection_base, completion_response):
     assert detections[1][0]["start"] == 0
     assert detections[1][0]["end"] == len(content_request.contents[1])
     assert detections[1][0]["metadata"] == {}
+
+
+def test_content_analysis_errorresponse_verification(detection_base):
+    """Test that content_analysis properly propagates an ErrorResponse when a choice has empty content."""
+    base_instance = asyncio.run(detection_base)
+    content_request = ContentsDetectionRequest(contents=["Where do I find geese?"])
+
+    # Simulate what the model would produce for the request: empty content triggers the error
+    choice = ChatCompletionResponseChoice(
+        index=0,
+        message=ChatMessage(role="assistant", content=""),
+        logprobs=ChatCompletionLogProbs(content=[]),
+    )
+    completion_response = ChatCompletionResponse(
+        model="test-model",
+        choices=[choice],
+        usage=UsageInfo(prompt_tokens=1, total_tokens=2, completion_tokens=1),
+    )
+    scores = [0.5]
+    detection_type = "risk"
+    response = (completion_response, scores, detection_type)
+
+    # Patch process_chat_completion_with_scores to return the simulated response
+    with patch(
+        "vllm_detector_adapter.generative_detectors.base.ChatCompletionDetectionBase.process_chat_completion_with_scores",
+        return_value=response,
+    ):
+        result = asyncio.run(base_instance.content_analysis(content_request))
+
+    assert isinstance(result, ErrorResponse)
+    assert result.type == "BadRequestError"
+    assert "does not have content" in result.message

tests/test_protocol.py

Lines changed: 34 additions & 0 deletions
@@ -1,5 +1,6 @@
 # Standard
 from http import HTTPStatus
+from unittest.mock import patch

 # Third Party
 from vllm.entrypoints.openai.protocol import (
@@ -357,3 +358,36 @@ def test_response_from_completion_response_missing_content():
         in detection_response.message
     )
     assert detection_response.code == HTTPStatus.BAD_REQUEST.value
+
+
+def test_response_from_empty_string_content_detection():
+    choice_content_0 = ChatCompletionResponseChoice(
+        index=0,
+        message=ChatMessage(
+            role="assistant",
+            content="",
+        ),
+    )
+    chat_response_0 = ChatCompletionResponse(
+        model=MODEL_NAME,
+        choices=[choice_content_0],
+        usage=UsageInfo(prompt_tokens=136, total_tokens=140, completion_tokens=4),
+    )
+
+    contents = ["sample sentence 1"]
+    # scores for each content are a list of scores (for multi-label)
+    scores = [[0.9]]
+    detection_type = "risk"
+
+    detection_response = ContentsDetectionResponse.from_chat_completion_response(
+        [
+            (chat_response_0, scores[0], detection_type),
+        ],
+        contents,
+    )
+    assert type(detection_response) == ErrorResponse
+    assert (
+        "Choice 0 from chat completion does not have content"
+        in detection_response.message
+    )
+    assert detection_response.code == HTTPStatus.BAD_REQUEST.value

vllm_detector_adapter/generative_detectors/base.py

Lines changed: 7 additions & 0 deletions
@@ -447,6 +447,13 @@ async def content_analysis(
             )
         )

+        # If new_result is an ErrorResponse, return it so the error propagates
+        if isinstance(new_result, ErrorResponse):
+            logger.debug(
+                f"[content_analysis] ErrorResponse returned: {repr(new_result)}"
+            )
+            return new_result
+
         processed_result.append(new_result)

     return ContentsDetectionResponse(root=processed_result)
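The effect of this guard is fail-fast behavior: without it, an ErrorResponse produced while processing one content item would be appended to processed_result and serialized inside the ContentsDetectionResponse root. A minimal standalone sketch of the pattern (the ErrorResponse class and collect function below are simplified stand-ins for illustration, not the adapter's real types):

```python
from typing import List, Union

class ErrorResponse:  # simplified stand-in for vllm's ErrorResponse
    def __init__(self, message: str):
        self.message = message

def collect(results: List[Union[dict, ErrorResponse]]):
    processed = []
    for new_result in results:
        # Fail fast: surface the first error instead of appending it
        # to the aggregate response.
        if isinstance(new_result, ErrorResponse):
            return new_result
        processed.append(new_result)
    return processed

assert isinstance(collect([{"ok": 1}, ErrorResponse("no content")]), ErrorResponse)
assert collect([{"ok": 1}]) == [{"ok": 1}]
```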

vllm_detector_adapter/generative_detectors/llama_guard.py

Lines changed: 7 additions & 0 deletions
@@ -195,6 +195,13 @@ async def content_analysis(
             )
         )

+        # If new_result is an ErrorResponse, return it so the error propagates
+        if isinstance(new_result, ErrorResponse):
+            logger.debug(
+                f"[content_analysis] ErrorResponse returned: {repr(new_result)}"
+            )
+            return new_result
+
         processed_result.append(new_result)

     return ContentsDetectionResponse(root=processed_result)

vllm_detector_adapter/protocol.py

Lines changed: 3 additions & 2 deletions
@@ -82,7 +82,7 @@ def from_chat_completion_response(
             # NOTE: for providing spans, we currently consider the entire generated text as a span.
             # This is because, at the time of writing, the generative guardrail models do not
             # provide specific information about the input text that can be used to deduce spans.
-            if content and isinstance(content, str):
+            if isinstance(content, str) and content.strip():
                 response_object = ContentsDetectionResponseObject(
                     detection_type=detection_type,
                     detection=content.strip(),
@@ -93,6 +93,7 @@ def from_chat_completion_response(
                     metadata=metadata_per_choice[i] if metadata_per_choice else {},
                 ).model_dump()
                 detection_responses.append(response_object)
+
             else:
                 # This case should be unlikely but we handle it since a detection
                 # can't be returned without the content
@@ -335,7 +336,7 @@ def from_chat_completion_response(
         detection_responses = []
         for i, choice in enumerate(response.choices):
            content = choice.message.content
-            if content and isinstance(content, str):
+            if isinstance(content, str) and content.strip():
                 response_object = DetectionResponseObject(
                     detection_type=detection_type,
                     detection=content.strip(),
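Beyond reordering for readability, the new condition changes behavior for whitespace-only strings: `content` alone is truthy for `"   "`, while `content.strip()` is not, so such responses now fall through to the ErrorResponse branch alongside empty strings. A quick illustrative comparison (standalone sketch, not the adapter's code):

```python
def old_check(content) -> bool:
    return bool(content and isinstance(content, str))

def new_check(content) -> bool:
    return bool(isinstance(content, str) and content.strip())

for content in ["unsafe", "", "   ", None]:
    print(repr(content), old_check(content), new_check(content))
# 'unsafe' True True
# ''       False False
# '   '    True False   <- whitespace-only content now rejected
# None     False False
```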
