Commit d70b5cf

:white_check_mark: Push test for base class and fix response massaging issue
Signed-off-by: Gaurav-Kumbhat <Gaurav.Kumbhat@ibm.com>
1 parent 21c5103 commit d70b5cf

2 files changed (+103, -1 lines changed)


tests/generative_detectors/test_base.py

Lines changed: 100 additions & 0 deletions
@@ -1,16 +1,31 @@
 # Standard
 from dataclasses import dataclass
 from typing import Optional
+from unittest.mock import patch
 import asyncio

 # Third Party
 from vllm.config import MultiModalConfig
+from vllm.entrypoints.openai.protocol import (
+    ChatCompletionLogProb,
+    ChatCompletionLogProbs,
+    ChatCompletionLogProbsContent,
+    ChatCompletionResponse,
+    ChatCompletionResponseChoice,
+    ChatMessage,
+    UsageInfo,
+)
 from vllm.entrypoints.openai.serving_models import BaseModelPath, OpenAIServingModels
 import jinja2
+import pytest
 import pytest_asyncio

 # Local
 from vllm_detector_adapter.generative_detectors.base import ChatCompletionDetectionBase
+from vllm_detector_adapter.protocol import (
+    ContentsDetectionRequest,
+    ContentsDetectionResponse,
+)

 MODEL_NAME = "openai-community/gpt2"
 CHAT_TEMPLATE = "Dummy chat template for testing {}"
@@ -82,6 +97,55 @@ async def detection_base():
     return _async_serving_detection_completion_init()


+@pytest.fixture(scope="module")
+def granite_completion_response():
+    log_probs_content_no = ChatCompletionLogProbsContent(
+        token="no",
+        logprob=-0.0013,
+        # 5 logprobs requested for scoring, skipping bytes for conciseness
+        top_logprobs=[
+            ChatCompletionLogProb(token="no", logprob=-0.053),
+            ChatCompletionLogProb(token="0", logprob=-6.61),
+            ChatCompletionLogProb(token="1", logprob=-16.90),
+            ChatCompletionLogProb(token="2", logprob=-17.39),
+            ChatCompletionLogProb(token="3", logprob=-17.61),
+        ],
+    )
+    log_probs_content_yes = ChatCompletionLogProbsContent(
+        token="yes",
+        logprob=-0.0013,
+        # 5 logprobs requested for scoring, skipping bytes for conciseness
+        top_logprobs=[
+            ChatCompletionLogProb(token="yes", logprob=-0.0013),
+            ChatCompletionLogProb(token="0", logprob=-6.61),
+            ChatCompletionLogProb(token="1", logprob=-16.90),
+            ChatCompletionLogProb(token="2", logprob=-17.39),
+            ChatCompletionLogProb(token="3", logprob=-17.61),
+        ],
+    )
+    choice_0 = ChatCompletionResponseChoice(
+        index=0,
+        message=ChatMessage(
+            role="assistant",
+            content="no",
+        ),
+        logprobs=ChatCompletionLogProbs(content=[log_probs_content_no]),
+    )
+    choice_1 = ChatCompletionResponseChoice(
+        index=1,
+        message=ChatMessage(
+            role="assistant",
+            content="yes",
+        ),
+        logprobs=ChatCompletionLogProbs(content=[log_probs_content_yes]),
+    )
+    yield ChatCompletionResponse(
+        model=MODEL_NAME,
+        choices=[choice_0, choice_1],
+        usage=UsageInfo(prompt_tokens=200, total_tokens=206, completion_tokens=6),
+    )
+
+
 ### Tests #####################################################################

@@ -97,3 +161,39 @@ def test_async_serving_detection_completion_init(detection_base):
     output_template = detection_completion.output_template
     assert type(output_template) == jinja2.environment.Template
     assert output_template.render(({"text": "moose"})) == "bye moose"
+
+
+def test_content_analysis_success(detection_base, granite_completion_response):
+    base_instance = asyncio.run(detection_base)
+
+    content_request = ContentsDetectionRequest(
+        contents=["Where do I find geese?", "You could go to Canada"]
+    )
+
+    scores = [0.9, 0.1, 0.21, 0.54, 0.33]
+    response = (granite_completion_response, scores, "risk")
+    with patch(
+        "vllm_detector_adapter.generative_detectors.base.ChatCompletionDetectionBase.process_chat_completion_with_scores",
+        return_value=response,
+    ):
+        result = asyncio.run(base_instance.content_analysis(content_request))
+        assert isinstance(result, ContentsDetectionResponse)
+        detections = result.model_dump()
+        assert len(detections) == 2
+        print(detections)
+        # For the first content
+        assert detections[0][0]["detection"] == "no"
+        assert detections[0][0]["score"] == 0.9
+        assert detections[0][0]["start"] == 0
+        assert detections[0][0]["end"] == len(content_request.contents[0])
+        # 2nd choice as 2nd label
+        assert detections[0][1]["detection"] == "yes"
+        assert detections[0][1]["score"] == 0.1
+        assert detections[0][1]["start"] == 0
+        assert detections[0][1]["end"] == len(content_request.contents[0])
+        # For the 2nd content, only the 1st detection is checked for simplicity.
+        # Note: the detection is the same because of how the mock works.
+        assert detections[1][0]["detection"] == "no"
+        assert detections[1][0]["score"] == 0.9
+        assert detections[1][0]["start"] == 0
+        assert detections[1][0]["end"] == len(content_request.contents[1])
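
Why both contents produce identical detections in the new test: unittest.mock's patch(..., return_value=...) replaces process_chat_completion_with_scores for the whole with-block and hands back the same (response, scores, detection_type) tuple on every call, regardless of which content is being analyzed. A minimal, self-contained illustration of that behavior (the Detector class below is hypothetical and not from this repository):

from unittest.mock import patch

class Detector:
    def analyze(self, text):
        raise NotImplementedError("replaced by the patch below")

# return_value makes every call return the same object, so each content in a
# request would see identical detections -- exactly what the test comment
# "detection is the same because of how the mock works" refers to.
with patch.object(Detector, "analyze", return_value=("no", 0.9)):
    detector = Detector()
    assert detector.analyze("Where do I find geese?") == ("no", 0.9)
    assert detector.analyze("You could go to Canada") == ("no", 0.9)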

vllm_detector_adapter/generative_detectors/base.py

Lines changed: 3 additions & 1 deletion
@@ -333,4 +333,6 @@ async def content_analysis(
         if isinstance(result, ErrorResponse):
             return result

-        return ContentsDetectionResponse.from_chat_completion_response(results)
+        return ContentsDetectionResponse.from_chat_completion_response(
+            results, request.contents
+        )
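
The one-line fix above threads request.contents through to ContentsDetectionResponse.from_chat_completion_response so the response massaging can attach per-content character offsets, which is what the new test asserts (start == 0, end == len(content)). The actual classmethod lives in vllm_detector_adapter/protocol.py and is not shown in this diff; the sketch below is only an assumed, simplified illustration of that mapping, with the helper name invented for the example:

from typing import List, Tuple

def contents_detections_sketch(
    results: List[Tuple[object, List[float], str]],
    contents: List[str],
) -> List[List[dict]]:
    # Hypothetical sketch, not the repository implementation.
    # One (chat_response, scores, detection_type) tuple is assumed per analyzed
    # content string; each choice/score pair becomes a detection covering the
    # whole content, since no token-level alignment is done.
    detections = []
    for content, (chat_response, scores, _detection_type) in zip(contents, results):
        per_content = []
        for choice, score in zip(chat_response.choices, scores):
            per_content.append(
                {
                    "detection": choice.message.content,  # e.g. "no" / "yes"
                    "score": score,
                    "start": 0,
                    "end": len(content),
                }
            )
        detections.append(per_content)
    return detections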
