39 changes: 39 additions & 0 deletions tests/generative_detectors/test_granite_guardian.py
@@ -6,6 +6,7 @@
import asyncio

# Third Party
from jinja2.exceptions import TemplateError, UndefinedError
from vllm.config import MultiModalConfig
from vllm.entrypoints.openai.protocol import (
ChatCompletionLogProb,
@@ -415,3 +416,41 @@ def test_chat_detection_errors_on_stream(granite_guardian_detection):
assert type(detection_response) == ErrorResponse
assert detection_response.code == HTTPStatus.BAD_REQUEST.value
assert "streaming is not supported" in detection_response.message


def test_chat_detection_errors_on_jinja_template_error(granite_guardian_detection):
granite_guardian_detection_instance = asyncio.run(granite_guardian_detection)
chat_request = ChatDetectionRequest(
messages=[
DetectionChatMessageParam(role="user", content="How do I pick a lock?")
],
)
with patch(
"vllm_detector_adapter.generative_detectors.granite_guardian.GraniteGuardian.create_chat_completion",
side_effect=TemplateError(),
):
detection_response = asyncio.run(
granite_guardian_detection_instance.chat(chat_request)
)
assert type(detection_response) == ErrorResponse
assert detection_response.code == HTTPStatus.BAD_REQUEST.value
assert "Template error" in detection_response.message


def test_chat_detection_errors_on_undefined_jinja_error(granite_guardian_detection):
granite_guardian_detection_instance = asyncio.run(granite_guardian_detection)
chat_request = ChatDetectionRequest(
messages=[
DetectionChatMessageParam(role="user", content="How do I pick a lock?")
],
)
with patch(
"vllm_detector_adapter.generative_detectors.granite_guardian.GraniteGuardian.create_chat_completion",
side_effect=UndefinedError(),
):
detection_response = asyncio.run(
granite_guardian_detection_instance.chat(chat_request)
)
assert type(detection_response) == ErrorResponse
assert detection_response.code == HTTPStatus.BAD_REQUEST.value
assert "Template error. Please check request." in detection_response.message
30 changes: 27 additions & 3 deletions vllm_detector_adapter/generative_detectors/base.py
@@ -7,6 +7,7 @@

# Third Party
from fastapi import Request
from jinja2.exceptions import TemplateError, UndefinedError
from vllm.entrypoints.openai.protocol import ChatCompletionResponse, ErrorResponse
from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
import jinja2
@@ -157,9 +158,32 @@ async def process_chat_completion_with_scores(
logger.debug("Request to chat completion: %s", chat_completion_request)

# Call chat completion
chat_response = await self.create_chat_completion(
chat_completion_request, raw_request
)
try:
chat_response = await self.create_chat_completion(
chat_completion_request, raw_request
)
except UndefinedError:
# Undefined template errors can happen for a variety of reasons -
# e.g. for Granite Guardian, including but not limited to the following
# for a particular risk definition: an unexpected number of messages,
# unexpected ordering of messages, or unexpected roles used for
# particular messages. Users _may_ be able to correct some of these
# errors by changing the input, but the error message may not be
# directly user-comprehensible.
# This has to be caught first because jinja2's UndefinedError is a
# subclass of TemplateError
chat_response = ErrorResponse(
message="Template error. Please check request.",
Review thread on this line:

Collaborator:
Is the message in this case not usable to be passed to the user?

Collaborator (Author):
Yes - an example is example 2 of the PR description, e.g. 'dict object' has no attribute 'prompt', where prompt is not an end-user-facing parameter. It may be worth considering whether more of these cases can be caught as exceptions in the default template and surfaced as feedback, depending on the model.

Collaborator:
Hmm, true, but that information might still be helpful for debugging. Can we log the raw error message while we replace it with a better error message here?

Collaborator (Author):
Sure, we can just fall through to the general TemplateError then.

type="BadRequestError",
code=HTTPStatus.BAD_REQUEST.value,
)
except TemplateError as e:
# Other template errors' messages can be propagated to the user as-is,
# such as those raised via raise_exception in the chat_template
chat_response = ErrorResponse(
message=e.message or "Template error",
type="BadRequestError",
code=HTTPStatus.BAD_REQUEST.value,
)

logger.debug("Raw chat completion response: %s", chat_response)
if isinstance(chat_response, ErrorResponse):
# Propagate chat completion errors directly
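
Following up on the review thread above: if the raw jinja2 message should be preserved for operators while end users get the sanitized text, one option is to log it before constructing the ErrorResponse. This is only a sketch of that suggestion, not necessarily what was merged after the "fall through" decision; `logger` is assumed to be the module-level logger used elsewhere in base.py:

except UndefinedError as e:
    # Log the raw jinja2 message for debugging; it may reference
    # template-internal names (e.g. 'prompt') that end users never see.
    logger.error("Chat template rendering failed: %s", e.message or repr(e))
    chat_response = ErrorResponse(
        message="Template error. Please check request.",
        type="BadRequestError",
        code=HTTPStatus.BAD_REQUEST.value,
    )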