
Commit 32ce458

sukumargaonkar, aabchoo, yuzisun, and mathetake authored and committed
feat: add gemini safety ratings to ChatCompletion responses (envoyproxy#1287)
**Description** This PR adds support for safety ratings in ChatCompletion responses when using GCP Vertex AI. The safety ratings are copied from the Vertex AI response as-is and included in the OpenAI-compatible response format. The implementation adds a new `SafetyRatings` field to the `ChatCompletionResponseChoiceMessage` struct. GCP Safety Ratings doc: [1] Key changes: - Added `SafetyRatings` field to OpenAI API schema for chat completion responses - Updated Gemini translator to map safety ratings from Vertex AI responses - Safety ratings are only included when present in the backend response **Special notes for reviewers (if applicable)** The safety ratings follow the GCP Vertex AI format and are passed through unchanged to maintain compatibility with Google's safety rating system. The field is optional and only populated when safety ratings are present in the upstream response. [1]: https://cloud.google.com/vertex-ai/generative-ai/docs/reference/rest/v1/GenerateContentResponse#SafetyRating --------- Signed-off-by: Sukumar Gaonkar <[email protected]> Signed-off-by: Dan Sun <[email protected]> Co-authored-by: Aaron Choo <[email protected]> Co-authored-by: Dan Sun <[email protected]> Co-authored-by: Takeshi Yoneda <[email protected]> Signed-off-by: Hrushikesh Patil <[email protected]>
1 parent 0344926 commit 32ce458
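Since the ratings are passed through verbatim, gateway clients can read them straight off the OpenAI-style JSON. Below is a minimal client-side sketch: the `safetyRating`/`chatCompletion` structs are hypothetical illustrations (not types exported by this repo), but the JSON keys (`safety_ratings`, `category`, `probability`) match the schema added in this commit.

```go
package main

import (
	"encoding/json"
	"fmt"
)

// Hypothetical client-side types for illustration only; the JSON keys match
// the schema this commit adds to the OpenAI-compatible response.
type safetyRating struct {
	Category    string `json:"category"`
	Probability string `json:"probability"`
}

type chatMessage struct {
	Role          string         `json:"role"`
	Content       string         `json:"content"`
	SafetyRatings []safetyRating `json:"safety_ratings,omitempty"`
}

type chatChoice struct {
	Index        int         `json:"index"`
	Message      chatMessage `json:"message"`
	FinishReason string      `json:"finish_reason"`
}

type chatCompletion struct {
	Choices []chatChoice `json:"choices"`
}

func main() {
	// A response body shaped like the gateway's output in this PR.
	body := []byte(`{"choices":[{"index":0,"finish_reason":"stop","message":{
		"role":"assistant","content":"hi",
		"safety_ratings":[{"category":"HARM_CATEGORY_HARASSMENT","probability":"LOW"}]}}]}`)

	var resp chatCompletion
	if err := json.Unmarshal(body, &resp); err != nil {
		panic(err)
	}
	for _, r := range resp.Choices[0].Message.SafetyRatings {
		fmt.Printf("%s => %s\n", r.Category, r.Probability) // HARM_CATEGORY_HARASSMENT => LOW
	}
}
```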

File tree

4 files changed: +169 −0 lines changed


internal/apischema/openai/openai.go

Lines changed: 7 additions & 0 deletions
```diff
@@ -1220,6 +1220,13 @@ type ChatCompletionResponseChoiceMessage struct {
 	// ReasoningContent is used to hold any non-standard fields from the backend which supports reasoning,
 	// like "reasoningContent" from AWS Bedrock.
 	ReasoningContent *ReasoningContentUnion `json:"reasoning_content,omitempty"`
+
+	// GCPVertexAI specific fields.
+
+	// SafetyRatings contains safety ratings copied from the GCP Vertex AI response as-is.
+	// List of ratings for the safety of a response candidate. There is at most one rating per category.
+	// https://cloud.google.com/vertex-ai/generative-ai/docs/reference/rest/v1/GenerateContentResponse#SafetyRating
+	SafetyRatings []*genai.SafetyRating `json:"safety_ratings,omitempty"`
 }

 // URLCitation contains citation information for web search results.
```

internal/apischema/openai/openai_test.go

Lines changed: 64 additions & 0 deletions
```diff
@@ -15,6 +15,7 @@ import (
 	"github.com/openai/openai-go/v2"
 	"github.com/openai/openai-go/v2/packages/param"
 	"github.com/stretchr/testify/require"
+	"google.golang.org/genai"
 	"k8s.io/utils/ptr"
 )

@@ -992,6 +993,69 @@ func TestChatCompletionResponse(t *testing.T) {
 			}
 		}`,
 		},
+		{
+			name: "response with safety settings",
+			response: ChatCompletionResponse{
+				ID:      "chatcmpl-safety-test",
+				Created: JSONUNIXTime(time.Unix(1755135425, 0)),
+				Model:   "gpt-4.1-nano",
+				Object:  "chat.completion",
+				Choices: []ChatCompletionResponseChoice{
+					{
+						Index:        0,
+						FinishReason: ChatCompletionChoicesFinishReasonStop,
+						Message: ChatCompletionResponseChoiceMessage{
+							Role:    "assistant",
+							Content: ptr.To("This is a safe response"),
+							SafetyRatings: []*genai.SafetyRating{
+								{
+									Category:    genai.HarmCategoryHarassment,
+									Probability: genai.HarmProbabilityLow,
+								},
+								{
+									Category:    genai.HarmCategorySexuallyExplicit,
+									Probability: genai.HarmProbabilityNegligible,
+								},
+							},
+						},
+					},
+				},
+				Usage: ChatCompletionResponseUsage{
+					CompletionTokens: 5,
+					PromptTokens:     3,
+					TotalTokens:      8,
+				},
+			},
+			expected: `{
+			"id": "chatcmpl-safety-test",
+			"object": "chat.completion",
+			"created": 1755135425,
+			"model": "gpt-4.1-nano",
+			"choices": [{
+				"index": 0,
+				"message": {
+					"role": "assistant",
+					"content": "This is a safe response",
+					"safety_ratings": [
+						{
+							"category": "HARM_CATEGORY_HARASSMENT",
+							"probability": "LOW"
+						},
+						{
+							"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
+							"probability": "NEGLIGIBLE"
+						}
+					]
+				},
+				"finish_reason": "stop"
+			}],
+			"usage": {
+				"prompt_tokens": 3,
+				"completion_tokens": 5,
+				"total_tokens": 8
+			}
+		}`,
+		},
 	}

 	for _, tc := range testCases {
```

internal/extproc/translator/gemini_helper.go

Lines changed: 8 additions & 0 deletions
```diff
@@ -486,6 +486,14 @@ func geminiCandidatesToOpenAIChoices(candidates []*genai.Candidate) ([]openai.Ch
 			choice.Message = message
 		}

+		if candidate.SafetyRatings != nil {
+			if choice.Message.Role == "" {
+				choice.Message.Role = openai.ChatMessageRoleAssistant
+			}
+
+			choice.Message.SafetyRatings = candidate.SafetyRatings
+		}
+
 		// Handle logprobs if available.
 		if candidate.LogprobsResult != nil {
 			choice.Logprobs = geminiLogprobsToOpenAILogprobs(*candidate.LogprobsResult)
```
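One subtlety in the hunk above: a Gemini candidate can carry safety ratings even when it produced no message content (for example, a blocked response), in which case the translated message would otherwise have an empty role. A minimal standalone sketch of that defaulting rule, assuming the `google.golang.org/genai` types used throughout this repo (`roleFor` is a hypothetical helper, not a function in this codebase):

```go
package main

import (
	"fmt"

	"google.golang.org/genai"
)

// roleFor is a hypothetical helper illustrating the defaulting rule added in
// geminiCandidatesToOpenAIChoices: ratings-only candidates still need a role.
func roleFor(candidate *genai.Candidate, current string) string {
	if candidate.SafetyRatings != nil && current == "" {
		return "assistant" // the value of openai.ChatMessageRoleAssistant
	}
	return current
}

func main() {
	// A candidate with safety ratings but no content parts, as Vertex AI may
	// return for a blocked generation.
	blocked := &genai.Candidate{
		SafetyRatings: []*genai.SafetyRating{{
			Category:    genai.HarmCategoryDangerousContent,
			Probability: genai.HarmProbabilityMedium,
		}},
	}
	fmt.Println(roleFor(blocked, "")) // assistant
}
```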

internal/extproc/translator/openai_gcpvertexai_test.go

Lines changed: 90 additions & 0 deletions
```diff
@@ -721,6 +721,96 @@ func TestOpenAIToGCPVertexAITranslatorV1ChatCompletion_ResponseBody(t *testing.T
 				TotalTokens: 25,
 			},
 		},
+		{
+			name: "response with safety ratings",
+			respHeaders: map[string]string{
+				"content-type": "application/json",
+			},
+			body: `{
+  "candidates": [
+    {
+      "content": {
+        "parts": [
+          {
+            "text": "This is a safe response from the AI assistant."
+          }
+        ]
+      },
+      "finishReason": "STOP",
+      "safetyRatings": [
+        {
+          "category": "HARM_CATEGORY_HARASSMENT",
+          "probability": "LOW"
+        },
+        {
+          "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
+          "probability": "NEGLIGIBLE"
+        },
+        {
+          "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
+          "probability": "MEDIUM"
+        }
+      ]
+    }
+  ],
+  "promptFeedback": {
+    "safetyRatings": []
+  },
+  "usageMetadata": {
+    "promptTokenCount": 8,
+    "candidatesTokenCount": 12,
+    "totalTokenCount": 20
+  }
+}`,
+			endOfStream: true,
+			wantError:   false,
+			wantHeaderMut: &extprocv3.HeaderMutation{
+				SetHeaders: []*corev3.HeaderValueOption{{
+					Header: &corev3.HeaderValue{Key: "Content-Length", RawValue: []byte("457")},
+				}},
+			},
+			wantBodyMut: &extprocv3.BodyMutation{
+				Mutation: &extprocv3.BodyMutation_Body{
+					Body: []byte(`{
+  "choices": [
+    {
+      "finish_reason": "stop",
+      "index": 0,
+      "message": {
+        "content": "This is a safe response from the AI assistant.",
+        "role": "assistant",
+        "safety_ratings": [
+          {
+            "category": "HARM_CATEGORY_HARASSMENT",
+            "probability": "LOW"
+          },
+          {
+            "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
+            "probability": "NEGLIGIBLE"
+          },
+          {
+            "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
+            "probability": "MEDIUM"
+          }
+        ]
+      }
+    }
+  ],
+  "object": "chat.completion",
+  "usage": {
+    "completion_tokens": 12,
+    "prompt_tokens": 8,
+    "total_tokens": 20
+  }
+}`),
+				},
+			},
+			wantTokenUsage: LLMTokenUsage{
+				InputTokens:  8,
+				OutputTokens: 12,
+				TotalTokens:  20,
+			},
+		},
 		{
 			name: "empty response",
 			respHeaders: map[string]string{
```
