
Commit 3341eb8

feat: include model information in ChatCompletion responses for GCP Anthropic and VertexAI (#1358)

**Description**: Ensure the model name from the response body is passed on to the OpenAI response body.

Signed-off-by: Sukumar Gaonkar <[email protected]>

1 parent 1078890 commit 3341eb8
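Both the GCP Anthropic and Vertex AI translators now resolve the response model the same way: use the model the upstream reports in its response body when present, otherwise fall back to the model from the original request (Bedrock keeps the fallback-only behavior, since its Converse API never returns a model name). A minimal sketch of that precedence (`resolveResponseModel` is a hypothetical helper, not a function from this commit; the real translators inline the check):

```go
package main

import "fmt"

// resolveResponseModel sketches the precedence both translators now apply.
// Hypothetical helper for illustration only; the actual code inlines this
// logic inside each translator's ResponseBody method.
func resolveResponseModel(requestModel, upstreamModel string) string {
	if upstreamModel != "" {
		// The provider reported a concrete model, e.g. a dated snapshot
		// such as "gemini-1.5-pro-002" for a request of "gemini-1.5-pro".
		return upstreamModel
	}
	// The provider omitted the field (as Bedrock does), so echo the request model.
	return requestModel
}

func main() {
	fmt.Println(resolveResponseModel("gemini-1.5-pro", "gemini-1.5-pro-002")) // gemini-1.5-pro-002
	fmt.Println(resolveResponseModel("claude-3-sonnet", ""))                  // claude-3-sonnet
}
```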

File tree

7 files changed (+128, -12 lines)


internal/extproc/translator/openai_awsbedrock.go

Lines changed: 2 additions & 0 deletions

```diff
@@ -639,6 +639,8 @@ func (o *openAIToAWSBedrockTranslatorV1ChatCompletion) ResponseBody(_ map[string
 		return nil, nil, LLMTokenUsage{}, "", fmt.Errorf("failed to unmarshal body: %w", err)
 	}
 	openAIResp := &openai.ChatCompletionResponse{
+		// We use request model as response model since bedrock does not return the modelName in the response.
+		Model:   o.requestModel,
 		Object:  "chat.completion",
 		Choices: make([]openai.ChatCompletionResponseChoice, 0),
 	}
```

internal/extproc/translator/openai_awsbedrock_test.go

Lines changed: 4 additions & 1 deletion

```diff
@@ -22,6 +22,7 @@ import (
 	"github.com/google/go-cmp/cmp"
 	openaigo "github.com/openai/openai-go/v2"
 	"github.com/stretchr/testify/require"
+	"github.com/tidwall/gjson"
 	"k8s.io/utils/ptr"

 	"github.com/envoyproxy/ai-gateway/internal/apischema/awsbedrock"
@@ -2241,9 +2242,11 @@ func TestResponseModel_AWSBedrock(t *testing.T) {
 		}
 	}`

-	_, _, tokenUsage, responseModel, err := translator.ResponseBody(nil, bytes.NewReader([]byte(bedrockResponse)), true, nil)
+	_, bm, tokenUsage, responseModel, err := translator.ResponseBody(nil, bytes.NewReader([]byte(bedrockResponse)), true, nil)
 	require.NoError(t, err)
 	require.Equal(t, modelName, responseModel) // Returns the request model since no virtualization
+	respBodyModel := gjson.GetBytes(bm.GetBody(), "model").Value()
+	require.Equal(t, modelName, respBodyModel)
 	require.Equal(t, uint32(10), tokenUsage.InputTokens)
 	require.Equal(t, uint32(5), tokenUsage.OutputTokens)
 }
```
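The new assertion reads the `model` field out of the mutated body with tidwall/gjson instead of unmarshalling the whole response into a struct. A standalone sketch of that lookup (the body literal below is made up for illustration):

```go
package main

import (
	"fmt"

	"github.com/tidwall/gjson"
)

func main() {
	// Stand-in for the mutated response body produced by a translator.
	body := []byte(`{"model":"gpt-4-0613","object":"chat.completion"}`)
	// GetBytes parses the raw JSON and returns the value at the given path.
	fmt.Println(gjson.GetBytes(body, "model").String()) // gpt-4-0613
}
```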

internal/extproc/translator/openai_gcpanthropic.go

Lines changed: 7 additions & 1 deletion

```diff
@@ -673,7 +673,13 @@ func (o *openAIToGCPAnthropicTranslatorV1ChatCompletion) ResponseBody(_ map[stri
 		return nil, nil, tokenUsage, "", fmt.Errorf("failed to unmarshal body: %w", err)
 	}

+	responseModel = o.requestModel
+	if anthropicResp.Model != "" {
+		responseModel = string(anthropicResp.Model)
+	}
+
 	openAIResp := &openai.ChatCompletionResponse{
+		Model:   responseModel,
 		Object:  string(openAIconstant.ValueOf[openAIconstant.ChatCompletion]()),
 		Choices: make([]openai.ChatCompletionResponseChoice, 0),
 	}
@@ -734,5 +740,5 @@ func (o *openAIToGCPAnthropicTranslatorV1ChatCompletion) ResponseBody(_ map[stri
 	if span != nil {
 		span.RecordResponse(openAIResp)
 	}
-	return headerMutation, &extprocv3.BodyMutation{Mutation: mut}, tokenUsage, o.requestModel, nil
+	return headerMutation, &extprocv3.BodyMutation{Mutation: mut}, tokenUsage, responseModel, nil
 }
```
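Note that both translator changes assign to `responseModel` without `:=`, which suggests it is the function's named return value (consistent with the bare `responseModel` in the rewritten return statements), so the resolved value flows both into the response body's `Model` field and into the translator's return.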

internal/extproc/translator/openai_gcpanthropic_test.go

Lines changed: 31 additions & 0 deletions

```diff
@@ -433,6 +433,37 @@ func TestOpenAIToGCPAnthropicTranslatorV1ChatCompletion_ResponseBody(t *testing.
 				},
 			},
 		},
+		{
+			name: "response with model field set",
+			inputResponse: &anthropic.Message{
+				ID:         "msg_01XYZ123",
+				Model:      "claude-3-5-sonnet-20241022",
+				Role:       constant.Assistant(anthropic.MessageParamRoleAssistant),
+				Content:    []anthropic.ContentBlockUnion{{Type: "text", Text: "Model field test response."}},
+				StopReason: anthropic.StopReasonEndTurn,
+				Usage:      anthropic.Usage{InputTokens: 8, OutputTokens: 12, CacheReadInputTokens: 2},
+			},
+			respHeaders: map[string]string{statusHeaderName: "200"},
+			expectedOpenAIResponse: openai.ChatCompletionResponse{
+				Model:  "claude-3-5-sonnet-20241022",
+				Object: "chat.completion",
+				Usage: openai.Usage{
+					PromptTokens:     8,
+					CompletionTokens: 12,
+					TotalTokens:      20,
+					PromptTokensDetails: &openai.PromptTokensDetails{
+						CachedTokens: 2,
+					},
+				},
+				Choices: []openai.ChatCompletionResponseChoice{
+					{
+						Index:        0,
+						Message:      openai.ChatCompletionResponseChoiceMessage{Role: "assistant", Content: ptr.To("Model field test response.")},
+						FinishReason: openai.ChatCompletionChoicesFinishReasonStop,
+					},
+				},
+			},
+		},
 	}

 	for _, tt := range tests {
```

internal/extproc/translator/openai_gcpvertexai.go

Lines changed: 10 additions & 3 deletions

```diff
@@ -122,9 +122,15 @@ func (o *openAIToGCPVertexAITranslatorV1ChatCompletion) ResponseBody(_ map[strin
 		return nil, nil, LLMTokenUsage{}, "", fmt.Errorf("error decoding GCP response: %w", err)
 	}

+	responseModel = o.requestModel
+	if gcpResp.ModelVersion != "" {
+		// Use the model version from the response if available.
+		responseModel = gcpResp.ModelVersion
+	}
+
 	var openAIRespBytes []byte
 	// Convert to OpenAI format.
-	openAIResp, err := o.geminiResponseToOpenAIMessage(gcpResp)
+	openAIResp, err := o.geminiResponseToOpenAIMessage(gcpResp, responseModel)
 	if err != nil {
 		return nil, nil, LLMTokenUsage{}, "", fmt.Errorf("error converting GCP response to OpenAI format: %w", err)
 	}
@@ -149,7 +155,7 @@ func (o *openAIToGCPVertexAITranslatorV1ChatCompletion) ResponseBody(_ map[strin
 	if span != nil {
 		span.RecordResponse(openAIResp)
 	}
-	return headerMutation, bodyMutation, usage, o.requestModel, nil
+	return headerMutation, bodyMutation, usage, responseModel, nil
 }

 // handleStreamingResponse handles streaming responses from GCP Gemini API.
@@ -322,7 +328,7 @@ func (o *openAIToGCPVertexAITranslatorV1ChatCompletion) applyVendorSpecificField
 	}
 }

-func (o *openAIToGCPVertexAITranslatorV1ChatCompletion) geminiResponseToOpenAIMessage(gcr genai.GenerateContentResponse) (*openai.ChatCompletionResponse, error) {
+func (o *openAIToGCPVertexAITranslatorV1ChatCompletion) geminiResponseToOpenAIMessage(gcr genai.GenerateContentResponse, responseModel string) (*openai.ChatCompletionResponse, error) {
 	// Convert candidates to OpenAI choices.
 	choices, err := geminiCandidatesToOpenAIChoices(gcr.Candidates)
 	if err != nil {
@@ -331,6 +337,7 @@ func (o *openAIToGCPVertexAITranslatorV1ChatCompletion) geminiResponseToOpenAIMe

 	// Set up the OpenAI response.
 	openaiResp := &openai.ChatCompletionResponse{
+		Model:   responseModel,
 		Choices: choices,
 		Object:  "chat.completion",
 		Usage:   geminiUsageToOpenAIUsage(gcr.UsageMetadata),
```

internal/extproc/translator/openai_gcpvertexai_test.go

Lines changed: 67 additions & 0 deletions

```diff
@@ -869,6 +869,73 @@ data: [DONE]
 			TotalTokens: 8,
 		},
 	},
+	{
+		name: "response with model version field",
+		respHeaders: map[string]string{
+			"content-type": "application/json",
+		},
+		body: `{
+  "modelVersion": "gemini-1.5-pro-002",
+  "candidates": [
+    {
+      "content": {
+        "parts": [
+          {
+            "text": "Response with model version set."
+          }
+        ]
+      },
+      "finishReason": "STOP",
+      "safetyRatings": []
+    }
+  ],
+  "promptFeedback": {
+    "safetyRatings": []
+  },
+  "usageMetadata": {
+    "promptTokenCount": 6,
+    "candidatesTokenCount": 8,
+    "totalTokenCount": 14
+  }
+}`,
+		endOfStream: true,
+		wantError:   false,
+		wantHeaderMut: &extprocv3.HeaderMutation{
+			SetHeaders: []*corev3.HeaderValueOption{{
+				Header: &corev3.HeaderValue{Key: "Content-Length", RawValue: []byte("306")},
+			}},
+		},
+		wantBodyMut: &extprocv3.BodyMutation{
+			Mutation: &extprocv3.BodyMutation_Body{
+				Body: []byte(`{
+  "choices": [
+    {
+      "finish_reason": "stop",
+      "index": 0,
+      "message": {
+        "content": "Response with model version set.",
+        "role": "assistant"
+      }
+    }
+  ],
+  "model": "gemini-1.5-pro-002",
+  "object": "chat.completion",
+  "usage": {
+    "completion_tokens": 8,
+    "completion_tokens_details": {},
+    "prompt_tokens": 6,
+    "prompt_tokens_details": {},
+    "total_tokens": 14
+  }
+}`),
+			},
+		},
+		wantTokenUsage: LLMTokenUsage{
+			InputTokens:  6,
+			OutputTokens: 8,
+			TotalTokens:  14,
+		},
+	},
 }

 for _, tc := range tests {
```

tests/extproc/testupstream_test.go

Lines changed: 7 additions & 7 deletions

```diff
@@ -153,7 +153,7 @@ func TestWithTestUpstream(t *testing.T) {
 			responseBody:   `{"output":{"message":{"content":[{"text":"response"},{"text":"from"},{"text":"assistant"}],"role":"assistant"}},"stopReason":null,"usage":{"inputTokens":10,"outputTokens":20,"totalTokens":30}}`,
 			expRequestBody: `{"inferenceConfig":{},"messages":[],"system":[{"text":"You are a chatbot."}]}`,
 			expStatus:      http.StatusOK,
-			expResponseBody: `{"choices":[{"finish_reason":"stop","index":0,"message":{"content":"response","role":"assistant"}}],"object":"chat.completion","usage":{"completion_tokens":20,"prompt_tokens":10,"total_tokens":30}}`,
+			expResponseBody: `{"choices":[{"finish_reason":"stop","index":0,"message":{"content":"response","role":"assistant"}}],"model":"something","object":"chat.completion","usage":{"completion_tokens":20,"prompt_tokens":10,"total_tokens":30}}`,
 		},
 		{
 			name: "openai - /v1/chat/completions",
@@ -199,7 +199,7 @@ func TestWithTestUpstream(t *testing.T) {
 			requestBody:    toolCallResultsRequestBody,
 			expRequestBody: `{"inferenceConfig":{"maxTokens":1024},"messages":[{"content":[{"text":"List the files in the /tmp directory"}],"role":"user"},{"content":[{"toolUse":{"name":"list_files","input":{"path":"/tmp"},"toolUseId":"call_abc123"}}],"role":"assistant"},{"content":[{"toolResult":{"content":[{"text":"[\"foo.txt\", \"bar.log\", \"data.csv\"]"}],"status":null,"toolUseId":"call_abc123"}}],"role":"user"}]}`,
 			responseBody:   `{"output":{"message":{"content":[{"text":"response"},{"text":"from"},{"text":"assistant"}],"role":"assistant"}},"stopReason":null,"usage":{"inputTokens":10,"outputTokens":20,"totalTokens":30}}`,
-			expResponseBody: `{"choices":[{"finish_reason":"stop","index":0,"message":{"content":"response","role":"assistant"}}],"object":"chat.completion","usage":{"completion_tokens":20,"prompt_tokens":10,"total_tokens":30}}`,
+			expResponseBody: `{"choices":[{"finish_reason":"stop","index":0,"message":{"content":"response","role":"assistant"}}],"model":"gpt-4-0613","object":"chat.completion","usage":{"completion_tokens":20,"prompt_tokens":10,"total_tokens":30}}`,
 			expStatus: http.StatusOK,
 		},
 		{
@@ -211,7 +211,7 @@ func TestWithTestUpstream(t *testing.T) {
 			requestBody:    toolCallResultsRequestBody,
 			expRequestBody: `{"max_tokens":1024,"messages":[{"content":[{"text":"List the files in the /tmp directory","type":"text"}],"role":"user"},{"content":[{"id":"call_abc123","input":{"path":"/tmp"},"name":"list_files","type":"tool_use"}],"role":"assistant"},{"content":[{"tool_use_id":"call_abc123","is_error":false,"content":[{"text":"[\"foo.txt\", \"bar.log\", \"data.csv\"]","type":"text"}],"type":"tool_result"}],"role":"user"}],"anthropic_version":"vertex-2023-10-16"}`,
 			responseBody:   `{"id":"msg_123","type":"message","role":"assistant","stop_reason": "end_turn", "content":[{"type":"text","text":"Hello from Anthropic!"}],"usage":{"input_tokens":10,"output_tokens":25,"cache_read_input_tokens":10}}`,
-			expResponseBody: `{"choices":[{"finish_reason":"stop","index":0,"message":{"content":"Hello from Anthropic!","role":"assistant"}}],"object":"chat.completion","usage":{"completion_tokens":25,"prompt_tokens":10,"total_tokens":35,"prompt_tokens_details":{"cached_tokens":10}}}`,
+			expResponseBody: `{"choices":[{"finish_reason":"stop","index":0,"message":{"content":"Hello from Anthropic!","role":"assistant"}}],"model":"gpt-4-0613","object":"chat.completion","usage":{"completion_tokens":25,"prompt_tokens":10,"total_tokens":35,"prompt_tokens_details":{"cached_tokens":10}}}`,
 			expStatus: http.StatusOK,
 		},
 		{
@@ -238,7 +238,7 @@ func TestWithTestUpstream(t *testing.T) {
 			responseStatus: strconv.Itoa(http.StatusOK),
 			responseBody:   `{"candidates":[{"content":{"parts":[{"text":"This is a test response from Gemini."}],"role":"model"},"finishReason":"STOP"}],"usageMetadata":{"promptTokenCount":15,"candidatesTokenCount":10,"totalTokenCount":25,"cachedContentTokenCount":10,"thoughtsTokenCount":10}}`,
 			expStatus:      http.StatusOK,
-			expResponseBody: `{"choices":[{"finish_reason":"stop","index":0,"message":{"content":"This is a test response from Gemini.","role":"assistant"}}],"object":"chat.completion","usage":{"completion_tokens":10,"completion_tokens_details":{"reasoning_tokens":10},"prompt_tokens":15,"prompt_tokens_details":{"cached_tokens":10},"total_tokens":25}}`,
+			expResponseBody: `{"choices":[{"finish_reason":"stop","index":0,"message":{"content":"This is a test response from Gemini.","role":"assistant"}}],"model":"gemini-1.5-pro","object":"chat.completion","usage":{"completion_tokens":10,"completion_tokens_details":{"reasoning_tokens":10},"prompt_tokens":15,"prompt_tokens_details":{"cached_tokens":10},"total_tokens":25}}`,
 		},
 		{
 			name: "gcp-vertexai - /v1/chat/completions",
@@ -253,7 +253,7 @@ func TestWithTestUpstream(t *testing.T) {
 			responseStatus: strconv.Itoa(http.StatusOK),
 			responseBody:   `{"candidates":[{"content":{"parts":[{"text":"This is a test response from Gemini."}],"role":"model"},"finishReason":"STOP"}],"usageMetadata":{"promptTokenCount":15,"candidatesTokenCount":10,"totalTokenCount":25}}`,
 			expStatus:      http.StatusOK,
-			expResponseBody: `{"choices":[{"finish_reason":"stop","index":0,"message":{"content":"This is a test response from Gemini.","role":"assistant"}}],"object":"chat.completion","usage":{"completion_tokens":10,"completion_tokens_details":{},"prompt_tokens":15,"total_tokens":25,"prompt_tokens_details":{}}}`,
+			expResponseBody: `{"choices":[{"finish_reason":"stop","index":0,"message":{"content":"This is a test response from Gemini.","role":"assistant"}}],"model":"gemini-1.5-pro","object":"chat.completion","usage":{"completion_tokens":10,"completion_tokens_details":{},"prompt_tokens":15,"total_tokens":25,"prompt_tokens_details":{}}}`,
 		},
 		{
 			name: "gcp-vertexai - /v1/chat/completions - tool use",
@@ -268,7 +268,7 @@ func TestWithTestUpstream(t *testing.T) {
 			responseStatus: strconv.Itoa(http.StatusOK),
 			responseBody:   `{"candidates":[{"content":{"role":"model","parts":[{"functionCall":{"name":"get_delivery_date","args":{"order_id":"123"}}}]},"finishReason":"STOP","avgLogprobs":0.000001220789272338152}],"usageMetadata":{"promptTokenCount":50,"candidatesTokenCount":11,"totalTokenCount":61,"trafficType":"ON_DEMAND","promptTokensDetails":[{"modality":"TEXT","tokenCount":50}],"candidatesTokensDetails":[{"modality":"TEXT","tokenCount":11}]},"modelVersion":"gemini-2.0-flash-001","createTime":"2025-07-11T22:15:44.956335Z","responseId":"EI5xaK-vOtqJm22IPmuCR14AI"}`,
 			expStatus:      http.StatusOK,
-			expResponseBody: `{"choices":[{"finish_reason":"stop","index":0,"message":{"role":"assistant","tool_calls":[{"id":"703482f8-2e5b-4dcc-a872-d74bd66c3866","function":{"arguments":"{\"order_id\":\"123\"}","name":"get_delivery_date"},"type":"function"}]}}],"object":"chat.completion","usage":{"completion_tokens":11,"completion_tokens_details":{},"prompt_tokens":50,"total_tokens":61,"prompt_tokens_details":{}}}`,
+			expResponseBody: `{"choices":[{"finish_reason":"stop","index":0,"message":{"role":"assistant","tool_calls":[{"id":"703482f8-2e5b-4dcc-a872-d74bd66c3866","function":{"arguments":"{\"order_id\":\"123\"}","name":"get_delivery_date"},"type":"function"}]}}],"model":"gemini-2.0-flash-001","object":"chat.completion","usage":{"completion_tokens":11,"completion_tokens_details":{},"prompt_tokens":50,"total_tokens":61,"prompt_tokens_details":{}}}`,
 		},
 		{
 			name: "gcp-anthropicai - /v1/chat/completions",
@@ -283,7 +283,7 @@ func TestWithTestUpstream(t *testing.T) {
 			responseStatus: strconv.Itoa(http.StatusOK),
 			responseBody:   `{"id":"msg_123","type":"message","role":"assistant","stop_reason": "end_turn", "content":[{"type":"text","text":"Hello from Anthropic!"}],"usage":{"input_tokens":10,"output_tokens":25}}`,
 			expStatus:      http.StatusOK,
-			expResponseBody: `{"choices":[{"finish_reason":"stop","index":0,"message":{"content":"Hello from Anthropic!","role":"assistant"}}],"object":"chat.completion","usage":{"completion_tokens":25,"prompt_tokens":10,"total_tokens":35,"prompt_tokens_details":{}}}`,
+			expResponseBody: `{"choices":[{"finish_reason":"stop","index":0,"message":{"content":"Hello from Anthropic!","role":"assistant"}}],"model":"claude-3-sonnet","object":"chat.completion","usage":{"completion_tokens":25,"prompt_tokens":10,"total_tokens":35,"prompt_tokens_details":{}}}`,
 		},
 		{
 			name: "modelname-override - /v1/chat/completions",
```
