
Commit 3341eb8

feat: include model information in ChatCompletion responses for GCP Anthropic and VertexAI (#1358)

**Description**: Ensure the model name from the response body is passed on to the OpenAI response body.

Signed-off-by: Sukumar Gaonkar <[email protected]>

1 parent 1078890 commit 3341eb8
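Both the GCP Anthropic and Vertex AI translators now resolve the response model the same way: use the model the upstream reports in its response body when present, otherwise fall back to the model from the original request (Bedrock keeps the fallback-only behavior, since its Converse API never returns a model name). A minimal sketch of that precedence (`resolveResponseModel` is a hypothetical helper, not a function from this commit; the real translators inline the check):

```go
package main

import "fmt"

// resolveResponseModel sketches the precedence both translators now apply.
// Hypothetical helper for illustration only; the actual code inlines this
// logic inside each translator's ResponseBody method.
func resolveResponseModel(requestModel, upstreamModel string) string {
	if upstreamModel != "" {
		// The provider reported a concrete model, e.g. a dated snapshot
		// such as "gemini-1.5-pro-002" for a request of "gemini-1.5-pro".
		return upstreamModel
	}
	// The provider omitted the field (as Bedrock does), so echo the request model.
	return requestModel
}

func main() {
	fmt.Println(resolveResponseModel("gemini-1.5-pro", "gemini-1.5-pro-002")) // gemini-1.5-pro-002
	fmt.Println(resolveResponseModel("claude-3-sonnet", ""))                  // claude-3-sonnet
}
```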

File tree

7 files changed (+128, -12 lines)


internal/extproc/translator/openai_awsbedrock.go

Lines changed: 2 additions & 0 deletions

```diff
@@ -639,6 +639,8 @@ func (o *openAIToAWSBedrockTranslatorV1ChatCompletion) ResponseBody(_ map[string
 		return nil, nil, LLMTokenUsage{}, "", fmt.Errorf("failed to unmarshal body: %w", err)
 	}
 	openAIResp := &openai.ChatCompletionResponse{
+		// We use request model as response model since bedrock does not return the modelName in the response.
+		Model:   o.requestModel,
 		Object:  "chat.completion",
 		Choices: make([]openai.ChatCompletionResponseChoice, 0),
 	}
```

internal/extproc/translator/openai_awsbedrock_test.go

Lines changed: 4 additions & 1 deletion

```diff
@@ -22,6 +22,7 @@ import (
 	"github.com/google/go-cmp/cmp"
 	openaigo "github.com/openai/openai-go/v2"
 	"github.com/stretchr/testify/require"
+	"github.com/tidwall/gjson"
 	"k8s.io/utils/ptr"

 	"github.com/envoyproxy/ai-gateway/internal/apischema/awsbedrock"
@@ -2241,9 +2242,11 @@ func TestResponseModel_AWSBedrock(t *testing.T) {
 		}
 	}`

-	_, _, tokenUsage, responseModel, err := translator.ResponseBody(nil, bytes.NewReader([]byte(bedrockResponse)), true, nil)
+	_, bm, tokenUsage, responseModel, err := translator.ResponseBody(nil, bytes.NewReader([]byte(bedrockResponse)), true, nil)
 	require.NoError(t, err)
 	require.Equal(t, modelName, responseModel) // Returns the request model since no virtualization
+	respBodyModel := gjson.GetBytes(bm.GetBody(), "model").Value()
+	require.Equal(t, modelName, respBodyModel)
 	require.Equal(t, uint32(10), tokenUsage.InputTokens)
 	require.Equal(t, uint32(5), tokenUsage.OutputTokens)
 }
```
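The new assertion reads the `model` field out of the mutated body with tidwall/gjson instead of unmarshalling the whole response into a struct. A standalone sketch of that lookup (the body literal below is made up for illustration):

```go
package main

import (
	"fmt"

	"github.com/tidwall/gjson"
)

func main() {
	// Stand-in for the mutated response body produced by a translator.
	body := []byte(`{"model":"gpt-4-0613","object":"chat.completion"}`)
	// GetBytes parses the raw JSON and returns the value at the given path.
	fmt.Println(gjson.GetBytes(body, "model").String()) // gpt-4-0613
}
```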

internal/extproc/translator/openai_gcpanthropic.go

Lines changed: 7 additions & 1 deletion

```diff
@@ -673,7 +673,13 @@ func (o *openAIToGCPAnthropicTranslatorV1ChatCompletion) ResponseBody(_ map[stri
 		return nil, nil, tokenUsage, "", fmt.Errorf("failed to unmarshal body: %w", err)
 	}

+	responseModel = o.requestModel
+	if anthropicResp.Model != "" {
+		responseModel = string(anthropicResp.Model)
+	}
+
 	openAIResp := &openai.ChatCompletionResponse{
+		Model:   responseModel,
 		Object:  string(openAIconstant.ValueOf[openAIconstant.ChatCompletion]()),
 		Choices: make([]openai.ChatCompletionResponseChoice, 0),
 	}
@@ -734,5 +740,5 @@ func (o *openAIToGCPAnthropicTranslatorV1ChatCompletion) ResponseBody(_ map[stri
 	if span != nil {
 		span.RecordResponse(openAIResp)
 	}
-	return headerMutation, &extprocv3.BodyMutation{Mutation: mut}, tokenUsage, o.requestModel, nil
+	return headerMutation, &extprocv3.BodyMutation{Mutation: mut}, tokenUsage, responseModel, nil
 }
```
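Note that both translator changes assign to `responseModel` without `:=`, which suggests it is the function's named return value (consistent with the bare `responseModel` in the rewritten return statements), so the resolved value flows both into the response body's `Model` field and into the translator's return.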

internal/extproc/translator/openai_gcpanthropic_test.go

Lines changed: 31 additions & 0 deletions

```diff
@@ -433,6 +433,37 @@ func TestOpenAIToGCPAnthropicTranslatorV1ChatCompletion_ResponseBody(t *testing.
 				},
 			},
 		},
+		{
+			name: "response with model field set",
+			inputResponse: &anthropic.Message{
+				ID:         "msg_01XYZ123",
+				Model:      "claude-3-5-sonnet-20241022",
+				Role:       constant.Assistant(anthropic.MessageParamRoleAssistant),
+				Content:    []anthropic.ContentBlockUnion{{Type: "text", Text: "Model field test response."}},
+				StopReason: anthropic.StopReasonEndTurn,
+				Usage:      anthropic.Usage{InputTokens: 8, OutputTokens: 12, CacheReadInputTokens: 2},
+			},
+			respHeaders: map[string]string{statusHeaderName: "200"},
+			expectedOpenAIResponse: openai.ChatCompletionResponse{
+				Model:  "claude-3-5-sonnet-20241022",
+				Object: "chat.completion",
+				Usage: openai.Usage{
+					PromptTokens:     8,
+					CompletionTokens: 12,
+					TotalTokens:      20,
+					PromptTokensDetails: &openai.PromptTokensDetails{
+						CachedTokens: 2,
+					},
+				},
+				Choices: []openai.ChatCompletionResponseChoice{
+					{
+						Index:        0,
+						Message:      openai.ChatCompletionResponseChoiceMessage{Role: "assistant", Content: ptr.To("Model field test response.")},
+						FinishReason: openai.ChatCompletionChoicesFinishReasonStop,
+					},
+				},
+			},
+		},
 	}

 	for _, tt := range tests {
```

internal/extproc/translator/openai_gcpvertexai.go

Lines changed: 10 additions & 3 deletions

```diff
@@ -122,9 +122,15 @@ func (o *openAIToGCPVertexAITranslatorV1ChatCompletion) ResponseBody(_ map[strin
 		return nil, nil, LLMTokenUsage{}, "", fmt.Errorf("error decoding GCP response: %w", err)
 	}

+	responseModel = o.requestModel
+	if gcpResp.ModelVersion != "" {
+		// Use the model version from the response if available.
+		responseModel = gcpResp.ModelVersion
+	}
+
 	var openAIRespBytes []byte
 	// Convert to OpenAI format.
-	openAIResp, err := o.geminiResponseToOpenAIMessage(gcpResp)
+	openAIResp, err := o.geminiResponseToOpenAIMessage(gcpResp, responseModel)
 	if err != nil {
 		return nil, nil, LLMTokenUsage{}, "", fmt.Errorf("error converting GCP response to OpenAI format: %w", err)
 	}
@@ -149,7 +155,7 @@ func (o *openAIToGCPVertexAITranslatorV1ChatCompletion) ResponseBody(_ map[strin
 	if span != nil {
 		span.RecordResponse(openAIResp)
 	}
-	return headerMutation, bodyMutation, usage, o.requestModel, nil
+	return headerMutation, bodyMutation, usage, responseModel, nil
 }

 // handleStreamingResponse handles streaming responses from GCP Gemini API.
@@ -322,7 +328,7 @@ func (o *openAIToGCPVertexAITranslatorV1ChatCompletion) applyVendorSpecificField
 	}
 }

-func (o *openAIToGCPVertexAITranslatorV1ChatCompletion) geminiResponseToOpenAIMessage(gcr genai.GenerateContentResponse) (*openai.ChatCompletionResponse, error) {
+func (o *openAIToGCPVertexAITranslatorV1ChatCompletion) geminiResponseToOpenAIMessage(gcr genai.GenerateContentResponse, responseModel string) (*openai.ChatCompletionResponse, error) {
 	// Convert candidates to OpenAI choices.
 	choices, err := geminiCandidatesToOpenAIChoices(gcr.Candidates)
 	if err != nil {
@@ -331,6 +337,7 @@ func (o *openAIToGCPVertexAITranslatorV1ChatCompletion) geminiResponseToOpenAIMe

 	// Set up the OpenAI response.
 	openaiResp := &openai.ChatCompletionResponse{
+		Model:   responseModel,
 		Choices: choices,
 		Object:  "chat.completion",
 		Usage:   geminiUsageToOpenAIUsage(gcr.UsageMetadata),
```

internal/extproc/translator/openai_gcpvertexai_test.go

Lines changed: 67 additions & 0 deletions

```diff
@@ -869,6 +869,73 @@ data: [DONE]
 			TotalTokens: 8,
 		},
 	},
+	{
+		name: "response with model version field",
+		respHeaders: map[string]string{
+			"content-type": "application/json",
+		},
+		body: `{
+  "modelVersion": "gemini-1.5-pro-002",
+  "candidates": [
+    {
+      "content": {
+        "parts": [
+          {
+            "text": "Response with model version set."
+          }
+        ]
+      },
+      "finishReason": "STOP",
+      "safetyRatings": []
+    }
+  ],
+  "promptFeedback": {
+    "safetyRatings": []
+  },
+  "usageMetadata": {
+    "promptTokenCount": 6,
+    "candidatesTokenCount": 8,
+    "totalTokenCount": 14
+  }
+}`,
+		endOfStream: true,
+		wantError:   false,
+		wantHeaderMut: &extprocv3.HeaderMutation{
+			SetHeaders: []*corev3.HeaderValueOption{{
+				Header: &corev3.HeaderValue{Key: "Content-Length", RawValue: []byte("306")},
+			}},
+		},
+		wantBodyMut: &extprocv3.BodyMutation{
+			Mutation: &extprocv3.BodyMutation_Body{
+				Body: []byte(`{
+  "choices": [
+    {
+      "finish_reason": "stop",
+      "index": 0,
+      "message": {
+        "content": "Response with model version set.",
+        "role": "assistant"
+      }
+    }
+  ],
+  "model": "gemini-1.5-pro-002",
+  "object": "chat.completion",
+  "usage": {
+    "completion_tokens": 8,
+    "completion_tokens_details": {},
+    "prompt_tokens": 6,
+    "prompt_tokens_details": {},
+    "total_tokens": 14
+  }
+}`),
+			},
+		},
+		wantTokenUsage: LLMTokenUsage{
+			InputTokens:  6,
+			OutputTokens: 8,
+			TotalTokens:  14,
+		},
+	},
 }

 for _, tc := range tests {
```

tests/extproc/testupstream_test.go

Lines changed: 7 additions & 7 deletions

```diff
@@ -153,7 +153,7 @@ func TestWithTestUpstream(t *testing.T) {
 			responseBody:   `{"output":{"message":{"content":[{"text":"response"},{"text":"from"},{"text":"assistant"}],"role":"assistant"}},"stopReason":null,"usage":{"inputTokens":10,"outputTokens":20,"totalTokens":30}}`,
 			expRequestBody: `{"inferenceConfig":{},"messages":[],"system":[{"text":"You are a chatbot."}]}`,
 			expStatus:      http.StatusOK,
-			expResponseBody: `{"choices":[{"finish_reason":"stop","index":0,"message":{"content":"response","role":"assistant"}}],"object":"chat.completion","usage":{"completion_tokens":20,"prompt_tokens":10,"total_tokens":30}}`,
+			expResponseBody: `{"choices":[{"finish_reason":"stop","index":0,"message":{"content":"response","role":"assistant"}}],"model":"something","object":"chat.completion","usage":{"completion_tokens":20,"prompt_tokens":10,"total_tokens":30}}`,
 		},
 		{
 			name: "openai - /v1/chat/completions",
@@ -199,7 +199,7 @@ func TestWithTestUpstream(t *testing.T) {
 			requestBody:    toolCallResultsRequestBody,
 			expRequestBody: `{"inferenceConfig":{"maxTokens":1024},"messages":[{"content":[{"text":"List the files in the /tmp directory"}],"role":"user"},{"content":[{"toolUse":{"name":"list_files","input":{"path":"/tmp"},"toolUseId":"call_abc123"}}],"role":"assistant"},{"content":[{"toolResult":{"content":[{"text":"[\"foo.txt\", \"bar.log\", \"data.csv\"]"}],"status":null,"toolUseId":"call_abc123"}}],"role":"user"}]}`,
 			responseBody:   `{"output":{"message":{"content":[{"text":"response"},{"text":"from"},{"text":"assistant"}],"role":"assistant"}},"stopReason":null,"usage":{"inputTokens":10,"outputTokens":20,"totalTokens":30}}`,
-			expResponseBody: `{"choices":[{"finish_reason":"stop","index":0,"message":{"content":"response","role":"assistant"}}],"object":"chat.completion","usage":{"completion_tokens":20,"prompt_tokens":10,"total_tokens":30}}`,
+			expResponseBody: `{"choices":[{"finish_reason":"stop","index":0,"message":{"content":"response","role":"assistant"}}],"model":"gpt-4-0613","object":"chat.completion","usage":{"completion_tokens":20,"prompt_tokens":10,"total_tokens":30}}`,
 			expStatus: http.StatusOK,
 		},
 		{
@@ -211,7 +211,7 @@ func TestWithTestUpstream(t *testing.T) {
 			requestBody:    toolCallResultsRequestBody,
 			expRequestBody: `{"max_tokens":1024,"messages":[{"content":[{"text":"List the files in the /tmp directory","type":"text"}],"role":"user"},{"content":[{"id":"call_abc123","input":{"path":"/tmp"},"name":"list_files","type":"tool_use"}],"role":"assistant"},{"content":[{"tool_use_id":"call_abc123","is_error":false,"content":[{"text":"[\"foo.txt\", \"bar.log\", \"data.csv\"]","type":"text"}],"type":"tool_result"}],"role":"user"}],"anthropic_version":"vertex-2023-10-16"}`,
 			responseBody:   `{"id":"msg_123","type":"message","role":"assistant","stop_reason": "end_turn", "content":[{"type":"text","text":"Hello from Anthropic!"}],"usage":{"input_tokens":10,"output_tokens":25,"cache_read_input_tokens":10}}`,
-			expResponseBody: `{"choices":[{"finish_reason":"stop","index":0,"message":{"content":"Hello from Anthropic!","role":"assistant"}}],"object":"chat.completion","usage":{"completion_tokens":25,"prompt_tokens":10,"total_tokens":35,"prompt_tokens_details":{"cached_tokens":10}}}`,
+			expResponseBody: `{"choices":[{"finish_reason":"stop","index":0,"message":{"content":"Hello from Anthropic!","role":"assistant"}}],"model":"gpt-4-0613","object":"chat.completion","usage":{"completion_tokens":25,"prompt_tokens":10,"total_tokens":35,"prompt_tokens_details":{"cached_tokens":10}}}`,
 			expStatus: http.StatusOK,
 		},
 		{
@@ -238,7 +238,7 @@ func TestWithTestUpstream(t *testing.T) {
 			responseStatus: strconv.Itoa(http.StatusOK),
 			responseBody:   `{"candidates":[{"content":{"parts":[{"text":"This is a test response from Gemini."}],"role":"model"},"finishReason":"STOP"}],"usageMetadata":{"promptTokenCount":15,"candidatesTokenCount":10,"totalTokenCount":25,"cachedContentTokenCount":10,"thoughtsTokenCount":10}}`,
 			expStatus:      http.StatusOK,
-			expResponseBody: `{"choices":[{"finish_reason":"stop","index":0,"message":{"content":"This is a test response from Gemini.","role":"assistant"}}],"object":"chat.completion","usage":{"completion_tokens":10,"completion_tokens_details":{"reasoning_tokens":10},"prompt_tokens":15,"prompt_tokens_details":{"cached_tokens":10},"total_tokens":25}}`,
+			expResponseBody: `{"choices":[{"finish_reason":"stop","index":0,"message":{"content":"This is a test response from Gemini.","role":"assistant"}}],"model":"gemini-1.5-pro","object":"chat.completion","usage":{"completion_tokens":10,"completion_tokens_details":{"reasoning_tokens":10},"prompt_tokens":15,"prompt_tokens_details":{"cached_tokens":10},"total_tokens":25}}`,
 		},
 		{
 			name: "gcp-vertexai - /v1/chat/completions",
@@ -253,7 +253,7 @@ func TestWithTestUpstream(t *testing.T) {
 			responseStatus: strconv.Itoa(http.StatusOK),
 			responseBody:   `{"candidates":[{"content":{"parts":[{"text":"This is a test response from Gemini."}],"role":"model"},"finishReason":"STOP"}],"usageMetadata":{"promptTokenCount":15,"candidatesTokenCount":10,"totalTokenCount":25}}`,
 			expStatus:      http.StatusOK,
-			expResponseBody: `{"choices":[{"finish_reason":"stop","index":0,"message":{"content":"This is a test response from Gemini.","role":"assistant"}}],"object":"chat.completion","usage":{"completion_tokens":10,"completion_tokens_details":{},"prompt_tokens":15,"total_tokens":25,"prompt_tokens_details":{}}}`,
+			expResponseBody: `{"choices":[{"finish_reason":"stop","index":0,"message":{"content":"This is a test response from Gemini.","role":"assistant"}}],"model":"gemini-1.5-pro","object":"chat.completion","usage":{"completion_tokens":10,"completion_tokens_details":{},"prompt_tokens":15,"total_tokens":25,"prompt_tokens_details":{}}}`,
 		},
 		{
 			name: "gcp-vertexai - /v1/chat/completions - tool use",
@@ -268,7 +268,7 @@ func TestWithTestUpstream(t *testing.T) {
 			responseStatus: strconv.Itoa(http.StatusOK),
 			responseBody:   `{"candidates":[{"content":{"role":"model","parts":[{"functionCall":{"name":"get_delivery_date","args":{"order_id":"123"}}}]},"finishReason":"STOP","avgLogprobs":0.000001220789272338152}],"usageMetadata":{"promptTokenCount":50,"candidatesTokenCount":11,"totalTokenCount":61,"trafficType":"ON_DEMAND","promptTokensDetails":[{"modality":"TEXT","tokenCount":50}],"candidatesTokensDetails":[{"modality":"TEXT","tokenCount":11}]},"modelVersion":"gemini-2.0-flash-001","createTime":"2025-07-11T22:15:44.956335Z","responseId":"EI5xaK-vOtqJm22IPmuCR14AI"}`,
 			expStatus:      http.StatusOK,
-			expResponseBody: `{"choices":[{"finish_reason":"stop","index":0,"message":{"role":"assistant","tool_calls":[{"id":"703482f8-2e5b-4dcc-a872-d74bd66c3866","function":{"arguments":"{\"order_id\":\"123\"}","name":"get_delivery_date"},"type":"function"}]}}],"object":"chat.completion","usage":{"completion_tokens":11,"completion_tokens_details":{},"prompt_tokens":50,"total_tokens":61,"prompt_tokens_details":{}}}`,
+			expResponseBody: `{"choices":[{"finish_reason":"stop","index":0,"message":{"role":"assistant","tool_calls":[{"id":"703482f8-2e5b-4dcc-a872-d74bd66c3866","function":{"arguments":"{\"order_id\":\"123\"}","name":"get_delivery_date"},"type":"function"}]}}],"model":"gemini-2.0-flash-001","object":"chat.completion","usage":{"completion_tokens":11,"completion_tokens_details":{},"prompt_tokens":50,"total_tokens":61,"prompt_tokens_details":{}}}`,
 		},
 		{
 			name: "gcp-anthropicai - /v1/chat/completions",
@@ -283,7 +283,7 @@ func TestWithTestUpstream(t *testing.T) {
 			responseStatus: strconv.Itoa(http.StatusOK),
 			responseBody:   `{"id":"msg_123","type":"message","role":"assistant","stop_reason": "end_turn", "content":[{"type":"text","text":"Hello from Anthropic!"}],"usage":{"input_tokens":10,"output_tokens":25}}`,
 			expStatus:      http.StatusOK,
-			expResponseBody: `{"choices":[{"finish_reason":"stop","index":0,"message":{"content":"Hello from Anthropic!","role":"assistant"}}],"object":"chat.completion","usage":{"completion_tokens":25,"prompt_tokens":10,"total_tokens":35,"prompt_tokens_details":{}}}`,
+			expResponseBody: `{"choices":[{"finish_reason":"stop","index":0,"message":{"content":"Hello from Anthropic!","role":"assistant"}}],"model":"claude-3-sonnet","object":"chat.completion","usage":{"completion_tokens":25,"prompt_tokens":10,"total_tokens":35,"prompt_tokens_details":{}}}`,
 		},
 		{
 			name: "modelname-override - /v1/chat/completions",
```
