
Commit fe1a8df

hustxiayang, yuzisun, mathetake, and aabchoo authored
fix: make usage chunk in stream mode of gemini compatible with openai (#1503)
**Description**

Users found that simply using the "usage" information does not work for streaming responses of Gemini models.

![image](https://github.com/user-attachments/assets/334a5515-a57b-4f83-9bab-f296422409b8)

This is because for OpenAI models, the usage arrives as a separate, final chunk. For example, here is a response from gpt-4o:

```
...
chunk=ChatCompletionChunk(id='chatcmpl-CYv6DPkWfT1xrsS2ySOoRztQKnZDg', choices=[Choice(delta=ChoiceDelta(content=None, function_call=None, refusal=None, role=None, tool_calls=None), finish_reason='stop', index=0, logprobs=None, content_filter_result={'error': {'code': 'content_filter_error', 'message': 'The contents are not filtered'}})], created=1762438677, model='azure.gpt-4o', object='chat.completion.chunk', service_tier=None, system_fingerprint='fp_4a331a0222', usage=None, obfuscation='2xP')

chunk=ChatCompletionChunk(id='chatcmpl-CYv6DPkWfT1xrsS2ySOoRztQKnZDg', choices=[], created=1762438677, model='azure.gpt-4o', object='chat.completion.chunk', service_tier=None, system_fingerprint='fp_4a331a0222', usage=CompletionUsage(completion_tokens=17, prompt_tokens=12, total_tokens=29, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)), obfuscation='xY40HsJr')
```

There is a finish_reason chunk, and then a separate usage chunk. This change makes the Gemini translation follow the same convention so that it is compatible with OpenAI. (The Anthropic translation is already compatible.)

---------

Signed-off-by: yxia216 <[email protected]>
Signed-off-by: Takeshi Yoneda <[email protected]>
Co-authored-by: Dan Sun <[email protected]>
Co-authored-by: Takeshi Yoneda <[email protected]>
Co-authored-by: Aaron Choo <[email protected]>
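For context, below is a minimal sketch of how an OpenAI-compatible client would pick the usage out of such a stream: it scans the SSE events and treats the chunk with an empty `choices` array and a non-null `usage` as the usage chunk. The types and helper names here are illustrative, not part of this repository.

```go
package main

import (
	"bufio"
	"encoding/json"
	"fmt"
	"io"
	"strings"
)

// usage mirrors a subset of the OpenAI "usage" object.
type usage struct {
	PromptTokens     int `json:"prompt_tokens"`
	CompletionTokens int `json:"completion_tokens"`
	TotalTokens      int `json:"total_tokens"`
}

// chunk is a minimal view of a chat.completion.chunk payload.
type chunk struct {
	Choices []json.RawMessage `json:"choices"`
	Usage   *usage            `json:"usage"`
}

// readUsage scans an SSE stream and returns the usage reported by the
// trailing usage-only chunk (empty "choices", non-null "usage").
func readUsage(r io.Reader) (*usage, error) {
	sc := bufio.NewScanner(r)
	for sc.Scan() {
		line := sc.Text()
		if !strings.HasPrefix(line, "data: ") {
			continue // skip blank separators between SSE events
		}
		payload := strings.TrimPrefix(line, "data: ")
		if payload == "[DONE]" {
			break
		}
		var c chunk
		if err := json.Unmarshal([]byte(payload), &c); err != nil {
			return nil, err
		}
		if len(c.Choices) == 0 && c.Usage != nil {
			return c.Usage, nil
		}
	}
	return nil, fmt.Errorf("stream ended without a usage chunk")
}

func main() {
	stream := "data: {\"choices\":[{\"index\":0,\"delta\":{\"content\":\"Hi\"}}],\"object\":\"chat.completion.chunk\"}\n\n" +
		"data: {\"choices\":[],\"object\":\"chat.completion.chunk\",\"usage\":{\"prompt_tokens\":10,\"completion_tokens\":7,\"total_tokens\":17}}\n\n" +
		"data: [DONE]\n"
	u, err := readUsage(strings.NewReader(stream))
	if err != nil {
		panic(err)
	}
	fmt.Printf("prompt=%d completion=%d total=%d\n", u.PromptTokens, u.CompletionTokens, u.TotalTokens)
}
```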
1 parent d78bbb9 · commit fe1a8df

File tree: 3 files changed (+47 -23 lines)


internal/translator/openai_gcpvertexai.go

Lines changed: 38 additions & 20 deletions
```diff
@@ -194,8 +194,41 @@ func (o *openAIToGCPVertexAITranslatorV1ChatCompletion) handleStreamingResponse(
 		// Convert GCP chunk to OpenAI chunk.
 		openAIChunk := o.convertGCPChunkToOpenAI(chunk)
 
-		// Extract token usage if present in this chunk (typically in the last chunk).
-		if chunk.UsageMetadata != nil {
+		// Serialize to SSE format as expected by OpenAI API.
+		err := serializeOpenAIChatCompletionChunk(*openAIChunk, &newBody)
+		if err != nil {
+			return nil, nil, metrics.TokenUsage{}, "", fmt.Errorf("error marshaling OpenAI chunk: %w", err)
+		}
+
+		if span != nil {
+			span.RecordResponseChunk(openAIChunk)
+		}
+
+		// Extract token usage only in the last chunk.
+		if chunk.UsageMetadata != nil && chunk.UsageMetadata.PromptTokenCount > 0 {
+			// Convert usage to pointer if available.
+			usage := ptr.To(geminiUsageToOpenAIUsage(chunk.UsageMetadata))
+
+			usageChunk := openai.ChatCompletionResponseChunk{
+				ID:      chunk.ResponseID,
+				Created: openai.JSONUNIXTime(chunk.CreateTime),
+				Object:  "chat.completion.chunk",
+				Choices: []openai.ChatCompletionResponseChunkChoice{},
+				// usage is nil for all chunks other than the last chunk
+				Usage: usage,
+				Model: o.requestModel,
+			}
+
+			// Serialize to SSE format as expected by OpenAI API.
+			err := serializeOpenAIChatCompletionChunk(usageChunk, &newBody)
+			if err != nil {
+				return nil, nil, metrics.TokenUsage{}, "", fmt.Errorf("error marshaling OpenAI chunk: %w", err)
+			}
+
+			if span != nil {
+				span.RecordResponseChunk(&usageChunk)
+			}
+
 			if chunk.UsageMetadata.PromptTokenCount >= 0 {
 				tokenUsage.SetInputTokens(uint32(chunk.UsageMetadata.PromptTokenCount)) //nolint:gosec
 			}
@@ -209,16 +242,6 @@ func (o *openAIToGCPVertexAITranslatorV1ChatCompletion) handleStreamingResponse(
 				tokenUsage.SetCachedInputTokens(uint32(chunk.UsageMetadata.CachedContentTokenCount)) //nolint:gosec
 			}
 		}
-
-		// Serialize to SSE format as expected by OpenAI API.
-		err := serializeOpenAIChatCompletionChunk(*openAIChunk, &newBody)
-		if err != nil {
-			return nil, nil, metrics.TokenUsage{}, "", fmt.Errorf("error marshaling OpenAI chunk: %w", err)
-		}
-
-		if span != nil {
-			span.RecordResponseChunk(openAIChunk)
-		}
 	}
 
 	if endOfStream {
@@ -381,19 +404,14 @@ func (o *openAIToGCPVertexAITranslatorV1ChatCompletion) convertGCPChunkToOpenAI(
 		choices = []openai.ChatCompletionResponseChunkChoice{}
 	}
 
-	// Convert usage to pointer if available.
-	var usage *openai.Usage
-	if chunk.UsageMetadata != nil {
-		usage = ptr.To(geminiUsageToOpenAIUsage(chunk.UsageMetadata))
-	}
-
 	return &openai.ChatCompletionResponseChunk{
 		ID:      chunk.ResponseID,
 		Created: openai.JSONUNIXTime(chunk.CreateTime),
 		Object:  "chat.completion.chunk",
 		Choices: choices,
-		Usage:   usage,
-		Model:   o.requestModel,
+		// usage is nil for all chunks other than the last chunk
+		Usage: nil,
+		Model: o.requestModel,
 	}
 }
```
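The new code path relies on `serializeOpenAIChatCompletionChunk` to frame each chunk as a server-sent event. As a rough sketch of what that framing looks like (the repository's actual helper may differ in signature and details), assuming the standard `data: <json>\n\n` convention that the tests below assert on:

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
)

// writeSSEChunk is a hypothetical stand-in for serializeOpenAIChatCompletionChunk:
// it JSON-encodes a chunk and appends it to the buffer with SSE framing.
func writeSSEChunk(buf *bytes.Buffer, chunk any) error {
	payload, err := json.Marshal(chunk)
	if err != nil {
		return fmt.Errorf("error marshaling OpenAI chunk: %w", err)
	}
	buf.WriteString("data: ")
	buf.Write(payload)
	buf.WriteString("\n\n")
	return nil
}

func main() {
	var buf bytes.Buffer
	_ = writeSSEChunk(&buf, map[string]string{"object": "chat.completion.chunk"})
	fmt.Print(buf.String()) // data: {"object":"chat.completion.chunk"}
}
```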

internal/translator/openai_gcpvertexai_test.go

Lines changed: 6 additions & 2 deletions
```diff
@@ -1019,7 +1019,9 @@ func TestOpenAIToGCPVertexAITranslatorV1ChatCompletion_ResponseBody(t *testing.T
 			endOfStream:   true,
 			wantError:     false,
 			wantHeaderMut: nil,
-			wantBodyMut: []byte(`data: {"choices":[{"index":0,"delta":{"content":"Hello","role":"assistant"}}],"object":"chat.completion.chunk","usage":{"prompt_tokens":5,"completion_tokens":3,"total_tokens":8,"completion_tokens_details":{},"prompt_tokens_details":{}}}
+			wantBodyMut: []byte(`data: {"choices":[{"index":0,"delta":{"content":"Hello","role":"assistant"}}],"object":"chat.completion.chunk"}
+
+data: {"object":"chat.completion.chunk","usage":{"prompt_tokens":5,"completion_tokens":3,"total_tokens":8,"completion_tokens_details":{},"prompt_tokens_details":{}}}
 
 data: [DONE]
 `),
@@ -1228,7 +1230,9 @@ data: {"candidates":[{"content":{"parts":[{"text":"Hello"}]}}],"usageMetadata":{
 			wantHeaderMut: nil,
 			wantBodyMut: []byte(`data: {"choices":[{"index":0,"delta":{"role":"assistant","reasoning_content":{"text":"let me think step by step and reply you."}}}],"object":"chat.completion.chunk"}
 
-data: {"choices":[{"index":0,"delta":{"content":"Hello","role":"assistant"}}],"object":"chat.completion.chunk","usage":{"prompt_tokens":5,"completion_tokens":3,"total_tokens":8,"completion_tokens_details":{},"prompt_tokens_details":{}}}
+data: {"choices":[{"index":0,"delta":{"content":"Hello","role":"assistant"}}],"object":"chat.completion.chunk"}
+
+data: {"object":"chat.completion.chunk","usage":{"prompt_tokens":5,"completion_tokens":3,"total_tokens":8,"completion_tokens_details":{},"prompt_tokens_details":{}}}
 
 data: [DONE]
 `),
```

tests/extproc/testupstream_test.go

Lines changed: 3 additions & 1 deletion
```diff
@@ -579,7 +579,9 @@ data: {"id":"msg_123","choices":[{"index":0,"delta":{"content":" you","role":"as
 
 data: {"id":"msg_123","choices":[{"index":0,"delta":{"content":" today","role":"assistant"}}],"created":123,"model":"gemini-1.5-pro","object":"chat.completion.chunk"}
 
-data: {"id":"msg_123","choices":[{"index":0,"delta":{"content":"?","role":"assistant"},"finish_reason":"stop"}],"created":123,"model":"gemini-1.5-pro","object":"chat.completion.chunk","usage":{"prompt_tokens":10,"completion_tokens":7,"total_tokens":17,"completion_tokens_details":{},"prompt_tokens_details":{}}}
+data: {"id":"msg_123","choices":[{"index":0,"delta":{"content":"?","role":"assistant"},"finish_reason":"stop"}],"created":123,"model":"gemini-1.5-pro","object":"chat.completion.chunk"}
+
+data: {"id":"msg_123","created":123,"model":"gemini-1.5-pro","object":"chat.completion.chunk","usage":{"prompt_tokens":10,"completion_tokens":7,"total_tokens":17,"completion_tokens_details":{},"prompt_tokens_details":{}}}
 
 data: [DONE]
 `,
```
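To observe the new framing end to end, one can replay a streaming request against a running gateway and dump the raw SSE lines. The URL, port, and model below are placeholders, and `stream_options.include_usage` is the OpenAI-side opt-in rather than something this diff inspects. With this fix, a Gemini-backed stream should end with a finish_reason chunk, then a usage-only chunk, then `data: [DONE]`.

```go
package main

import (
	"bufio"
	"fmt"
	"net/http"
	"strings"
)

func main() {
	// Placeholder endpoint and model; adjust to your gateway deployment.
	body := `{"model":"gemini-1.5-pro","stream":true,` +
		`"stream_options":{"include_usage":true},` +
		`"messages":[{"role":"user","content":"Hi"}]}`
	resp, err := http.Post("http://localhost:8080/v1/chat/completions",
		"application/json", strings.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// Print each SSE line as-is; the usage-only chunk arrives just before [DONE].
	sc := bufio.NewScanner(resp.Body)
	for sc.Scan() {
		fmt.Println(sc.Text())
	}
}
```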
