Commit 90ba3e9

fix: add stream value in gcp anthropic body (#1370)
**Description**

This PR fixes a bug where streaming chat completion requests from an OpenAI-compatible client to the GCP Anthropic backend were failing. Although the API returned a 200 OK status, the response was a non-streaming JSON object instead of a text/event-stream, so the client received an empty stream and our integration test assertions failed.

The integration test output looked like `> assert model_output, f"Received empty {model_output=}" E AssertionError: Received empty model_output='' E assert ''`. After adding debug logs, the response headers and the output message showed that the response was not streamed, even though the URL is the streaming endpoint. It seems you can send to the stream URL, but if the `stream` parameter isn't set, that takes precedence.

The header output (note the `application/json` content type):

`{ "time": "2025-10-14T16:02:11.407-04:00", "level": "DEBUG", "msg": "response headers processing", "response_headers": "headers:{key:\":status\" raw_value:\"200\"} headers:{key:\"content-type\" raw_value:\"application/json\"} headers:{key:\"server\" raw_value:\"hypercorn-h11\"} ..." }`

The second log shows the entire response body arriving in a single chunk with `end_of_stream:true`. This confirmed that instead of streaming the response piece by piece, the server sent the complete final message all at once:

`{ "time": "2025-10-14T16:02:11.408-04:00", "level": "DEBUG", "msg": "response body processing", "request": "response_body:{body:\"{\\\"id\\\":\\\"msg_vrtx_019F694kiwv6Z5BQApos5MJy\\\",\\\"type\\\":\\\"message\\\",\\\"role\\\":\\\"assistant\\\", ... ,\\\"content\\\":[{\\\"type\\\":\\\"text\\\",\\\"text\\\":\\\"I'm doing well, thank you for asking! I'm here and ready to help with whatever you'd like to chat about or work on. How are you doing today?\\\"}],\\\"stop_reason\\\":\\\"end_turn\\\", ... }\" end_of_stream:true}" }`

The root cause was the missing `"stream": true` field in the JSON payload sent to GCP's `:streamRawPredict` endpoint. This PR modifies the `OpenAIToGCPAnthropicTranslator` to conditionally add this field to the request body when the original client request is for a stream. For context, this appears to be how Anthropic's Go SDK handles it as well (https://github.com/anthropics/anthropic-sdk-go/blob/e8befdc7fdceba33c9000b0b50061b8a42cb6c04/message.go#L86); `stream` is not a field in the message param object.

**The Fix**

The solution implemented in this PR is to ensure the translated request body always carries `"stream": true` for streaming requests:

- **Modify `RequestBody`:** The `RequestBody` function in `openai_gcpanthropic.go` now checks whether the incoming `openAIReq.Stream` is true.
- **Inject the stream field:** If streaming is requested, the `sjson` library is used to inject the `"stream": true` key-value pair into the final JSON payload before it is sent to the GCP endpoint (a minimal sketch follows below the commit metadata).
- **Add a unit test:** The existing unit test "Streaming Request Path" has been renamed to "Streaming Request Validation", and an assertion now verifies that the `stream` field is set to `true` in the marshaled body, preventing future regressions.

---------

Signed-off-by: Alexa Griffith <[email protected]>
Signed-off-by: Sukumar Gaonkar <[email protected]>
Signed-off-by: Dan Sun <[email protected]>
1 parent 220b90c commit 90ba3e9
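For illustration, here is a minimal, self-contained sketch of the injection approach described above. It uses the `sjson` and `gjson` libraries that the translator and its test already depend on; the body literal and the `streaming` flag are hypothetical stand-ins for the translator's marshaled Anthropic request and `openAIReq.Stream`, not code from this commit.

```go
// Hypothetical standalone example; not the project's translator code.
package main

import (
	"fmt"

	"github.com/tidwall/gjson"
	"github.com/tidwall/sjson"
)

func main() {
	// A marshaled Anthropic-style request body that lacks the "stream" field.
	body := []byte(`{"max_tokens":1024,"messages":[{"role":"user","content":[{"type":"text","text":"Why is the sky blue?"}]}]}`)

	streaming := true // stands in for openAIReq.Stream in the real translator
	if streaming {
		var err error
		// Inject "stream": true so the :streamRawPredict endpoint actually streams.
		body, err = sjson.SetBytes(body, "stream", true)
		if err != nil {
			panic(err)
		}
	}

	// The same kind of check the new unit test assertion performs on the mutated body.
	fmt.Println(gjson.GetBytes(body, "stream").Bool()) // true
	fmt.Println(string(body))
}
```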


3 files changed (+14, -4 lines changed)

internal/extproc/translator/openai_gcpanthropic.go

Lines changed: 8 additions & 1 deletion
@@ -548,6 +548,10 @@ func (o *openAIToGCPAnthropicTranslatorV1ChatCompletion) RequestBody(_ []byte, o
 	specifier := "rawPredict"
 	if openAIReq.Stream {
 		specifier = "streamRawPredict"
+		body, err = sjson.SetBytes(body, "stream", true)
+		if err != nil {
+			return
+		}
 		o.streamParser = newAnthropicStreamParser(o.requestModel)
 	}

@@ -558,7 +562,10 @@ func (o *openAIToGCPAnthropicTranslatorV1ChatCompletion) RequestBody(_ []byte, o
 	if o.apiVersion != "" {
 		anthropicVersion = o.apiVersion
 	}
-	body, _ = sjson.SetBytes(body, anthropicVersionKey, anthropicVersion)
+	body, err = sjson.SetBytes(body, anthropicVersionKey, anthropicVersion)
+	if err != nil {
+		return
+	}

 	headerMutation, bodyMutation = buildRequestMutations(pathSuffix, body)
 	return

internal/extproc/translator/openai_gcpanthropic_test.go

Lines changed: 5 additions & 2 deletions
@@ -191,15 +191,15 @@ func TestOpenAIToGCPAnthropicTranslatorV1ChatCompletion_RequestBody(t *testing.T
 		require.Equal(t, thirdMsg, gjson.GetBytes(body, "messages.0.content.0.text").String())
 	})

-	t.Run("Streaming Request Path", func(t *testing.T) {
+	t.Run("Streaming Request Validation", func(t *testing.T) {
 		streamReq := &openai.ChatCompletionRequest{
 			Model:     claudeTestModel,
 			Messages:  []openai.ChatCompletionMessageParamUnion{},
 			MaxTokens: ptr.To(int64(100)),
 			Stream:    true,
 		}
 		translator := NewChatCompletionOpenAIToGCPAnthropicTranslator("", "")
-		hm, _, err := translator.RequestBody(nil, streamReq, false)
+		hm, bm, err := translator.RequestBody(nil, streamReq, false)
 		require.NoError(t, err)
 		require.NotNil(t, hm)

@@ -208,6 +208,9 @@ func TestOpenAIToGCPAnthropicTranslatorV1ChatCompletion_RequestBody(t *testing.T
 		require.Equal(t, ":path", pathHeader[0].Header.Key)
 		expectedPath := fmt.Sprintf("publishers/anthropic/models/%s:streamRawPredict", streamReq.Model)
 		require.Equal(t, expectedPath, string(pathHeader[0].Header.RawValue))
+
+		body := bm.GetBody()
+		require.True(t, gjson.GetBytes(body, "stream").Bool(), `body should contain "stream": true`)
 	})

 	t.Run("Test message param", func(t *testing.T) {

tests/extproc/testupstream_test.go

Lines changed: 1 addition & 1 deletion
@@ -534,7 +534,7 @@ data: [DONE]
 			method:       http.MethodPost,
 			responseType: "sse",
 			requestBody:  `{"model":"claude-3-sonnet","max_completion_tokens":1024, "messages":[{"role":"user","content":"Why is the sky blue?"}], "stream": true}`,
-			expRequestBody: `{"max_tokens":1024,"messages":[{"content":[{"text":"Why is the sky blue?","type":"text"}],"role":"user"}],"anthropic_version":"vertex-2023-10-16"}`,
+			expRequestBody: `{"max_tokens":1024,"messages":[{"content":[{"text":"Why is the sky blue?","type":"text"}],"role":"user"}],"stream":true,"anthropic_version":"vertex-2023-10-16"}`,
 			expHost:           "gcp-region-aiplatform.googleapis.com",
 			expPath:           "/v1/projects/gcp-project-name/locations/gcp-region/publishers/anthropic/models/claude-3-sonnet:streamRawPredict",
 			expRequestHeaders: map[string]string{"Authorization": "Bearer " + fakeGCPAuthToken},
