-
Notifications
You must be signed in to change notification settings - Fork 117
fix: buffer gzip data for anthropic messages #1247
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 2 commits
34385a4
e89196d
7909e9e
33a5728
c163006
ba8fec2
a6b6ecd
eb5d1fa
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -90,7 +90,7 @@ func (a *anthropicToGCPAnthropicTranslator) ResponseHeaders(_ map[string]string) | |
|
|
||
| // ResponseBody implements [AnthropicMessagesTranslator.ResponseBody] for Anthropic to GCP Anthropic. | ||
| // This is essentially a passthrough since both use the same Anthropic response format. | ||
| func (a *anthropicToGCPAnthropicTranslator) ResponseBody(_ map[string]string, body io.Reader, endOfStream bool) ( | ||
| func (a *anthropicToGCPAnthropicTranslator) ResponseBody(_ map[string]string, body io.Reader, isStreaming bool) ( | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why renaming this? endOfStream is different from isStreaming. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. good question, but I indeed mean isStreaming.
This boolean basically tells which one to use. Once you raised this question let me elaborate. Previously we would do translation while it's streaming; now, because we accumulate, we don't care about endOfStream. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you leave a comment so other maintainers are aware of the difference? |
||
| headerMutation *extprocv3.HeaderMutation, bodyMutation *extprocv3.BodyMutation, tokenUsage LLMTokenUsage, responseModel string, err error, | ||
| ) { | ||
| // Read the response body for both streaming and non-streaming. | ||
|
|
@@ -99,8 +99,8 @@ func (a *anthropicToGCPAnthropicTranslator) ResponseBody(_ map[string]string, bo | |
| return nil, nil, LLMTokenUsage{}, "", fmt.Errorf("failed to read response body: %w", err) | ||
| } | ||
|
|
||
| // For streaming chunks, parse SSE format to extract token usage. | ||
| if !endOfStream { | ||
| // For streaming requests, parse SSE format to extract token usage. | ||
| if isStreaming { | ||
| // Parse SSE format - split by lines and look for data: lines. | ||
| for line := range bytes.Lines(bodyBytes) { | ||
| line = bytes.TrimSpace(line) | ||
|
|
@@ -117,14 +117,16 @@ func (a *anthropicToGCPAnthropicTranslator) ResponseBody(_ map[string]string, bo | |
| switch eventType { | ||
| case "message_start": | ||
| // Extract input tokens from message.usage. | ||
| // Now handles complete response with potentially multiple events. | ||
| if messageData, ok := eventData["message"].(map[string]any); ok { | ||
| if usageData, ok := messageData["usage"].(map[string]any); ok { | ||
| if inputTokens, ok := usageData["input_tokens"].(float64); ok { | ||
| tokenUsage.InputTokens = uint32(inputTokens) //nolint:gosec | ||
| // Accumulate input tokens (though typically only one message_start per conversation) | ||
| tokenUsage.InputTokens += uint32(inputTokens) //nolint:gosec | ||
| } | ||
| // Some message_start events may include initial output tokens. | ||
| if outputTokens, ok := usageData["output_tokens"].(float64); ok && outputTokens > 0 { | ||
| tokenUsage.OutputTokens = uint32(outputTokens) //nolint:gosec | ||
| tokenUsage.OutputTokens += uint32(outputTokens) //nolint:gosec | ||
| } | ||
| tokenUsage.TotalTokens = tokenUsage.InputTokens + tokenUsage.OutputTokens | ||
| } | ||
|
|
@@ -143,18 +145,16 @@ func (a *anthropicToGCPAnthropicTranslator) ResponseBody(_ map[string]string, bo | |
| } | ||
| } | ||
|
|
||
| return nil, &extprocv3.BodyMutation{ | ||
| Mutation: &extprocv3.BodyMutation_Body{Body: bodyBytes}, | ||
| }, tokenUsage, a.requestModel, nil | ||
| // For streaming responses, we only extract token usage, don't modify the body | ||
| // Return nil bodyMutation to pass through original data (potentially gzipped) | ||
| return nil, nil, tokenUsage, a.requestModel, nil | ||
| } | ||
|
|
||
| // Parse the Anthropic response to extract token usage. | ||
| var anthropicResp anthropic.Message | ||
| if err = json.Unmarshal(bodyBytes, &anthropicResp); err != nil { | ||
| // If we can't parse as Anthropic format, pass through as-is. | ||
| return nil, &extprocv3.BodyMutation{ | ||
| Mutation: &extprocv3.BodyMutation_Body{Body: bodyBytes}, | ||
| }, LLMTokenUsage{}, a.requestModel, nil | ||
| // If we can't parse as Anthropic format, pass through as-is without modification. | ||
| return nil, nil, LLMTokenUsage{}, a.requestModel, nil | ||
| } | ||
|
|
||
| // Extract token usage from the response. | ||
|
|
@@ -164,12 +164,6 @@ func (a *anthropicToGCPAnthropicTranslator) ResponseBody(_ map[string]string, bo | |
| TotalTokens: uint32(anthropicResp.Usage.InputTokens + anthropicResp.Usage.OutputTokens), //nolint:gosec | ||
| } | ||
|
|
||
| // Pass through the response body unchanged since both input and output are Anthropic format. | ||
| headerMutation = &extprocv3.HeaderMutation{} | ||
| setContentLength(headerMutation, bodyBytes) | ||
| bodyMutation = &extprocv3.BodyMutation{ | ||
| Mutation: &extprocv3.BodyMutation_Body{Body: bodyBytes}, | ||
| } | ||
|
|
||
| return headerMutation, bodyMutation, tokenUsage, a.requestModel, nil | ||
| // Pass through the response body unchanged - don't create body mutation to preserve original encoding. | ||
| return nil, nil, tokenUsage, a.requestModel, nil | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -10,6 +10,7 @@ import ( | |
| "compress/gzip" | ||
| "fmt" | ||
| "io" | ||
| "log/slog" | ||
|
|
||
| extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" | ||
| ) | ||
|
|
@@ -42,6 +43,82 @@ func decodeContentIfNeeded(body []byte, contentEncoding string) (contentDecoding | |
| } | ||
| } | ||
|
|
||
| // decodeContentWithBuffering decompresses response body with buffering support for streaming. | ||
| // Accumulates chunks in the provided buffer until complete gzip data is available. | ||
| // Returns a reader for the (potentially decompressed) body and metadata about the encoding. | ||
| func decodeContentWithBuffering(body []byte, contentEncoding string, gzipBuffer *[]byte, endOfStream bool) (contentDecodingResult, error) { | ||
| switch contentEncoding { | ||
| case "gzip": | ||
|
|
||
| // Accumulate chunks in buffer | ||
| *gzipBuffer = append(*gzipBuffer, body...) | ||
|
|
||
| // Try to decompress the accumulated buffer | ||
| if len(*gzipBuffer) > 0 { | ||
| gzipReader, err := gzip.NewReader(bytes.NewReader(*gzipBuffer)) | ||
| if err != nil { | ||
|
||
| // If it's not endOfStream, keep buffering | ||
| if !endOfStream { | ||
| return contentDecodingResult{ | ||
| reader: bytes.NewReader(nil), // Empty reader to signal buffering in progress | ||
| isEncoded: true, | ||
| }, nil | ||
| } | ||
| // If endOfStream and still can't read, pass through buffered data | ||
| slog.Info("gzip buffering: invalid header at end of stream, passing through buffered data", | ||
| "error", err, | ||
| "buffer_size", len(*gzipBuffer)) | ||
| result := contentDecodingResult{ | ||
| reader: bytes.NewReader(*gzipBuffer), | ||
| isEncoded: true, | ||
| } | ||
| *gzipBuffer = nil // Clear buffer | ||
| return result, nil | ||
| } | ||
| defer gzipReader.Close() | ||
|
|
||
| decompressedBody, err := io.ReadAll(gzipReader) | ||
VarSuren marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| if err != nil { | ||
| // If it's not endOfStream, keep buffering | ||
| if !endOfStream { | ||
| return contentDecodingResult{ | ||
| reader: bytes.NewReader(nil), // Empty reader to signal buffering in progress | ||
| isEncoded: true, | ||
| }, nil | ||
| } | ||
| // If endOfStream and decompression failed, pass through buffered data | ||
| slog.Info("gzip buffering: decompression failed at end of stream, passing through buffered data", | ||
| "error", err, | ||
| "buffer_size", len(*gzipBuffer)) | ||
| result := contentDecodingResult{ | ||
| reader: bytes.NewReader(*gzipBuffer), | ||
| isEncoded: true, | ||
| } | ||
| *gzipBuffer = nil // Clear buffer | ||
| return result, nil | ||
| } | ||
|
|
||
| // Successfully decompressed! | ||
| *gzipBuffer = nil // Clear buffer | ||
| return contentDecodingResult{ | ||
| reader: bytes.NewReader(decompressedBody), | ||
| isEncoded: true, | ||
| }, nil | ||
| } | ||
|
|
||
| // Empty buffer, return empty | ||
| return contentDecodingResult{ | ||
| reader: bytes.NewReader(nil), // Empty reader for empty buffer | ||
| isEncoded: true, | ||
| }, nil | ||
| default: | ||
| return contentDecodingResult{ | ||
| reader: bytes.NewReader(body), | ||
| isEncoded: false, | ||
| }, nil | ||
| } | ||
| } | ||
|
|
||
| // removeContentEncodingIfNeeded removes the content-encoding header if the body was modified and was encoded. | ||
| // This is needed when the transformation modifies the body content but the response was originally compressed. | ||
| func removeContentEncodingIfNeeded(headerMutation *extprocv3.HeaderMutation, bodyMutation *extprocv3.BodyMutation, isEncoded bool) *extprocv3.HeaderMutation { | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.