Skip to content

Commit 1b06346

Browse files
authored
refactor(translator): reduce Anthropic SDK usage in main path (envoyproxy#1616)
**Description** This reduces the Anthropic SDK usage in the translator package. **Related Issues/PRs (if applicable)** Related to envoyproxy#1611 --------- Signed-off-by: Takeshi Yoneda <[email protected]>
1 parent e55c5ae commit 1b06346

File tree

5 files changed

+58
-36
lines changed

5 files changed

+58
-36
lines changed

internal/translator/anthropic_anthropic.go

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,13 @@ func (a *anthropicToAnthropicTranslator) ResponseBody(_ map[string]string, body
9696
if err := json.NewDecoder(body).Decode(anthropicResp); err != nil {
9797
return nil, nil, tokenUsage, responseModel, fmt.Errorf("failed to unmarshal body: %w", err)
9898
}
99-
tokenUsage = ExtractLLMTokenUsageFromUsage(anthropicResp.Usage)
99+
tokenUsage = extractTokenUsageFromAnthropic(
100+
anthropicResp.Usage.InputTokens,
101+
anthropicResp.Usage.OutputTokens,
102+
anthropicResp.Usage.CacheReadInputTokens,
103+
anthropicResp.Usage.CacheCreationInputTokens,
104+
)
105+
100106
responseModel = cmp.Or(internalapi.ResponseModel(anthropicResp.Model), a.requestModel)
101107
return nil, nil, tokenUsage, responseModel, nil
102108
}
@@ -128,9 +134,21 @@ func (a *anthropicToAnthropicTranslator) extractUsageFromBufferEvent() (tokenUsa
128134
a.streamingResponseModel = internalapi.ResponseModel(eventUnion.Message.Model)
129135
}
130136
// Extract usage from message_start event
131-
tokenUsage = ExtractLLMTokenUsageFromUsage(eventUnion.Message.Usage)
137+
usage := eventUnion.Message.Usage
138+
tokenUsage = extractTokenUsageFromAnthropic(
139+
usage.InputTokens,
140+
usage.OutputTokens,
141+
usage.CacheReadInputTokens,
142+
usage.CacheCreationInputTokens,
143+
)
132144
case "message_delta":
133-
tokenUsage = ExtractLLMTokenUsageFromDeltaUsage(eventUnion.Usage)
145+
usage := eventUnion.Usage
146+
tokenUsage = extractTokenUsageFromAnthropic(
147+
usage.InputTokens,
148+
usage.OutputTokens,
149+
usage.CacheReadInputTokens,
150+
usage.CacheCreationInputTokens,
151+
)
134152
}
135153
}
136154
}

internal/translator/anthropic_usage.go

Lines changed: 2 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -6,18 +6,16 @@
66
package translator
77

88
import (
9-
"github.com/anthropics/anthropic-sdk-go"
10-
119
"github.com/envoyproxy/ai-gateway/internal/metrics"
1210
)
1311

14-
// ExtractLLMTokenUsage extracts the correct token usage from Anthropic API response.
12+
// extractTokenUsageFromAnthropic extracts the correct token usage from Anthropic API response.
1513
// According to Claude API documentation, total input tokens is the summation of:
1614
// input_tokens + cache_creation_input_tokens + cache_read_input_tokens
1715
//
1816
// This function works for both streaming and non-streaming responses by accepting
1917
// the common usage fields that exist in all Anthropic usage structures.
20-
func ExtractLLMTokenUsage(inputTokens, outputTokens, cacheReadTokens, cacheCreationTokens int64) metrics.TokenUsage {
18+
func extractTokenUsageFromAnthropic(inputTokens, outputTokens, cacheReadTokens, cacheCreationTokens int64) metrics.TokenUsage {
2119
// Calculate total input tokens as per Anthropic API documentation
2220
totalInputTokens := inputTokens + cacheCreationTokens + cacheReadTokens
2321

@@ -31,23 +29,3 @@ func ExtractLLMTokenUsage(inputTokens, outputTokens, cacheReadTokens, cacheCreat
3129
usage.SetCachedInputTokens(uint32(totalCachedTokens)) //nolint:gosec
3230
return usage
3331
}
34-
35-
// ExtractLLMTokenUsageFromUsage extracts token usage from anthropic.Usage struct (non-streaming).
36-
func ExtractLLMTokenUsageFromUsage(usage anthropic.Usage) metrics.TokenUsage {
37-
return ExtractLLMTokenUsage(
38-
usage.InputTokens,
39-
usage.OutputTokens,
40-
usage.CacheReadInputTokens,
41-
usage.CacheCreationInputTokens,
42-
)
43-
}
44-
45-
// ExtractLLMTokenUsageFromDeltaUsage extracts token usage from streaming message_delta events.
46-
func ExtractLLMTokenUsageFromDeltaUsage(usage anthropic.MessageDeltaUsage) metrics.TokenUsage {
47-
return ExtractLLMTokenUsage(
48-
usage.InputTokens,
49-
usage.OutputTokens,
50-
usage.CacheReadInputTokens,
51-
usage.CacheCreationInputTokens,
52-
)
53-
}

internal/translator/anthropic_usage_test.go

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ func TestExtractLLMTokenUsage(t *testing.T) {
9494

9595
for _, tt := range tests {
9696
t.Run(tt.name, func(t *testing.T) {
97-
result := ExtractLLMTokenUsage(
97+
result := extractTokenUsageFromAnthropic(
9898
tt.inputTokens,
9999
tt.outputTokens,
100100
tt.cacheReadTokens,
@@ -165,7 +165,11 @@ func TestExtractLLMTokenUsageFromUsage(t *testing.T) {
165165

166166
for _, tt := range tests {
167167
t.Run(tt.name, func(t *testing.T) {
168-
result := ExtractLLMTokenUsageFromUsage(tt.usage)
168+
result := extractTokenUsageFromAnthropic(tt.usage.InputTokens,
169+
tt.usage.OutputTokens,
170+
tt.usage.CacheReadInputTokens,
171+
tt.usage.CacheCreationInputTokens,
172+
)
169173
expected := tokenUsageFrom(tt.expectedInputTokens, 0, tt.expectedOutputTokens, tt.expectedTotalTokens)
170174
expected.SetCachedInputTokens(tt.expectedCachedTokens)
171175
assert.Equal(t, expected, result)
@@ -225,7 +229,11 @@ func TestExtractLLMTokenUsageFromDeltaUsage(t *testing.T) {
225229

226230
for _, tt := range tests {
227231
t.Run(tt.name, func(t *testing.T) {
228-
result := ExtractLLMTokenUsageFromDeltaUsage(tt.usage)
232+
result := extractTokenUsageFromAnthropic(tt.usage.InputTokens,
233+
tt.usage.OutputTokens,
234+
tt.usage.CacheReadInputTokens,
235+
tt.usage.CacheCreationInputTokens,
236+
)
229237
expected := tokenUsageFrom(tt.expectedInputTokens, 0, tt.expectedOutputTokens, tt.expectedTotalTokens)
230238
expected.SetCachedInputTokens(tt.expectedCachedTokens)
231239
assert.Equal(t, expected, result)
@@ -238,7 +246,7 @@ func TestExtractLLMTokenUsage_EdgeCases(t *testing.T) {
238246
t.Run("negative values should be handled", func(t *testing.T) {
239247
// Note: In practice, the Anthropic API shouldn't return negative values,
240248
// but our function should handle them gracefully by casting to uint32.
241-
result := ExtractLLMTokenUsage(-10, -5, -2, -1)
249+
result := extractTokenUsageFromAnthropic(-10, -5, -2, -1)
242250

243251
// Negative int64 values will wrap around when cast to uint32.
244252
// This test documents current behavior rather than prescribing it.
@@ -249,7 +257,7 @@ func TestExtractLLMTokenUsage_EdgeCases(t *testing.T) {
249257
t.Run("maximum int64 values", func(t *testing.T) {
250258
// Test with very large values to ensure no overflow issues.
251259
// Note: This will result in truncation when casting to uint32.
252-
result := ExtractLLMTokenUsage(9223372036854775807, 1000, 500, 100)
260+
result := extractTokenUsageFromAnthropic(9223372036854775807, 1000, 500, 100)
253261
assert.NotNil(t, result)
254262
})
255263
}
@@ -266,7 +274,7 @@ func TestExtractLLMTokenUsage_ClaudeAPIDocumentationCompliance(t *testing.T) {
266274
cacheReadTokens := int64(30)
267275
outputTokens := int64(50)
268276

269-
result := ExtractLLMTokenUsage(inputTokens, outputTokens, cacheReadTokens, cacheCreationTokens)
277+
result := extractTokenUsageFromAnthropic(inputTokens, outputTokens, cacheReadTokens, cacheCreationTokens)
270278

271279
// Total input should be sum of all input token types.
272280
expectedTotalInputInt := inputTokens + cacheCreationTokens + cacheReadTokens

internal/translator/openai_gcpanthropic.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -785,7 +785,13 @@ func (o *openAIToGCPAnthropicTranslatorV1ChatCompletion) ResponseBody(_ map[stri
785785
Choices: make([]openai.ChatCompletionResponseChoice, 0),
786786
Created: openai.JSONUNIXTime(time.Now()),
787787
}
788-
tokenUsage = ExtractLLMTokenUsageFromUsage(anthropicResp.Usage)
788+
usage := anthropicResp.Usage
789+
tokenUsage = extractTokenUsageFromAnthropic(
790+
usage.InputTokens,
791+
usage.OutputTokens,
792+
usage.CacheReadInputTokens,
793+
usage.CacheCreationInputTokens,
794+
)
789795
inputTokens, _ := tokenUsage.InputTokens()
790796
outputTokens, _ := tokenUsage.OutputTokens()
791797
totalTokens, _ := tokenUsage.TotalTokens()

internal/translator/openai_gcpanthropic_stream.go

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,13 @@ func (p *anthropicStreamParser) handleAnthropicStreamEvent(eventType []byte, dat
200200
}
201201
p.activeMessageID = event.Message.ID
202202
p.created = openai.JSONUNIXTime(time.Now())
203-
usage := ExtractLLMTokenUsageFromUsage(event.Message.Usage)
203+
u := event.Message.Usage
204+
usage := extractTokenUsageFromAnthropic(
205+
u.InputTokens,
206+
u.OutputTokens,
207+
u.CacheReadInputTokens,
208+
u.CacheCreationInputTokens,
209+
)
204210
// For message_start, we store the initial usage but don't add to the accumulated
205211
// The message_delta event will contain the final totals
206212
if input, ok := usage.InputTokens(); ok {
@@ -282,7 +288,13 @@ func (p *anthropicStreamParser) handleAnthropicStreamEvent(eventType []byte, dat
282288
if err := json.Unmarshal(data, &event); err != nil {
283289
return nil, fmt.Errorf("unmarshal message_delta: %w", err)
284290
}
285-
usage := ExtractLLMTokenUsageFromDeltaUsage(event.Usage)
291+
u := event.Usage
292+
usage := extractTokenUsageFromAnthropic(
293+
u.InputTokens,
294+
u.OutputTokens,
295+
u.CacheReadInputTokens,
296+
u.CacheCreationInputTokens,
297+
)
286298
// For message_delta, accumulate the incremental output tokens
287299
if output, ok := usage.OutputTokens(); ok {
288300
p.tokenUsage.AddOutputTokens(output)

0 commit comments

Comments (0)