
Commit 01fede4

feat: add token usage recording for responses blocking interceptor
1 parent: 0959b4a

File tree

7 files changed: +266 −2 lines changed

fixtures/fixtures.go

Lines changed: 3 additions & 0 deletions
@@ -54,6 +54,9 @@ var (
 	//go:embed openai/responses/blocking/builtin_tool.txtar
 	OaiResponsesBlockingBuiltinTool []byte
 
+	//go:embed openai/responses/blocking/cached_input_tokens.txtar
+	OaiResponsesBlockingCachedInputTokens []byte
+
 	//go:embed openai/responses/blocking/custom_tool.txtar
 	OaiResponsesBlockingCustomTool []byte

fixtures/openai/responses/blocking/cached_input_tokens.txtar

Lines changed: 81 additions & 0 deletions

@@ -0,0 +1,81 @@
+-- request --
+{
+  "input": "This was a large input...",
+  "model": "gpt-4.1",
+  "prompt_cache_key": "key-123",
+  "prompt_cache_retention": "24h",
+  "stream": false
+}
+
+-- non-streaming --
+{
+  "id": "resp_0cd5d6b8310055d600696a1776b42c81a199fbb02248a8bfa0",
+  "object": "response",
+  "created_at": 1768560502,
+  "status": "completed",
+  "background": false,
+  "billing": {
+    "payer": "developer"
+  },
+  "completed_at": 1768560504,
+  "error": null,
+  "frequency_penalty": 0.0,
+  "incomplete_details": null,
+  "instructions": null,
+  "max_output_tokens": null,
+  "max_tool_calls": null,
+  "model": "gpt-4.1-2025-04-14",
+  "output": [
+    {
+      "id": "msg_0cd5d6b8310055d600696a177708b881a1bb53034def764104",
+      "type": "message",
+      "status": "completed",
+      "content": [
+        {
+          "type": "output_text",
+          "annotations": [],
+          "logprobs": [],
+          "text": "- I provide clear, accurate, and concise answers tailored to your requests.\n- I can process and summarize large volumes of information quickly.\n- I adapt my responses based on your needs and instructions for precision and relevance."
+        }
+      ],
+      "role": "assistant"
+    }
+  ],
+  "parallel_tool_calls": true,
+  "presence_penalty": 0.0,
+  "previous_response_id": null,
+  "prompt_cache_key": "key-123",
+  "prompt_cache_retention": "24h",
+  "reasoning": {
+    "effort": null,
+    "summary": null
+  },
+  "safety_identifier": null,
+  "service_tier": "default",
+  "store": true,
+  "temperature": 1.0,
+  "text": {
+    "format": {
+      "type": "text"
+    },
+    "verbosity": "medium"
+  },
+  "tool_choice": "auto",
+  "tools": [],
+  "top_logprobs": 0,
+  "top_p": 1.0,
+  "truncation": "disabled",
+  "usage": {
+    "input_tokens": 12033,
+    "input_tokens_details": {
+      "cached_tokens": 11904
+    },
+    "output_tokens": 44,
+    "output_tokens_details": {
+      "reasoning_tokens": 0
+    },
+    "total_tokens": 12077
+  },
+  "user": null,
+  "metadata": {}
+}
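
The fixture pairs a request that opts into prompt caching (prompt_cache_key, prompt_cache_retention) with a canned response whose usage block reports 11904 of the 12033 input tokens as cached. A minimal sketch of how a txtar fixture like this splits into its sections, assuming golang.org/x/tools/txtar; the repo's own test harness may load it differently:

package main

import (
	"fmt"

	"golang.org/x/tools/txtar"
)

// fixture stands in for the embedded fixtures.OaiResponsesBlockingCachedInputTokens bytes.
var fixture = []byte("-- request --\n{\"model\": \"gpt-4.1\"}\n-- non-streaming --\n{\"id\": \"resp_...\"}\n")

func main() {
	// txtar.Parse splits the archive on its "-- name --" headers.
	ar := txtar.Parse(fixture)
	for _, f := range ar.Files {
		// f.Name is "request" or "non-streaming"; f.Data is the raw JSON body that follows it.
		fmt.Printf("%s: %d bytes\n", f.Name, len(f.Data))
	}
}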

intercept/responses/base.go

Lines changed: 26 additions & 1 deletion
@@ -223,7 +223,6 @@ func (i *responsesInterceptionBase) recordUserPrompt(ctx context.Context, respon
 
 func (i *responsesInterceptionBase) recordToolUsage(ctx context.Context, response *responses.Response) {
 	if response == nil {
-		i.logger.Warn(ctx, "got empty response, skipping tool usage recording")
 		return
 	}
 
@@ -265,6 +264,32 @@ func (i *responsesInterceptionBase) parseFunctionCallJSONArgs(ctx context.Contex
 	return trimmed
 }
 
+func (i *responsesInterceptionBase) recordTokenUsage(ctx context.Context, response *responses.Response) {
+	if response == nil {
+		return
+	}
+
+	usage := response.Usage
+
+	// Keeping logic consistent with chat completions:
+	// input *includes* the cached tokens, so we subtract them here to reflect actual input token usage.
+	inputNonCacheTokens := usage.InputTokens - usage.InputTokensDetails.CachedTokens
+
+	if err := i.recorder.RecordTokenUsage(ctx, &recorder.TokenUsageRecord{
+		InterceptionID: i.ID().String(),
+		MsgID:          response.ID,
+		Input:          inputNonCacheTokens,
+		Output:         usage.OutputTokens,
+		ExtraTokenTypes: map[string]int64{
+			"input_cached":     usage.InputTokensDetails.CachedTokens,
+			"output_reasoning": usage.OutputTokensDetails.ReasoningTokens,
+			"total_tokens":     usage.TotalTokens,
+		},
+	}); err != nil {
+		i.logger.Warn(ctx, "failed to record token usage", slog.Error(err))
+	}
+}
+
 // responseCopier helper struct to send original response to the client
 type responseCopier struct {
 	buff deltaBuffer
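
Worked through with the cached_input_tokens fixture above, the subtraction yields exactly the numbers the integration test asserts. A sketch only, reusing the oairesponses types from base_test.go; base and ctx are assumed to be set up as in that test:

// Not part of the commit: feeding the fixture's usage block through recordTokenUsage.
resp := &oairesponses.Response{
	ID: "resp_0cd5d6b8310055d600696a1776b42c81a199fbb02248a8bfa0",
	Usage: oairesponses.ResponseUsage{
		InputTokens:         12033, // raw count, cached tokens included
		OutputTokens:        44,
		TotalTokens:         12077,
		InputTokensDetails:  oairesponses.ResponseUsageInputTokensDetails{CachedTokens: 11904},
		OutputTokensDetails: oairesponses.ResponseUsageOutputTokensDetails{ReasoningTokens: 0},
	},
}

base.recordTokenUsage(ctx, resp)
// Recorded: Input = 12033 - 11904 = 129 non-cached input tokens, Output = 44,
// ExtraTokenTypes = {"input_cached": 11904, "output_reasoning": 0, "total_tokens": 12077}.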

intercept/responses/base_test.go

Lines changed: 71 additions & 0 deletions
@@ -379,3 +379,74 @@ func TestParseJSONArgs(t *testing.T) {
 		})
 	}
 }
+
+func TestRecordTokenUsage(t *testing.T) {
+	t.Parallel()
+
+	id := uuid.MustParse("22222222-2222-2222-2222-222222222222")
+
+	tests := []struct {
+		name     string
+		response *oairesponses.Response
+		expected *recorder.TokenUsageRecord
+	}{
+		{
+			name:     "nil_response",
+			response: nil,
+			expected: nil,
+		},
+		{
+			name: "with_all_token_details",
+			response: &oairesponses.Response{
+				ID: "resp_full",
+				Usage: oairesponses.ResponseUsage{
+					InputTokens:  10,
+					OutputTokens: 20,
+					TotalTokens:  30,
+					InputTokensDetails: oairesponses.ResponseUsageInputTokensDetails{
+						CachedTokens: 5,
+					},
+					OutputTokensDetails: oairesponses.ResponseUsageOutputTokensDetails{
+						ReasoningTokens: 5,
+					},
+				},
+			},
+			expected: &recorder.TokenUsageRecord{
+				InterceptionID: id.String(),
+				MsgID:          "resp_full",
+				Input:          5, // 10 input - 5 cached
+				Output:         20,
+				ExtraTokenTypes: map[string]int64{
+					"input_cached":     5,
+					"output_reasoning": 5,
+					"total_tokens":     30,
+				},
+			},
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			t.Parallel()
+
+			rec := &testutil.MockRecorder{}
+			base := &responsesInterceptionBase{
+				id:       id,
+				recorder: rec,
+				logger:   slog.Make(),
+			}
+
+			base.recordTokenUsage(t.Context(), tc.response)
+
+			tokens := rec.RecordedTokenUsages()
+			if tc.expected == nil {
+				require.Empty(t, tokens)
+			} else {
+				require.Len(t, tokens, 1)
+				got := tokens[0]
+				got.CreatedAt = time.Time{} // ignore time
+				require.Equal(t, tc.expected, got)
+			}
+		})
+	}
+}

intercept/responses/blocking.go

Lines changed: 3 additions & 0 deletions
@@ -56,6 +56,9 @@ func (i *BlockingResponsesInterceptor) ProcessRequest(w http.ResponseWriter, r *
 	if response != nil {
 		i.recordUserPrompt(ctx, response.ID)
 		i.recordToolUsage(ctx, response)
+		i.recordTokenUsage(ctx, response)
+	} else {
+		i.logger.Warn(ctx, "got empty response, skipping prompt, tool usage and token usage recording")
 	}
 
 	if upstreamErr != nil && !respCopy.responseReceived.Load() {

intercept/responses/streaming.go

Lines changed: 5 additions & 1 deletion
@@ -116,7 +116,11 @@ func (i *StreamingResponsesInterceptor) ProcessRequest(w http.ResponseWriter, r
 		}
 	}
 	i.recordUserPrompt(ctx, responseID)
-	i.recordToolUsage(ctx, completedResponse)
+	if completedResponse != nil {
+		i.recordToolUsage(ctx, completedResponse)
+	} else {
+		i.logger.Warn(ctx, "got empty response, skipping tool usage recording")
+	}
 
 	b, err := respCopy.readAll()
 	if err != nil {

responses_integration_test.go

Lines changed: 77 additions & 0 deletions
@@ -38,12 +38,23 @@ func TestResponsesOutputMatchesUpstream(t *testing.T) {
 		expectModel          string
 		expectPromptRecorded string
 		expectToolRecorded   *recorder.ToolUsageRecord
+		expectTokenUsage     *recorder.TokenUsageRecord
 	}{
 		{
 			name:                 "blocking_simple",
 			fixture:              fixtures.OaiResponsesBlockingSimple,
 			expectModel:          "gpt-4o-mini",
 			expectPromptRecorded: "tell me a joke",
+			expectTokenUsage: &recorder.TokenUsageRecord{
+				MsgID:  "resp_0388c79043df3e3400695f9f83cd6481959062cec6830d8d51",
+				Input:  11,
+				Output: 18,
+				ExtraTokenTypes: map[string]int64{
+					"input_cached":     0,
+					"output_reasoning": 0,
+					"total_tokens":     29,
+				},
+			},
 		},
 		{
 			name:    "blocking_builtin_tool",
@@ -56,6 +67,32 @@ func TestResponsesOutputMatchesUpstream(t *testing.T) {
 				Args:     map[string]any{"a": float64(3), "b": float64(5)},
 				Injected: false,
 			},
+			expectTokenUsage: &recorder.TokenUsageRecord{
+				MsgID:  "resp_0da6045a8b68fa5200695fa23dcc2c81a19c849f627abf8a31",
+				Input:  58,
+				Output: 18,
+				ExtraTokenTypes: map[string]int64{
+					"input_cached":     0,
+					"output_reasoning": 0,
+					"total_tokens":     76,
+				},
+			},
+		},
+		{
+			name:                 "blocking_cached_input_tokens",
+			fixture:              fixtures.OaiResponsesBlockingCachedInputTokens,
+			expectModel:          "gpt-4.1",
+			expectPromptRecorded: "This was a large input...",
+			expectTokenUsage: &recorder.TokenUsageRecord{
+				MsgID:  "resp_0cd5d6b8310055d600696a1776b42c81a199fbb02248a8bfa0",
+				Input:  129, // 12033 input - 11904 cached
+				Output: 44,
+				ExtraTokenTypes: map[string]int64{
+					"input_cached":     11904,
+					"output_reasoning": 0,
+					"total_tokens":     12077,
+				},
+			},
 		},
 		{
 			name: "blocking_custom_tool",
@@ -68,18 +105,48 @@ func TestResponsesOutputMatchesUpstream(t *testing.T) {
 				Args:     "print(\"hello world\")",
 				Injected: false,
 			},
+			expectTokenUsage: &recorder.TokenUsageRecord{
+				MsgID:  "resp_09c614364030cdf000696942589da081a0af07f5859acb7308",
+				Input:  64,
+				Output: 148,
+				ExtraTokenTypes: map[string]int64{
+					"input_cached":     0,
+					"output_reasoning": 128,
+					"total_tokens":     212,
+				},
+			},
 		},
 		{
 			name:                 "blocking_conversation",
 			fixture:              fixtures.OaiResponsesBlockingConversation,
 			expectModel:          "gpt-4o-mini",
 			expectPromptRecorded: "explain why this is funny.",
+			expectTokenUsage: &recorder.TokenUsageRecord{
+				MsgID:  "resp_0c9f1f0524a858fa00695fa15fc5a081958f4304aafd3bdec2",
+				Input:  48,
+				Output: 116,
+				ExtraTokenTypes: map[string]int64{
+					"input_cached":     0,
+					"output_reasoning": 0,
+					"total_tokens":     164,
+				},
+			},
 		},
 		{
 			name:                 "blocking_prev_response_id",
 			fixture:              fixtures.OaiResponsesBlockingPrevResponseID,
 			expectModel:          "gpt-4o-mini",
 			expectPromptRecorded: "explain why this is funny.",
+			expectTokenUsage: &recorder.TokenUsageRecord{
+				MsgID:  "resp_0388c79043df3e3400695f9f86cfa08195af1f015c60117a83",
+				Input:  43,
+				Output: 129,
+				ExtraTokenTypes: map[string]int64{
+					"input_cached":     0,
+					"output_reasoning": 0,
+					"total_tokens":     172,
+				},
+			},
 		},
 		{
 			name: "streaming_simple",
@@ -226,6 +293,16 @@ func TestResponsesOutputMatchesUpstream(t *testing.T) {
 			} else {
 				require.Empty(t, recordedTools)
 			}
+
+			recordedTokens := mockRecorder.RecordedTokenUsages()
+			if tc.expectTokenUsage != nil {
+				require.Len(t, recordedTokens, 1)
+				recordedTokens[0].InterceptionID = tc.expectTokenUsage.InterceptionID // ignore interception id
+				recordedTokens[0].CreatedAt = tc.expectTokenUsage.CreatedAt           // ignore time
+				require.Equal(t, tc.expectTokenUsage, recordedTokens[0])
+			} else {
+				require.Empty(t, recordedTokens)
+			}
 		})
 	}
 }
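
One property of the expectations above worth noting: because Input is the cached-subtracted count, every record reconciles with the provider-reported total. A hypothetical helper, not part of the commit, that the table test could run on each record:

// checkUsageConsistency is a hypothetical addition: non-cached input plus
// cached input plus output must re-derive the provider's total_tokens.
func checkUsageConsistency(t *testing.T, rec *recorder.TokenUsageRecord) {
	t.Helper()
	rawInput := rec.Input + rec.ExtraTokenTypes["input_cached"]
	require.Equal(t, rec.ExtraTokenTypes["total_tokens"], rawInput+rec.Output)
}

Every case above satisfies it, e.g. blocking_cached_input_tokens: (129 + 11904) + 44 = 12077.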
