Skip to content

Commit b2abde5

Browse files
committed
feat: add token usage recording for responses streaming interceptor
1 parent 2102f13 commit b2abde5

File tree

5 files changed

+119
-1
lines changed

5 files changed

+119
-1
lines changed

fixtures/fixtures.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,9 @@ var (
8080
//go:embed openai/responses/streaming/builtin_tool.txtar
8181
OaiResponsesStreamingBuiltinTool []byte
8282

83+
//go:embed openai/responses/streaming/cached_input_tokens.txtar
84+
OaiResponsesStreamingCachedInputTokens []byte
85+
8386
//go:embed openai/responses/streaming/custom_tool.txtar
8487
OaiResponsesStreamingCustomTool []byte
8588

fixtures/openai/responses/blocking/cached_input_tokens.txtar

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@
4444
"parallel_tool_calls": true,
4545
"presence_penalty": 0.0,
4646
"previous_response_id": null,
47-
"prompt_cache_key": "demo-user-123",
47+
"prompt_cache_key": "key-123",
4848
"prompt_cache_retention": "24h",
4949
"reasoning": {
5050
"effort": null,
fixtures/openai/responses/streaming/cached_input_tokens.txtar

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
-- request --
2+
{
3+
"model": "gpt-5.2-codex",
4+
"input": "Test cached input tokens.",
5+
"stream": true
6+
}
7+
8+
-- streaming --
9+
event: response.created
10+
data: {"type":"response.created","response":{"id":"resp_05080461b406f3f501696a1409d34c8195a40ff4b092145c35","object":"response","created_at":1768559625,"status":"in_progress","background":false,"completed_at":null,"error":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-5.2-codex","output":[],"parallel_tool_calls":true,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":null,"reasoning":{"effort":"medium","summary":"detailed"},"service_tier":"auto","store":false,"temperature":1.0,"tool_choice":"auto","tools":[],"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":0}
11+
12+
event: response.in_progress
13+
data: {"type":"response.in_progress","response":{"id":"resp_05080461b406f3f501696a1409d34c8195a40ff4b092145c35","object":"response","created_at":1768559625,"status":"in_progress","background":false,"completed_at":null,"error":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-5.2-codex","output":[],"parallel_tool_calls":true,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":null,"reasoning":{"effort":"medium","summary":"detailed"},"service_tier":"auto","store":false,"temperature":1.0,"tool_choice":"auto","tools":[],"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":1}
14+
15+
event: response.output_item.added
16+
data: {"type":"response.output_item.added","item":{"id":"msg_05080461b406f3f501696a140a70d88195a2ce4c1a4eb39696","type":"message","status":"in_progress","content":[],"role":"assistant"},"output_index":0,"sequence_number":2}
17+
18+
event: response.content_part.added
19+
data: {"type":"response.content_part.added","content_index":0,"item_id":"msg_05080461b406f3f501696a140a70d88195a2ce4c1a4eb39696","output_index":0,"part":{"type":"output_text","annotations":[],"text":""},"sequence_number":3}
20+
21+
event: response.output_text.delta
22+
data: {"type":"response.output_text.delta","content_index":0,"delta":"Test","item_id":"msg_05080461b406f3f501696a140a70d88195a2ce4c1a4eb39696","output_index":0,"sequence_number":4}
23+
24+
event: response.output_text.delta
25+
data: {"type":"response.output_text.delta","content_index":0,"delta":" response","item_id":"msg_05080461b406f3f501696a140a70d88195a2ce4c1a4eb39696","output_index":0,"sequence_number":5}
26+
27+
event: response.output_text.delta
28+
data: {"type":"response.output_text.delta","content_index":0,"delta":" with","item_id":"msg_05080461b406f3f501696a140a70d88195a2ce4c1a4eb39696","output_index":0,"sequence_number":6}
29+
30+
event: response.output_text.delta
31+
data: {"type":"response.output_text.delta","content_index":0,"delta":" cached","item_id":"msg_05080461b406f3f501696a140a70d88195a2ce4c1a4eb39696","output_index":0,"sequence_number":7}
32+
33+
event: response.output_text.delta
34+
data: {"type":"response.output_text.delta","content_index":0,"delta":" tokens.","item_id":"msg_05080461b406f3f501696a140a70d88195a2ce4c1a4eb39696","output_index":0,"sequence_number":8}
35+
36+
event: response.output_text.done
37+
data: {"type":"response.output_text.done","content_index":0,"item_id":"msg_05080461b406f3f501696a140a70d88195a2ce4c1a4eb39696","output_index":0,"text":"Test response with cached tokens.","sequence_number":9}
38+
39+
event: response.content_part.done
40+
data: {"type":"response.content_part.done","content_index":0,"item_id":"msg_05080461b406f3f501696a140a70d88195a2ce4c1a4eb39696","output_index":0,"part":{"type":"output_text","annotations":[],"text":"Test response with cached tokens."},"sequence_number":10}
41+
42+
event: response.output_item.done
43+
data: {"type":"response.output_item.done","item":{"id":"msg_05080461b406f3f501696a140a70d88195a2ce4c1a4eb39696","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"text":"Test response with cached tokens."}],"role":"assistant"},"output_index":0,"sequence_number":11}
44+
45+
event: response.completed
46+
data: {"type":"response.completed","response":{"id":"resp_05080461b406f3f501696a1409d34c8195a40ff4b092145c35","object":"response","created_at":1768559625,"status":"completed","background":false,"completed_at":1768559627,"error":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-5.2-codex","output":[{"id":"msg_05080461b406f3f501696a140a70d88195a2ce4c1a4eb39696","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"text":"Test response with cached tokens."}],"role":"assistant"}],"parallel_tool_calls":true,"previous_response_id":null,"prompt_cache_key":"019bc657-f77b-7292-b5f4-2e8d6c2b0945","prompt_cache_retention":null,"reasoning":{"effort":"medium","summary":"detailed"},"service_tier":"default","store":false,"temperature":1.0,"tool_choice":"auto","tools":[],"truncation":"disabled","usage":{"input_tokens":16909,"input_tokens_details":{"cached_tokens":15744},"output_tokens":54,"output_tokens_details":{"reasoning_tokens":0},"total_tokens":16963},"user":null,"metadata":{}},"sequence_number":12}
47+

intercept/responses/streaming.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ func (i *StreamingResponsesInterceptor) ProcessRequest(w http.ResponseWriter, r
115115
i.recordUserPrompt(ctx, responseID)
116116
if completedResponse != nil {
117117
i.recordToolUsage(ctx, completedResponse)
118+
i.recordTokenUsage(ctx, completedResponse)
118119
} else {
119120
i.logger.Warn(ctx, "got empty response, skipping tool usage recording")
120121
}

responses_integration_test.go

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,13 +154,33 @@ func TestResponsesOutputMatchesUpstream(t *testing.T) {
154154
streaming: true,
155155
expectModel: "gpt-4o-mini",
156156
expectPromptRecorded: "tell me a joke",
157+
expectTokenUsage: &recorder.TokenUsageRecord{
158+
MsgID: "resp_0f9c4b2f224d858000695fa062bf048197a680f357bbb09000",
159+
Input: 11,
160+
Output: 18,
161+
ExtraTokenTypes: map[string]int64{
162+
"input_cached": 0,
163+
"output_reasoning": 0,
164+
"total_tokens": 29,
165+
},
166+
},
157167
},
158168
{
159169
name: "streaming_codex",
160170
fixture: fixtures.OaiResponsesStreamingCodex,
161171
streaming: true,
162172
expectModel: "gpt-5-codex",
163173
expectPromptRecorded: "hello",
174+
expectTokenUsage: &recorder.TokenUsageRecord{
175+
MsgID: "resp_0e172b76542a9100016964f7e63d888191a2a28cb2ba0ab6d3",
176+
Input: 4006,
177+
Output: 13,
178+
ExtraTokenTypes: map[string]int64{
179+
"input_cached": 0,
180+
"output_reasoning": 0,
181+
"total_tokens": 4019,
182+
},
183+
},
164184
},
165185
{
166186
name: "streaming_builtin_tool",
@@ -174,6 +194,33 @@ func TestResponsesOutputMatchesUpstream(t *testing.T) {
174194
Args: map[string]any{"a": float64(3), "b": float64(5)},
175195
Injected: false,
176196
},
197+
expectTokenUsage: &recorder.TokenUsageRecord{
198+
MsgID: "resp_0c3fb28cfcf463a500695fa2f0239481a095ec6ce3dfe4d458",
199+
Input: 58,
200+
Output: 18,
201+
ExtraTokenTypes: map[string]int64{
202+
"input_cached": 0,
203+
"output_reasoning": 0,
204+
"total_tokens": 76,
205+
},
206+
},
207+
},
208+
{
209+
name: "streaming_cached_tokens",
210+
fixture: fixtures.OaiResponsesStreamingCachedInputTokens,
211+
streaming: true,
212+
expectModel: "gpt-5.2-codex",
213+
expectPromptRecorded: "Test cached input tokens.",
214+
expectTokenUsage: &recorder.TokenUsageRecord{
215+
MsgID: "resp_05080461b406f3f501696a1409d34c8195a40ff4b092145c35",
216+
Input: 1165, // 16909 input - 15744 cached
217+
Output: 54,
218+
ExtraTokenTypes: map[string]int64{
219+
"input_cached": 15744,
220+
"output_reasoning": 0,
221+
"total_tokens": 16963,
222+
},
223+
},
177224
},
178225
{
179226
name: "streaming_custom_tool",
@@ -187,6 +234,16 @@ func TestResponsesOutputMatchesUpstream(t *testing.T) {
187234
Args: "print(\"hello world\")",
188235
Injected: false,
189236
},
237+
expectTokenUsage: &recorder.TokenUsageRecord{
238+
MsgID: "resp_0c26996bc41c2a0500696942e83634819fb71b2b8ff8a4a76c",
239+
Input: 64,
240+
Output: 340,
241+
ExtraTokenTypes: map[string]int64{
242+
"input_cached": 0,
243+
"output_reasoning": 320,
244+
"total_tokens": 404,
245+
},
246+
},
190247
},
191248
{
192249
name: "streaming_conversation",
@@ -201,6 +258,16 @@ func TestResponsesOutputMatchesUpstream(t *testing.T) {
201258
streaming: true,
202259
expectModel: "gpt-4o-mini",
203260
expectPromptRecorded: "explain why this is funny.",
261+
expectTokenUsage: &recorder.TokenUsageRecord{
262+
MsgID: "resp_0f9c4b2f224d858000695fa0649b8c8197b38914b15a7add0e",
263+
Input: 43,
264+
Output: 182,
265+
ExtraTokenTypes: map[string]int64{
266+
"input_cached": 0,
267+
"output_reasoning": 0,
268+
"total_tokens": 225,
269+
},
270+
},
204271
},
205272
{
206273
name: "stream_error",

0 commit comments

Comments
 (0)