3 changes: 3 additions & 0 deletions fixtures/fixtures.go
@@ -80,6 +80,9 @@ var (
 	//go:embed openai/responses/streaming/builtin_tool.txtar
 	OaiResponsesStreamingBuiltinTool []byte
 
+	//go:embed openai/responses/streaming/cached_input_tokens.txtar
+	OaiResponsesStreamingCachedInputTokens []byte
+
 	//go:embed openai/responses/streaming/custom_tool.txtar
 	OaiResponsesStreamingCustomTool []byte
 
47 changes: 47 additions & 0 deletions fixtures/openai/responses/streaming/cached_input_tokens.txtar
@@ -0,0 +1,47 @@
-- request --
{
"model": "gpt-5.2-codex",
"input": "Test cached input tokens.",
"stream": true
}

-- streaming --
event: response.created
data: {"type":"response.created","response":{"id":"resp_05080461b406f3f501696a1409d34c8195a40ff4b092145c35","object":"response","created_at":1768559625,"status":"in_progress","background":false,"completed_at":null,"error":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-5.2-codex","output":[],"parallel_tool_calls":true,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":null,"reasoning":{"effort":"medium","summary":"detailed"},"service_tier":"auto","store":false,"temperature":1.0,"tool_choice":"auto","tools":[],"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":0}

event: response.in_progress
data: {"type":"response.in_progress","response":{"id":"resp_05080461b406f3f501696a1409d34c8195a40ff4b092145c35","object":"response","created_at":1768559625,"status":"in_progress","background":false,"completed_at":null,"error":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-5.2-codex","output":[],"parallel_tool_calls":true,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":null,"reasoning":{"effort":"medium","summary":"detailed"},"service_tier":"auto","store":false,"temperature":1.0,"tool_choice":"auto","tools":[],"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":1}

event: response.output_item.added
data: {"type":"response.output_item.added","item":{"id":"msg_05080461b406f3f501696a140a70d88195a2ce4c1a4eb39696","type":"message","status":"in_progress","content":[],"role":"assistant"},"output_index":0,"sequence_number":2}

event: response.content_part.added
data: {"type":"response.content_part.added","content_index":0,"item_id":"msg_05080461b406f3f501696a140a70d88195a2ce4c1a4eb39696","output_index":0,"part":{"type":"output_text","annotations":[],"text":""},"sequence_number":3}

event: response.output_text.delta
data: {"type":"response.output_text.delta","content_index":0,"delta":"Test","item_id":"msg_05080461b406f3f501696a140a70d88195a2ce4c1a4eb39696","output_index":0,"sequence_number":4}

event: response.output_text.delta
data: {"type":"response.output_text.delta","content_index":0,"delta":" response","item_id":"msg_05080461b406f3f501696a140a70d88195a2ce4c1a4eb39696","output_index":0,"sequence_number":5}

event: response.output_text.delta
data: {"type":"response.output_text.delta","content_index":0,"delta":" with","item_id":"msg_05080461b406f3f501696a140a70d88195a2ce4c1a4eb39696","output_index":0,"sequence_number":6}

event: response.output_text.delta
data: {"type":"response.output_text.delta","content_index":0,"delta":" cached","item_id":"msg_05080461b406f3f501696a140a70d88195a2ce4c1a4eb39696","output_index":0,"sequence_number":7}

event: response.output_text.delta
data: {"type":"response.output_text.delta","content_index":0,"delta":" tokens.","item_id":"msg_05080461b406f3f501696a140a70d88195a2ce4c1a4eb39696","output_index":0,"sequence_number":8}

event: response.output_text.done
data: {"type":"response.output_text.done","content_index":0,"item_id":"msg_05080461b406f3f501696a140a70d88195a2ce4c1a4eb39696","output_index":0,"text":"Test response with cached tokens.","sequence_number":9}

event: response.content_part.done
data: {"type":"response.content_part.done","content_index":0,"item_id":"msg_05080461b406f3f501696a140a70d88195a2ce4c1a4eb39696","output_index":0,"part":{"type":"output_text","annotations":[],"text":"Test response with cached tokens."},"sequence_number":10}

event: response.output_item.done
data: {"type":"response.output_item.done","item":{"id":"msg_05080461b406f3f501696a140a70d88195a2ce4c1a4eb39696","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"text":"Test response with cached tokens."}],"role":"assistant"},"output_index":0,"sequence_number":11}

event: response.completed
data: {"type":"response.completed","response":{"id":"resp_05080461b406f3f501696a1409d34c8195a40ff4b092145c35","object":"response","created_at":1768559625,"status":"completed","background":false,"completed_at":1768559627,"error":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-5.2-codex","output":[{"id":"msg_05080461b406f3f501696a140a70d88195a2ce4c1a4eb39696","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"text":"Test response with cached tokens."}],"role":"assistant"}],"parallel_tool_calls":true,"previous_response_id":null,"prompt_cache_key":"019bc657-f77b-7292-b5f4-2e8d6c2b0945","prompt_cache_retention":null,"reasoning":{"effort":"medium","summary":"detailed"},"service_tier":"default","store":false,"temperature":1.0,"tool_choice":"auto","tools":[],"truncation":"disabled","usage":{"input_tokens":16909,"input_tokens_details":{"cached_tokens":15744},"output_tokens":54,"output_tokens_details":{"reasoning_tokens":0},"total_tokens":16963},"user":null,"metadata":{}},"sequence_number":12}

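The fixture above uses the txtar archive format: a `-- request --` section holding the request body and a `-- streaming --` section holding the raw SSE stream. As a minimal sketch of how such an embedded fixture can be split into named sections (assuming only the standard golang.org/x/tools/txtar API; the import path for the fixtures package is a placeholder, not taken from this PR):

package fixtures_test

import (
	"testing"

	"golang.org/x/tools/txtar"

	"example.com/project/fixtures" // placeholder path for the fixtures package in this PR
)

// sections parses a txtar archive into a name -> body map.
func sections(data []byte) map[string][]byte {
	out := make(map[string][]byte)
	for _, f := range txtar.Parse(data).Files {
		out[f.Name] = f.Data
	}
	return out
}

func TestCachedInputTokensFixtureHasBothSections(t *testing.T) {
	s := sections(fixtures.OaiResponsesStreamingCachedInputTokens)
	for _, name := range []string{"request", "streaming"} {
		if len(s[name]) == 0 {
			t.Fatalf("fixture missing %q section", name)
		}
	}
}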
3 changes: 2 additions & 1 deletion intercept/responses/streaming.go
@@ -118,8 +118,9 @@ func (i *StreamingResponsesInterceptor) ProcessRequest(w http.ResponseWriter, r
 	i.recordUserPrompt(ctx, responseID)
 	if completedResponse != nil {
 		i.recordToolUsage(ctx, completedResponse)
+		i.recordTokenUsage(ctx, completedResponse)
 	} else {
-		i.logger.Warn(ctx, "got empty response, skipping tool usage recording")
+		i.logger.Warn(ctx, "got empty response, skipping tool and token usage recording")
 	}
 
 	b, err := respCopy.readAll()
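The new recordTokenUsage call is the only part of the recorder visible in this hunk; its implementation is not in the diff. Based on the expectations in the integration tests below, a plausible sketch of the accounting it performs, with every name other than recorder.TokenUsageRecord's fields assumed: cached input is subtracted from Input and reported separately, while reasoning and total counts ride along in ExtraTokenTypes.

// Sketch only: "usage" mirrors the usage object in the response.completed
// SSE event; the struct and function names here are illustrative.
type usage struct {
	InputTokens        int64 `json:"input_tokens"`
	InputTokensDetails struct {
		CachedTokens int64 `json:"cached_tokens"`
	} `json:"input_tokens_details"`
	OutputTokens        int64 `json:"output_tokens"`
	OutputTokensDetails struct {
		ReasoningTokens int64 `json:"reasoning_tokens"`
	} `json:"output_tokens_details"`
	TotalTokens int64 `json:"total_tokens"`
}

func tokenUsageRecord(msgID string, u usage) recorder.TokenUsageRecord {
	return recorder.TokenUsageRecord{
		MsgID: msgID,
		// Cached prompt tokens are split out of Input, e.g.
		// 16909 input_tokens - 15744 cached_tokens = 1165.
		Input:  u.InputTokens - u.InputTokensDetails.CachedTokens,
		Output: u.OutputTokens,
		ExtraTokenTypes: map[string]int64{
			"input_cached":     u.InputTokensDetails.CachedTokens,
			"output_reasoning": u.OutputTokensDetails.ReasoningTokens,
			"total_tokens":     u.TotalTokens,
		},
	}
}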
67 changes: 67 additions & 0 deletions responses_integration_test.go
@@ -154,13 +154,33 @@ func TestResponsesOutputMatchesUpstream(t *testing.T) {
 			streaming: true,
 			expectModel: "gpt-4o-mini",
 			expectPromptRecorded: "tell me a joke",
+			expectTokenUsage: &recorder.TokenUsageRecord{
+				MsgID: "resp_0f9c4b2f224d858000695fa062bf048197a680f357bbb09000",
+				Input: 11,
+				Output: 18,
+				ExtraTokenTypes: map[string]int64{
+					"input_cached": 0,
+					"output_reasoning": 0,
+					"total_tokens": 29,
+				},
+			},
 		},
 		{
 			name: "streaming_codex",
 			fixture: fixtures.OaiResponsesStreamingCodex,
 			streaming: true,
 			expectModel: "gpt-5-codex",
 			expectPromptRecorded: "hello",
+			expectTokenUsage: &recorder.TokenUsageRecord{
+				MsgID: "resp_0e172b76542a9100016964f7e63d888191a2a28cb2ba0ab6d3",
+				Input: 4006,
+				Output: 13,
+				ExtraTokenTypes: map[string]int64{
+					"input_cached": 0,
+					"output_reasoning": 0,
+					"total_tokens": 4019,
+				},
+			},
 		},
 		{
 			name: "streaming_builtin_tool",
@@ -174,6 +194,33 @@ func TestResponsesOutputMatchesUpstream(t *testing.T) {
 				Args: map[string]any{"a": float64(3), "b": float64(5)},
 				Injected: false,
 			},
+			expectTokenUsage: &recorder.TokenUsageRecord{
+				MsgID: "resp_0c3fb28cfcf463a500695fa2f0239481a095ec6ce3dfe4d458",
+				Input: 58,
+				Output: 18,
+				ExtraTokenTypes: map[string]int64{
+					"input_cached": 0,
+					"output_reasoning": 0,
+					"total_tokens": 76,
+				},
+			},
 		},
+		{
+			name: "streaming_cached_tokens",
+			fixture: fixtures.OaiResponsesStreamingCachedInputTokens,
+			streaming: true,
+			expectModel: "gpt-5.2-codex",
+			expectPromptRecorded: "Test cached input tokens.",
+			expectTokenUsage: &recorder.TokenUsageRecord{
+				MsgID: "resp_05080461b406f3f501696a1409d34c8195a40ff4b092145c35",
+				Input: 1165, // 16909 input - 15744 cached
+				Output: 54,
+				ExtraTokenTypes: map[string]int64{
+					"input_cached": 15744,
+					"output_reasoning": 0,
+					"total_tokens": 16963,
+				},
+			},
+		},
 		{
 			name: "streaming_custom_tool",
@@ -187,6 +234,16 @@ func TestResponsesOutputMatchesUpstream(t *testing.T) {
Args: "print(\"hello world\")",
Injected: false,
},
expectTokenUsage: &recorder.TokenUsageRecord{
MsgID: "resp_0c26996bc41c2a0500696942e83634819fb71b2b8ff8a4a76c",
Input: 64,
Output: 340,
ExtraTokenTypes: map[string]int64{
"input_cached": 0,
"output_reasoning": 320,
"total_tokens": 404,
},
},
},
{
name: "streaming_conversation",
@@ -201,6 +258,16 @@ func TestResponsesOutputMatchesUpstream(t *testing.T) {
 			streaming: true,
 			expectModel: "gpt-4o-mini",
 			expectPromptRecorded: "explain why this is funny.",
+			expectTokenUsage: &recorder.TokenUsageRecord{
+				MsgID: "resp_0f9c4b2f224d858000695fa0649b8c8197b38914b15a7add0e",
+				Input: 43,
+				Output: 182,
+				ExtraTokenTypes: map[string]int64{
+					"input_cached": 0,
+					"output_reasoning": 0,
+					"total_tokens": 225,
+				},
+			},
 		},
 		{
 			name: "stream_error",
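As a sanity check, the numbers in the streaming_cached_tokens case are internally consistent with the fixture's usage object under the accounting sketched earlier (a standalone snippet, not part of the PR):

package main

import "fmt"

func main() {
	const (
		inputTokens  = 16909 // usage.input_tokens
		cachedTokens = 15744 // usage.input_tokens_details.cached_tokens
		outputTokens = 54    // usage.output_tokens
		totalTokens  = 16963 // usage.total_tokens
	)
	fmt.Println(inputTokens-cachedTokens == 1165)        // recorded Input
	fmt.Println(inputTokens+outputTokens == totalTokens) // total covers gross input + output
}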