diff --git a/fixtures/fixtures.go b/fixtures/fixtures.go
index af1e454..f38b299 100644
--- a/fixtures/fixtures.go
+++ b/fixtures/fixtures.go
@@ -80,6 +80,9 @@ var (
 	//go:embed openai/responses/streaming/builtin_tool.txtar
 	OaiResponsesStreamingBuiltinTool []byte
 
+	//go:embed openai/responses/streaming/cached_input_tokens.txtar
+	OaiResponsesStreamingCachedInputTokens []byte
+
 	//go:embed openai/responses/streaming/custom_tool.txtar
 	OaiResponsesStreamingCustomTool []byte
 
diff --git a/fixtures/openai/responses/streaming/cached_input_tokens.txtar b/fixtures/openai/responses/streaming/cached_input_tokens.txtar
new file mode 100644
index 0000000..cc908d5
--- /dev/null
+++ b/fixtures/openai/responses/streaming/cached_input_tokens.txtar
@@ -0,0 +1,47 @@
+-- request --
+{
+  "model": "gpt-5.2-codex",
+  "input": "Test cached input tokens.",
+  "stream": true
+}
+
+-- streaming --
+event: response.created
+data: {"type":"response.created","response":{"id":"resp_05080461b406f3f501696a1409d34c8195a40ff4b092145c35","object":"response","created_at":1768559625,"status":"in_progress","background":false,"completed_at":null,"error":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-5.2-codex","output":[],"parallel_tool_calls":true,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":null,"reasoning":{"effort":"medium","summary":"detailed"},"service_tier":"auto","store":false,"temperature":1.0,"tool_choice":"auto","tools":[],"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":0}
+
+event: response.in_progress
+data: {"type":"response.in_progress","response":{"id":"resp_05080461b406f3f501696a1409d34c8195a40ff4b092145c35","object":"response","created_at":1768559625,"status":"in_progress","background":false,"completed_at":null,"error":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-5.2-codex","output":[],"parallel_tool_calls":true,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":null,"reasoning":{"effort":"medium","summary":"detailed"},"service_tier":"auto","store":false,"temperature":1.0,"tool_choice":"auto","tools":[],"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":1}
+
+event: response.output_item.added
+data: {"type":"response.output_item.added","item":{"id":"msg_05080461b406f3f501696a140a70d88195a2ce4c1a4eb39696","type":"message","status":"in_progress","content":[],"role":"assistant"},"output_index":0,"sequence_number":2}
+
+event: response.content_part.added
+data: {"type":"response.content_part.added","content_index":0,"item_id":"msg_05080461b406f3f501696a140a70d88195a2ce4c1a4eb39696","output_index":0,"part":{"type":"output_text","annotations":[],"text":""},"sequence_number":3}
+
+event: response.output_text.delta
+data: {"type":"response.output_text.delta","content_index":0,"delta":"Test","item_id":"msg_05080461b406f3f501696a140a70d88195a2ce4c1a4eb39696","output_index":0,"sequence_number":4}
+
+event: response.output_text.delta
+data: {"type":"response.output_text.delta","content_index":0,"delta":" response","item_id":"msg_05080461b406f3f501696a140a70d88195a2ce4c1a4eb39696","output_index":0,"sequence_number":5}
+
+event: response.output_text.delta
+data: {"type":"response.output_text.delta","content_index":0,"delta":" with","item_id":"msg_05080461b406f3f501696a140a70d88195a2ce4c1a4eb39696","output_index":0,"sequence_number":6}
+
+event: response.output_text.delta
+data: {"type":"response.output_text.delta","content_index":0,"delta":" cached","item_id":"msg_05080461b406f3f501696a140a70d88195a2ce4c1a4eb39696","output_index":0,"sequence_number":7}
+
+event: response.output_text.delta
+data: {"type":"response.output_text.delta","content_index":0,"delta":" tokens.","item_id":"msg_05080461b406f3f501696a140a70d88195a2ce4c1a4eb39696","output_index":0,"sequence_number":8}
+
+event: response.output_text.done
+data: {"type":"response.output_text.done","content_index":0,"item_id":"msg_05080461b406f3f501696a140a70d88195a2ce4c1a4eb39696","output_index":0,"text":"Test response with cached tokens.","sequence_number":9}
+
+event: response.content_part.done
+data: {"type":"response.content_part.done","content_index":0,"item_id":"msg_05080461b406f3f501696a140a70d88195a2ce4c1a4eb39696","output_index":0,"part":{"type":"output_text","annotations":[],"text":"Test response with cached tokens."},"sequence_number":10}
+
+event: response.output_item.done
+data: {"type":"response.output_item.done","item":{"id":"msg_05080461b406f3f501696a140a70d88195a2ce4c1a4eb39696","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"text":"Test response with cached tokens."}],"role":"assistant"},"output_index":0,"sequence_number":11}
+
+event: response.completed
+data: {"type":"response.completed","response":{"id":"resp_05080461b406f3f501696a1409d34c8195a40ff4b092145c35","object":"response","created_at":1768559625,"status":"completed","background":false,"completed_at":1768559627,"error":null,"instructions":null,"max_output_tokens":null,"max_tool_calls":null,"model":"gpt-5.2-codex","output":[{"id":"msg_05080461b406f3f501696a140a70d88195a2ce4c1a4eb39696","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"text":"Test response with cached tokens."}],"role":"assistant"}],"parallel_tool_calls":true,"previous_response_id":null,"prompt_cache_key":"019bc657-f77b-7292-b5f4-2e8d6c2b0945","prompt_cache_retention":null,"reasoning":{"effort":"medium","summary":"detailed"},"service_tier":"default","store":false,"temperature":1.0,"tool_choice":"auto","tools":[],"truncation":"disabled","usage":{"input_tokens":16909,"input_tokens_details":{"cached_tokens":15744},"output_tokens":54,"output_tokens_details":{"reasoning_tokens":0},"total_tokens":16963},"user":null,"metadata":{}},"sequence_number":12}
+
diff --git a/intercept/responses/streaming.go b/intercept/responses/streaming.go
index 8eedbb9..23793e3 100644
--- a/intercept/responses/streaming.go
+++ b/intercept/responses/streaming.go
@@ -118,8 +118,9 @@ func (i *StreamingResponsesInterceptor) ProcessRequest(w http.ResponseWriter, r
 	i.recordUserPrompt(ctx, responseID)
 	if completedResponse != nil {
 		i.recordToolUsage(ctx, completedResponse)
+		i.recordTokenUsage(ctx, completedResponse)
 	} else {
-		i.logger.Warn(ctx, "got empty response, skipping tool usage recording")
+		i.logger.Warn(ctx, "got empty response, skipping tool and token usage recording")
 	}
 
 	b, err := respCopy.readAll()
diff --git a/responses_integration_test.go b/responses_integration_test.go
index 51f620d..acef4fd 100644
--- a/responses_integration_test.go
+++ b/responses_integration_test.go
@@ -154,6 +154,16 @@ func TestResponsesOutputMatchesUpstream(t *testing.T) {
 			streaming: true,
 			expectModel: "gpt-4o-mini",
 			expectPromptRecorded: "tell me a joke",
+			expectTokenUsage: &recorder.TokenUsageRecord{
+				MsgID: "resp_0f9c4b2f224d858000695fa062bf048197a680f357bbb09000",
+				Input: 11,
+				Output: 18,
+				ExtraTokenTypes: map[string]int64{
+					"input_cached": 0,
+					"output_reasoning": 0,
+					"total_tokens": 29,
+				},
+			},
 		},
 		{
 			name: "streaming_codex",
@@ -161,6 +171,16 @@
 			streaming: true,
 			expectModel: "gpt-5-codex",
 			expectPromptRecorded: "hello",
+			expectTokenUsage: &recorder.TokenUsageRecord{
+				MsgID: "resp_0e172b76542a9100016964f7e63d888191a2a28cb2ba0ab6d3",
+				Input: 4006,
+				Output: 13,
+				ExtraTokenTypes: map[string]int64{
+					"input_cached": 0,
+					"output_reasoning": 0,
+					"total_tokens": 4019,
+				},
+			},
 		},
 		{
 			name: "streaming_builtin_tool",
@@ -174,6 +194,33 @@
 				Args: map[string]any{"a": float64(3), "b": float64(5)},
 				Injected: false,
 			},
+			expectTokenUsage: &recorder.TokenUsageRecord{
+				MsgID: "resp_0c3fb28cfcf463a500695fa2f0239481a095ec6ce3dfe4d458",
+				Input: 58,
+				Output: 18,
+				ExtraTokenTypes: map[string]int64{
+					"input_cached": 0,
+					"output_reasoning": 0,
+					"total_tokens": 76,
+				},
+			},
+		},
+		{
+			name: "streaming_cached_tokens",
+			fixture: fixtures.OaiResponsesStreamingCachedInputTokens,
+			streaming: true,
+			expectModel: "gpt-5.2-codex",
+			expectPromptRecorded: "Test cached input tokens.",
+			expectTokenUsage: &recorder.TokenUsageRecord{
+				MsgID: "resp_05080461b406f3f501696a1409d34c8195a40ff4b092145c35",
+				Input: 1165, // 16909 input - 15744 cached
+				Output: 54,
+				ExtraTokenTypes: map[string]int64{
+					"input_cached": 15744,
+					"output_reasoning": 0,
+					"total_tokens": 16963,
+				},
+			},
 		},
 		{
 			name: "streaming_custom_tool",
@@ -187,6 +234,16 @@
 				Args: "print(\"hello world\")",
 				Injected: false,
 			},
+			expectTokenUsage: &recorder.TokenUsageRecord{
+				MsgID: "resp_0c26996bc41c2a0500696942e83634819fb71b2b8ff8a4a76c",
+				Input: 64,
+				Output: 340,
+				ExtraTokenTypes: map[string]int64{
+					"input_cached": 0,
+					"output_reasoning": 320,
+					"total_tokens": 404,
+				},
+			},
 		},
 		{
 			name: "streaming_conversation",
@@ -201,6 +258,16 @@
 			streaming: true,
 			expectModel: "gpt-4o-mini",
 			expectPromptRecorded: "explain why this is funny.",
+			expectTokenUsage: &recorder.TokenUsageRecord{
+				MsgID: "resp_0f9c4b2f224d858000695fa0649b8c8197b38914b15a7add0e",
+				Input: 43,
+				Output: 182,
+				ExtraTokenTypes: map[string]int64{
+					"input_cached": 0,
+					"output_reasoning": 0,
+					"total_tokens": 225,
+				},
+			},
 		},
 		{
 			name: "stream_error",