Commit cb1e463

fix(llma): Responses API streaming tokens
1 parent 661909a commit cb1e463

2 files changed: 107 additions & 23 deletions


posthog/ai/openai/openai_converter.py

Lines changed: 26 additions & 23 deletions
@@ -273,11 +273,11 @@ def extract_openai_usage_from_chunk(
     """

     usage: StreamingUsageStats = {}

-    if not hasattr(chunk, "usage") or not chunk.usage:
-        return usage
-
     if provider_type == "chat":
+        if not hasattr(chunk, "usage") or not chunk.usage:
+            return usage
+
         # Chat Completions API uses prompt_tokens and completion_tokens
         usage["prompt_tokens"] = getattr(chunk.usage, "prompt_tokens", 0)
         usage["completion_tokens"] = getattr(chunk.usage, "completion_tokens", 0)
@@ -300,26 +300,29 @@ def extract_openai_usage_from_chunk(
        )

    elif provider_type == "responses":
-        # Responses API uses input_tokens and output_tokens
-        usage["input_tokens"] = getattr(chunk.usage, "input_tokens", 0)
-        usage["output_tokens"] = getattr(chunk.usage, "output_tokens", 0)
-        usage["total_tokens"] = getattr(chunk.usage, "total_tokens", 0)
-
-        # Handle cached tokens
-        if hasattr(chunk.usage, "input_tokens_details") and hasattr(
-            chunk.usage.input_tokens_details, "cached_tokens"
-        ):
-            usage["cache_read_input_tokens"] = (
-                chunk.usage.input_tokens_details.cached_tokens
-            )
+        # For Responses API, usage is only in chunk.response.usage for completed events
+        if hasattr(chunk, "type") and chunk.type == "response.completed":
+            if hasattr(chunk, "response") and hasattr(chunk.response, "usage") and chunk.response.usage:
+                response_usage = chunk.response.usage
+                usage["input_tokens"] = getattr(response_usage, "input_tokens", 0)
+                usage["output_tokens"] = getattr(response_usage, "output_tokens", 0)
+                usage["total_tokens"] = getattr(response_usage, "total_tokens", 0)
+
+                # Handle cached tokens
+                if hasattr(response_usage, "input_tokens_details") and hasattr(
+                    response_usage.input_tokens_details, "cached_tokens"
+                ):
+                    usage["cache_read_input_tokens"] = (
+                        response_usage.input_tokens_details.cached_tokens
+                    )

-        # Handle reasoning tokens
-        if hasattr(chunk.usage, "output_tokens_details") and hasattr(
-            chunk.usage.output_tokens_details, "reasoning_tokens"
-        ):
-            usage["reasoning_tokens"] = (
-                chunk.usage.output_tokens_details.reasoning_tokens
-            )
+                # Handle reasoning tokens
+                if hasattr(response_usage, "output_tokens_details") and hasattr(
+                    response_usage.output_tokens_details, "reasoning_tokens"
+                ):
+                    usage["reasoning_tokens"] = (
+                        response_usage.output_tokens_details.reasoning_tokens
+                    )

    return usage
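
Why this fix works: Chat Completions streaming chunks carry usage directly on chunk.usage, but Responses API streaming emits typed events, and usage only appears on the final response.completed event, under chunk.response.usage. The old early return saw an empty chunk.usage on every Responses event and gave up, so token counts surfaced as 0. Below is a minimal sketch (not part of the commit) of a completed event as the extractor now expects it; SimpleNamespace objects stand in for the real SDK event types, and the call in the closing comment assumes provider_type is the extractor's second parameter, since the full signature is elided in the diff above.

from types import SimpleNamespace

# Stand-in for a Responses API "response.completed" streaming event:
# chunk.usage is empty, but chunk.response.usage is populated.
completed_chunk = SimpleNamespace(
    type="response.completed",
    usage=None,  # the old code checked this and returned an empty dict early
    response=SimpleNamespace(
        usage=SimpleNamespace(
            input_tokens=25,
            output_tokens=30,
            total_tokens=55,
            input_tokens_details=SimpleNamespace(cached_tokens=0),
            output_tokens_details=SimpleNamespace(reasoning_tokens=0),
        )
    ),
)

# Per the diff above, something like
#     extract_openai_usage_from_chunk(completed_chunk, "responses")
# should now report input_tokens=25, output_tokens=30, total_tokens=55,
# plus cache_read_input_tokens and reasoning_tokens from the detail objects,
# instead of the empty dict the old code returned.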

posthog/test/ai/openai/test_openai.py

Lines changed: 81 additions & 0 deletions
@@ -1033,6 +1033,87 @@ def test_responses_parse(mock_client, mock_parsed_response):
     assert isinstance(props["$ai_latency"], float)


+def test_responses_api_streaming_with_tokens(mock_client):
+    """Test that Responses API streaming properly captures token usage from response.usage."""
+    from openai.types.responses import ResponseUsage
+    from unittest.mock import MagicMock
+
+    # Create mock response chunks with usage data in the correct location
+    chunks = []
+
+    # First chunk - just content, no usage
+    chunk1 = MagicMock()
+    chunk1.type = "response.text.delta"
+    chunk1.text = "Test "
+    chunks.append(chunk1)
+
+    # Second chunk - more content
+    chunk2 = MagicMock()
+    chunk2.type = "response.text.delta"
+    chunk2.text = "response"
+    chunks.append(chunk2)
+
+    # Final chunk - completed event with usage in response.usage
+    chunk3 = MagicMock()
+    chunk3.type = "response.completed"
+    chunk3.response = MagicMock()
+    chunk3.response.usage = ResponseUsage(
+        input_tokens=25,
+        output_tokens=30,
+        total_tokens=55,
+        input_tokens_details={"prompt_tokens": 25, "cached_tokens": 0},
+        output_tokens_details={"reasoning_tokens": 0},
+    )
+    chunk3.response.output = ["Test response"]
+    chunks.append(chunk3)
+
+    captured_kwargs = {}
+
+    def mock_streaming_response(**kwargs):
+        # Capture the kwargs to verify stream_options was NOT added
+        captured_kwargs.update(kwargs)
+        return iter(chunks)
+
+    with patch(
+        "openai.resources.responses.Responses.create",
+        side_effect=mock_streaming_response,
+    ):
+        client = OpenAI(api_key="test-key", posthog_client=mock_client)
+
+        # Consume the streaming response
+        response = client.responses.create(
+            model="gpt-4o-mini",
+            input=[
+                {"role": "user", "content": "Test message"}
+            ],
+            stream=True,
+            posthog_distinct_id="test-id",
+            posthog_properties={"test": "streaming"},
+        )
+
+        # Consume all chunks
+        list(response)
+
+        # Verify stream_options was NOT added (Responses API doesn't support it)
+        assert "stream_options" not in captured_kwargs
+
+        # Verify capture was called
+        assert mock_client.capture.call_count == 1
+
+        call_args = mock_client.capture.call_args[1]
+        props = call_args["properties"]
+
+        # Verify tokens are captured correctly from response.usage (not 0)
+        assert call_args["distinct_id"] == "test-id"
+        assert call_args["event"] == "$ai_generation"
+        assert props["$ai_provider"] == "openai"
+        assert props["$ai_model"] == "gpt-4o-mini"
+        assert props["$ai_input_tokens"] == 25  # Should not be 0
+        assert props["$ai_output_tokens"] == 30  # Should not be 0
+        assert props["test"] == "streaming"
+        assert isinstance(props["$ai_latency"], float)
+
+
 def test_tool_definition(mock_client, mock_openai_response):
     """Test that tools defined in the create function are captured in $ai_tools property"""
     with patch(
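
The stream_options assertion in the new test pins down a provider difference worth noting: with the Chat Completions API, the SDK only emits a final usage chunk when the caller opts in via stream_options, whereas the Responses API does not accept that parameter and reports usage on the response.completed event instead. For contrast, here is a rough chat-side counterpart; this is an illustrative sketch using the plain openai client, not code from this commit.

from openai import OpenAI

client = OpenAI(api_key="test-key")  # plain SDK client, purely for illustration

stream = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "Test message"}],
    stream=True,
    stream_options={"include_usage": True},  # chat-only; the Responses API rejects it
)
for chunk in stream:
    # With include_usage set, only the final chunk carries usage; earlier
    # chunks have usage=None.
    if chunk.usage:
        print(chunk.usage.prompt_tokens, chunk.usage.completion_tokens)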
