
Commit 7e6257b

feat: cached tokens
1 parent f4c9971 commit 7e6257b

7 files changed: +146, -0 lines changed

posthog/ai/anthropic/anthropic.py

Lines changed: 2 additions & 0 deletions
@@ -125,6 +125,8 @@ def generator():
                     for k in [
                         "input_tokens",
                         "output_tokens",
+                        "cache_read_input_tokens",
+                        "cache_creation_input_tokens",
                     ]
                 }
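The hunk above only extends the list of usage keys that the streaming path copies into usage_stats, so the cache counters ride along with the existing token counts. A minimal, illustrative sketch of the same idea (the plain dict stands in for the SDK's Usage object; it is not the library's exact surrounding code):

    # Illustrative only: narrow a raw usage payload to the four tracked token
    # counts, defaulting anything missing to 0. Field names follow the
    # Anthropic Messages API.
    raw_usage = {
        "input_tokens": 20,
        "output_tokens": 10,
        "cache_read_input_tokens": 15,
        "cache_creation_input_tokens": 2,
    }

    usage_stats = {
        k: raw_usage.get(k, 0)
        for k in [
            "input_tokens",
            "output_tokens",
            "cache_read_input_tokens",
            "cache_creation_input_tokens",
        ]
    }

    assert usage_stats["cache_read_input_tokens"] == 15
    assert usage_stats["cache_creation_input_tokens"] == 2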

posthog/ai/anthropic/anthropic_async.py

Lines changed: 4 additions & 0 deletions
@@ -125,6 +125,8 @@ async def generator():
                     for k in [
                         "input_tokens",
                         "output_tokens",
+                        "cache_read_input_tokens",
+                        "cache_creation_input_tokens",
                     ]
                 }

@@ -184,6 +186,8 @@ async def _capture_streaming_event(
             "$ai_http_status": 200,
             "$ai_input_tokens": usage_stats.get("input_tokens", 0),
             "$ai_output_tokens": usage_stats.get("output_tokens", 0),
+            "$ai_cache_read_input_tokens": usage_stats.get("cache_read_input_tokens", 0),
+            "$ai_cache_creation_input_tokens": usage_stats.get("cache_creation_input_tokens", 0),
             "$ai_latency": latency,
             "$ai_trace_id": posthog_trace_id,
             "$ai_base_url": str(self._client.base_url),

posthog/ai/openai/openai.py

Lines changed: 6 additions & 0 deletions
@@ -100,6 +100,7 @@ def _create_streaming(
         def generator():
             nonlocal usage_stats
             nonlocal accumulated_content
+
             try:
                 for chunk in response:
                     if hasattr(chunk, "usage") and chunk.usage:

@@ -111,6 +112,10 @@ def generator():
                                 "total_tokens",
                             ]
                         }
+
+                        # Add support for cached tokens
+                        if hasattr(chunk.usage, "prompt_tokens_details") and hasattr(chunk.usage.prompt_tokens_details, "cached_tokens"):
+                            usage_stats["cache_read_input_tokens"] = chunk.usage.prompt_tokens_details.cached_tokens

                     if hasattr(chunk, "choices") and chunk.choices and len(chunk.choices) > 0:
                         content = chunk.choices[0].delta.content

@@ -165,6 +170,7 @@ def _capture_streaming_event(
             "$ai_http_status": 200,
             "$ai_input_tokens": usage_stats.get("prompt_tokens", 0),
             "$ai_output_tokens": usage_stats.get("completion_tokens", 0),
+            "$ai_cache_read_input_tokens": usage_stats.get("cache_read_input_tokens", 0),
             "$ai_latency": latency,
             "$ai_trace_id": posthog_trace_id,
             "$ai_base_url": str(self._client.base_url),

posthog/ai/openai/openai_async.py

Lines changed: 6 additions & 0 deletions
@@ -111,6 +111,11 @@ async def async_generator():
                                 "total_tokens",
                             ]
                         }
+
+                        # Add support for cached tokens
+                        if hasattr(chunk.usage, "prompt_tokens_details") and hasattr(chunk.usage.prompt_tokens_details, "cached_tokens"):
+                            usage_stats["cache_read_input_tokens"] = chunk.usage.prompt_tokens_details.cached_tokens
+
                     if hasattr(chunk, "choices") and chunk.choices and len(chunk.choices) > 0:
                         content = chunk.choices[0].delta.content
                         if content:

@@ -164,6 +169,7 @@ async def _capture_streaming_event(
             "$ai_http_status": 200,
             "$ai_input_tokens": usage_stats.get("prompt_tokens", 0),
             "$ai_output_tokens": usage_stats.get("completion_tokens", 0),
+            "$ai_cache_read_input_tokens": usage_stats.get("cache_read_input_tokens", 0),
             "$ai_latency": latency,
             "$ai_trace_id": posthog_trace_id,
             "$ai_base_url": str(self._client.base_url),

posthog/ai/utils.py

Lines changed: 20 additions & 0 deletions
@@ -34,15 +34,23 @@ def get_usage(response, provider: str) -> Dict[str, Any]:
         return {
             "input_tokens": response.usage.input_tokens,
             "output_tokens": response.usage.output_tokens,
+            "cache_read_input_tokens": response.usage.cache_read_input_tokens,
+            "cache_creation_input_tokens": response.usage.cache_creation_input_tokens,
         }
     elif provider == "openai":
+        cached_tokens = 0
+        if hasattr(response.usage, "prompt_tokens_details") and hasattr(response.usage.prompt_tokens_details, "cached_tokens"):
+            cached_tokens = response.usage.prompt_tokens_details.cached_tokens
         return {
             "input_tokens": response.usage.prompt_tokens,
             "output_tokens": response.usage.completion_tokens,
+            "cache_read_input_tokens": cached_tokens,
         }
     return {
         "input_tokens": 0,
         "output_tokens": 0,
+        "cache_read_input_tokens": 0,
+        "cache_creation_input_tokens": 0,
     }

@@ -157,6 +165,12 @@ def call_llm_and_track_usage(
         **(error_params or {}),
     }

+    if usage.get("cache_read_input_tokens", 0) > 0:
+        event_properties["$ai_cache_read_input_tokens"] = usage.get("cache_read_input_tokens", 0)
+
+    if usage.get("cache_creation_input_tokens", 0) > 0:
+        event_properties["$ai_cache_creation_input_tokens"] = usage.get("cache_creation_input_tokens", 0)
+
     if posthog_distinct_id is None:
         event_properties["$process_person_profile"] = False

@@ -233,6 +247,12 @@ async def call_llm_and_track_usage_async(
         **(error_params or {}),
     }

+    if usage.get("cache_read_input_tokens", 0) > 0:
+        event_properties["$ai_cache_read_input_tokens"] = usage.get("cache_read_input_tokens", 0)
+
+    if usage.get("cache_creation_input_tokens", 0) > 0:
+        event_properties["$ai_cache_creation_input_tokens"] = usage.get("cache_creation_input_tokens", 0)
+
     if posthog_distinct_id is None:
        event_properties["$process_person_profile"] = False
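In these non-streaming paths the cache properties are only attached when the corresponding count is positive, whereas the streaming paths above always emit them with a 0 default. A small, illustrative walk-through of that conditional with example numbers (not code from the commit):

    # Illustrative only: usage dict as get_usage() now returns it for an Anthropic
    # response whose prompt was read from cache, with nothing newly cached.
    usage = {
        "input_tokens": 20,
        "output_tokens": 10,
        "cache_read_input_tokens": 15,
        "cache_creation_input_tokens": 0,
    }

    event_properties = {
        "$ai_input_tokens": usage.get("input_tokens", 0),
        "$ai_output_tokens": usage.get("output_tokens", 0),
    }

    # Same conditional as the hunks above: zero counts are simply left off the event.
    if usage.get("cache_read_input_tokens", 0) > 0:
        event_properties["$ai_cache_read_input_tokens"] = usage.get("cache_read_input_tokens", 0)
    if usage.get("cache_creation_input_tokens", 0) > 0:
        event_properties["$ai_cache_creation_input_tokens"] = usage.get("cache_creation_input_tokens", 0)

    assert "$ai_cache_read_input_tokens" in event_properties
    assert "$ai_cache_creation_input_tokens" not in event_properties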

posthog/test/ai/anthropic/test_anthropic.py

Lines changed: 52 additions & 0 deletions
@@ -54,6 +54,27 @@ def stream_generator():

     return stream_generator()

+@pytest.fixture
+def mock_anthropic_response_with_cached_tokens():
+    # Create a mock Usage object with cache read and cache creation token counts
+    usage = Usage(
+        input_tokens=20,
+        output_tokens=10,
+        cache_read_input_tokens=15,
+        cache_creation_input_tokens=2,
+    )
+
+    return Message(
+        id="msg_123",
+        type="message",
+        role="assistant",
+        content=[{"type": "text", "text": "Test response"}],
+        model="claude-3-opus-20240229",
+        usage=usage,
+        stop_reason="end_turn",
+        stop_sequence=None,
+    )
+

 def test_basic_completion(mock_client, mock_anthropic_response):
     with patch("anthropic.resources.Messages.create", return_value=mock_anthropic_response):

@@ -339,3 +360,34 @@ def test_error(mock_client, mock_anthropic_response):
         props = call_args["properties"]
         assert props["$ai_is_error"] is True
         assert props["$ai_error"] == "Test error"
+
+
+def test_cached_tokens(mock_client, mock_anthropic_response_with_cached_tokens):
+    with patch("anthropic.resources.Messages.create", return_value=mock_anthropic_response_with_cached_tokens):
+        client = Anthropic(api_key="test-key", posthog_client=mock_client)
+        response = client.messages.create(
+            model="claude-3-opus-20240229",
+            messages=[{"role": "user", "content": "Hello"}],
+            posthog_distinct_id="test-id",
+            posthog_properties={"foo": "bar"},
+        )
+
+        assert response == mock_anthropic_response_with_cached_tokens
+        assert mock_client.capture.call_count == 1
+
+        call_args = mock_client.capture.call_args[1]
+        props = call_args["properties"]
+
+        assert call_args["distinct_id"] == "test-id"
+        assert call_args["event"] == "$ai_generation"
+        assert props["$ai_provider"] == "anthropic"
+        assert props["$ai_model"] == "claude-3-opus-20240229"
+        assert props["$ai_input"] == [{"role": "user", "content": "Hello"}]
+        assert props["$ai_output_choices"] == [{"role": "assistant", "content": "Test response"}]
+        assert props["$ai_input_tokens"] == 20
+        assert props["$ai_output_tokens"] == 10
+        assert props["$ai_cache_read_input_tokens"] == 15
+        assert props["$ai_cache_creation_input_tokens"] == 2
+        assert props["$ai_http_status"] == 200
+        assert props["foo"] == "bar"
+        assert isinstance(props["$ai_latency"], float)

posthog/test/ai/openai/test_openai.py

Lines changed: 56 additions & 0 deletions
@@ -62,6 +62,32 @@ def mock_embedding_response():
     )


+@pytest.fixture
+def mock_openai_response_with_cached_tokens():
+    return ChatCompletion(
+        id="test",
+        model="gpt-4",
+        object="chat.completion",
+        created=int(time.time()),
+        choices=[
+            Choice(
+                finish_reason="stop",
+                index=0,
+                message=ChatCompletionMessage(
+                    content="Test response",
+                    role="assistant",
+                ),
+            )
+        ],
+        usage=CompletionUsage(
+            completion_tokens=10,
+            prompt_tokens=20,
+            total_tokens=30,
+            prompt_tokens_details={"cached_tokens": 15},
+        ),
+    )
+
+
 def test_basic_completion(mock_client, mock_openai_response):
     with patch("openai.resources.chat.completions.Completions.create", return_value=mock_openai_response):
         client = OpenAI(api_key="test-key", posthog_client=mock_client)

@@ -187,3 +213,33 @@ def test_error(mock_client, mock_openai_response):
         props = call_args["properties"]
         assert props["$ai_is_error"] is True
         assert props["$ai_error"] == "Test error"
+
+
+def test_cached_tokens(mock_client, mock_openai_response_with_cached_tokens):
+    with patch("openai.resources.chat.completions.Completions.create", return_value=mock_openai_response_with_cached_tokens):
+        client = OpenAI(api_key="test-key", posthog_client=mock_client)
+        response = client.chat.completions.create(
+            model="gpt-4",
+            messages=[{"role": "user", "content": "Hello"}],
+            posthog_distinct_id="test-id",
+            posthog_properties={"foo": "bar"},
+        )
+
+        assert response == mock_openai_response_with_cached_tokens
+        assert mock_client.capture.call_count == 1
+
+        call_args = mock_client.capture.call_args[1]
+        props = call_args["properties"]
+
+        assert call_args["distinct_id"] == "test-id"
+        assert call_args["event"] == "$ai_generation"
+        assert props["$ai_provider"] == "openai"
+        assert props["$ai_model"] == "gpt-4"
+        assert props["$ai_input"] == [{"role": "user", "content": "Hello"}]
+        assert props["$ai_output_choices"] == [{"role": "assistant", "content": "Test response"}]
+        assert props["$ai_input_tokens"] == 20
+        assert props["$ai_output_tokens"] == 10
+        assert props["$ai_cache_read_input_tokens"] == 15
+        assert props["$ai_http_status"] == 200
+        assert props["foo"] == "bar"
+        assert isinstance(props["$ai_latency"], float)
