
Commit 7e6257b

feat: cached tokens
1 parent f4c9971 commit 7e6257b

7 files changed: +146, -0 lines changed

posthog/ai/anthropic/anthropic.py

Lines changed: 2 additions & 0 deletions
@@ -125,6 +125,8 @@ def generator():
                     for k in [
                         "input_tokens",
                         "output_tokens",
+                        "cache_read_input_tokens",
+                        "cache_creation_input_tokens",
                     ]
                 }
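The hunk above only extends the list of usage keys that the streaming path copies into usage_stats, so the cache counters ride along with the existing token counts. A minimal, illustrative sketch of the same idea (the plain dict stands in for the SDK's Usage object; it is not the library's exact surrounding code):

    # Illustrative only: narrow a raw usage payload to the four tracked token
    # counts, defaulting anything missing to 0. Field names follow the
    # Anthropic Messages API.
    raw_usage = {
        "input_tokens": 20,
        "output_tokens": 10,
        "cache_read_input_tokens": 15,
        "cache_creation_input_tokens": 2,
    }

    usage_stats = {
        k: raw_usage.get(k, 0)
        for k in [
            "input_tokens",
            "output_tokens",
            "cache_read_input_tokens",
            "cache_creation_input_tokens",
        ]
    }

    assert usage_stats["cache_read_input_tokens"] == 15
    assert usage_stats["cache_creation_input_tokens"] == 2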

posthog/ai/anthropic/anthropic_async.py

Lines changed: 4 additions & 0 deletions
@@ -125,6 +125,8 @@ async def generator():
                     for k in [
                         "input_tokens",
                         "output_tokens",
+                        "cache_read_input_tokens",
+                        "cache_creation_input_tokens",
                     ]
                 }

@@ -184,6 +186,8 @@ async def _capture_streaming_event(
             "$ai_http_status": 200,
             "$ai_input_tokens": usage_stats.get("input_tokens", 0),
             "$ai_output_tokens": usage_stats.get("output_tokens", 0),
+            "$ai_cache_read_input_tokens": usage_stats.get("cache_read_input_tokens", 0),
+            "$ai_cache_creation_input_tokens": usage_stats.get("cache_creation_input_tokens", 0),
             "$ai_latency": latency,
             "$ai_trace_id": posthog_trace_id,
             "$ai_base_url": str(self._client.base_url),

posthog/ai/openai/openai.py

Lines changed: 6 additions & 0 deletions
@@ -100,6 +100,7 @@ def _create_streaming(
         def generator():
             nonlocal usage_stats
             nonlocal accumulated_content
+
             try:
                 for chunk in response:
                     if hasattr(chunk, "usage") and chunk.usage:

@@ -111,6 +112,10 @@ def generator():
                                 "total_tokens",
                             ]
                         }
+
+                        # Add support for cached tokens
+                        if hasattr(chunk.usage, "prompt_tokens_details") and hasattr(chunk.usage.prompt_tokens_details, "cached_tokens"):
+                            usage_stats["cache_read_input_tokens"] = chunk.usage.prompt_tokens_details.cached_tokens

                     if hasattr(chunk, "choices") and chunk.choices and len(chunk.choices) > 0:
                         content = chunk.choices[0].delta.content

@@ -165,6 +170,7 @@ def _capture_streaming_event(
             "$ai_http_status": 200,
             "$ai_input_tokens": usage_stats.get("prompt_tokens", 0),
             "$ai_output_tokens": usage_stats.get("completion_tokens", 0),
+            "$ai_cache_read_input_tokens": usage_stats.get("cache_read_input_tokens", 0),
             "$ai_latency": latency,
             "$ai_trace_id": posthog_trace_id,
             "$ai_base_url": str(self._client.base_url),

posthog/ai/openai/openai_async.py

Lines changed: 6 additions & 0 deletions
@@ -111,6 +111,11 @@ async def async_generator():
                                 "total_tokens",
                             ]
                         }
+
+                        # Add support for cached tokens
+                        if hasattr(chunk.usage, "prompt_tokens_details") and hasattr(chunk.usage.prompt_tokens_details, "cached_tokens"):
+                            usage_stats["cache_read_input_tokens"] = chunk.usage.prompt_tokens_details.cached_tokens
+
                     if hasattr(chunk, "choices") and chunk.choices and len(chunk.choices) > 0:
                         content = chunk.choices[0].delta.content
                         if content:

@@ -164,6 +169,7 @@ async def _capture_streaming_event(
             "$ai_http_status": 200,
             "$ai_input_tokens": usage_stats.get("prompt_tokens", 0),
             "$ai_output_tokens": usage_stats.get("completion_tokens", 0),
+            "$ai_cache_read_input_tokens": usage_stats.get("cache_read_input_tokens", 0),
             "$ai_latency": latency,
             "$ai_trace_id": posthog_trace_id,
             "$ai_base_url": str(self._client.base_url),

posthog/ai/utils.py

Lines changed: 20 additions & 0 deletions
@@ -34,15 +34,23 @@ def get_usage(response, provider: str) -> Dict[str, Any]:
         return {
             "input_tokens": response.usage.input_tokens,
             "output_tokens": response.usage.output_tokens,
+            "cache_read_input_tokens": response.usage.cache_read_input_tokens,
+            "cache_creation_input_tokens": response.usage.cache_creation_input_tokens,
         }
     elif provider == "openai":
+        cached_tokens = 0
+        if hasattr(response.usage, "prompt_tokens_details") and hasattr(response.usage.prompt_tokens_details, "cached_tokens"):
+            cached_tokens = response.usage.prompt_tokens_details.cached_tokens
         return {
             "input_tokens": response.usage.prompt_tokens,
             "output_tokens": response.usage.completion_tokens,
+            "cache_read_input_tokens": cached_tokens,
         }
     return {
         "input_tokens": 0,
         "output_tokens": 0,
+        "cache_read_input_tokens": 0,
+        "cache_creation_input_tokens": 0,
     }

@@ -157,6 +165,12 @@ def call_llm_and_track_usage(
         **(error_params or {}),
     }

+    if usage.get("cache_read_input_tokens", 0) > 0:
+        event_properties["$ai_cache_read_input_tokens"] = usage.get("cache_read_input_tokens", 0)
+
+    if usage.get("cache_creation_input_tokens", 0) > 0:
+        event_properties["$ai_cache_creation_input_tokens"] = usage.get("cache_creation_input_tokens", 0)
+
     if posthog_distinct_id is None:
         event_properties["$process_person_profile"] = False

@@ -233,6 +247,12 @@ async def call_llm_and_track_usage_async(
         **(error_params or {}),
     }

+    if usage.get("cache_read_input_tokens", 0) > 0:
+        event_properties["$ai_cache_read_input_tokens"] = usage.get("cache_read_input_tokens", 0)
+
+    if usage.get("cache_creation_input_tokens", 0) > 0:
+        event_properties["$ai_cache_creation_input_tokens"] = usage.get("cache_creation_input_tokens", 0)
+
     if posthog_distinct_id is None:
        event_properties["$process_person_profile"] = False
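In these non-streaming paths the cache properties are only attached when the corresponding count is positive, whereas the streaming paths above always emit them with a 0 default. A small, illustrative walk-through of that conditional with example numbers (not code from the commit):

    # Illustrative only: usage dict as get_usage() now returns it for an Anthropic
    # response whose prompt was read from cache, with nothing newly cached.
    usage = {
        "input_tokens": 20,
        "output_tokens": 10,
        "cache_read_input_tokens": 15,
        "cache_creation_input_tokens": 0,
    }

    event_properties = {
        "$ai_input_tokens": usage.get("input_tokens", 0),
        "$ai_output_tokens": usage.get("output_tokens", 0),
    }

    # Same conditional as the hunks above: zero counts are simply left off the event.
    if usage.get("cache_read_input_tokens", 0) > 0:
        event_properties["$ai_cache_read_input_tokens"] = usage.get("cache_read_input_tokens", 0)
    if usage.get("cache_creation_input_tokens", 0) > 0:
        event_properties["$ai_cache_creation_input_tokens"] = usage.get("cache_creation_input_tokens", 0)

    assert "$ai_cache_read_input_tokens" in event_properties
    assert "$ai_cache_creation_input_tokens" not in event_properties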

posthog/test/ai/anthropic/test_anthropic.py

Lines changed: 52 additions & 0 deletions
@@ -54,6 +54,27 @@ def stream_generator():

     return stream_generator()

+@pytest.fixture
+def mock_anthropic_response_with_cached_tokens():
+    # Create a mock Usage object with cache read and cache creation token counts
+    usage = Usage(
+        input_tokens=20,
+        output_tokens=10,
+        cache_read_input_tokens=15,
+        cache_creation_input_tokens=2,
+    )
+
+    return Message(
+        id="msg_123",
+        type="message",
+        role="assistant",
+        content=[{"type": "text", "text": "Test response"}],
+        model="claude-3-opus-20240229",
+        usage=usage,
+        stop_reason="end_turn",
+        stop_sequence=None,
+    )
+

 def test_basic_completion(mock_client, mock_anthropic_response):
     with patch("anthropic.resources.Messages.create", return_value=mock_anthropic_response):

@@ -339,3 +360,34 @@ def test_error(mock_client, mock_anthropic_response):
         props = call_args["properties"]
         assert props["$ai_is_error"] is True
         assert props["$ai_error"] == "Test error"
+
+
+def test_cached_tokens(mock_client, mock_anthropic_response_with_cached_tokens):
+    with patch("anthropic.resources.Messages.create", return_value=mock_anthropic_response_with_cached_tokens):
+        client = Anthropic(api_key="test-key", posthog_client=mock_client)
+        response = client.messages.create(
+            model="claude-3-opus-20240229",
+            messages=[{"role": "user", "content": "Hello"}],
+            posthog_distinct_id="test-id",
+            posthog_properties={"foo": "bar"},
+        )
+
+        assert response == mock_anthropic_response_with_cached_tokens
+        assert mock_client.capture.call_count == 1
+
+        call_args = mock_client.capture.call_args[1]
+        props = call_args["properties"]
+
+        assert call_args["distinct_id"] == "test-id"
+        assert call_args["event"] == "$ai_generation"
+        assert props["$ai_provider"] == "anthropic"
+        assert props["$ai_model"] == "claude-3-opus-20240229"
+        assert props["$ai_input"] == [{"role": "user", "content": "Hello"}]
+        assert props["$ai_output_choices"] == [{"role": "assistant", "content": "Test response"}]
+        assert props["$ai_input_tokens"] == 20
+        assert props["$ai_output_tokens"] == 10
+        assert props["$ai_cache_read_input_tokens"] == 15
+        assert props["$ai_cache_creation_input_tokens"] == 2
+        assert props["$ai_http_status"] == 200
+        assert props["foo"] == "bar"
+        assert isinstance(props["$ai_latency"], float)

posthog/test/ai/openai/test_openai.py

Lines changed: 56 additions & 0 deletions
@@ -62,6 +62,32 @@ def mock_embedding_response():
     )


+@pytest.fixture
+def mock_openai_response_with_cached_tokens():
+    return ChatCompletion(
+        id="test",
+        model="gpt-4",
+        object="chat.completion",
+        created=int(time.time()),
+        choices=[
+            Choice(
+                finish_reason="stop",
+                index=0,
+                message=ChatCompletionMessage(
+                    content="Test response",
+                    role="assistant",
+                ),
+            )
+        ],
+        usage=CompletionUsage(
+            completion_tokens=10,
+            prompt_tokens=20,
+            total_tokens=30,
+            prompt_tokens_details={"cached_tokens": 15},
+        ),
+    )
+
+
 def test_basic_completion(mock_client, mock_openai_response):
     with patch("openai.resources.chat.completions.Completions.create", return_value=mock_openai_response):
         client = OpenAI(api_key="test-key", posthog_client=mock_client)

@@ -187,3 +213,33 @@ def test_error(mock_client, mock_openai_response):
         props = call_args["properties"]
         assert props["$ai_is_error"] is True
         assert props["$ai_error"] == "Test error"
+
+
+def test_cached_tokens(mock_client, mock_openai_response_with_cached_tokens):
+    with patch("openai.resources.chat.completions.Completions.create", return_value=mock_openai_response_with_cached_tokens):
+        client = OpenAI(api_key="test-key", posthog_client=mock_client)
+        response = client.chat.completions.create(
+            model="gpt-4",
+            messages=[{"role": "user", "content": "Hello"}],
+            posthog_distinct_id="test-id",
+            posthog_properties={"foo": "bar"},
+        )
+
+        assert response == mock_openai_response_with_cached_tokens
+        assert mock_client.capture.call_count == 1
+
+        call_args = mock_client.capture.call_args[1]
+        props = call_args["properties"]
+
+        assert call_args["distinct_id"] == "test-id"
+        assert call_args["event"] == "$ai_generation"
+        assert props["$ai_provider"] == "openai"
+        assert props["$ai_model"] == "gpt-4"
+        assert props["$ai_input"] == [{"role": "user", "content": "Hello"}]
+        assert props["$ai_output_choices"] == [{"role": "assistant", "content": "Test response"}]
+        assert props["$ai_input_tokens"] == 20
+        assert props["$ai_output_tokens"] == 10
+        assert props["$ai_cache_read_input_tokens"] == 15
+        assert props["$ai_http_status"] == 200
+        assert props["foo"] == "bar"
+        assert isinstance(props["$ai_latency"], float)
