
Commit b3e21c1

fix(llma): gemini missing cached and reasoning tokens (#323)
* fix(llma): Gemini missing cached and reasoning tokens
* chore(llma): bump version
* chore(llma): run ruff
1 parent 08b11cb commit b3e21c1
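
All of the changes below swap raw `Dict[str, int]` usage maps for the shared `TokenUsage` type, so cached and reasoning token counts survive the trip from provider metadata to the captured event. `TokenUsage` itself lives in `posthog/ai/types.py` and is not part of this diff; judging by how the hunks call `TokenUsage()` with no arguments and assign optional keys afterwards, it behaves like a `total=False` TypedDict. A minimal, hypothetical stand-in for reading the hunks below:

```python
# Hypothetical stand-in for posthog.ai.types.TokenUsage (the real definition
# is not shown in this diff). A total=False TypedDict matches how the code
# below uses it: TokenUsage() with no args, optional keys set only when non-zero.
from typing import TypedDict


class TokenUsage(TypedDict, total=False):
    input_tokens: int
    output_tokens: int
    cache_read_input_tokens: int
    cache_creation_input_tokens: int
    reasoning_tokens: int


usage = TokenUsage(input_tokens=0, output_tokens=0)
usage["reasoning_tokens"] = 64  # set only when the provider reports a non-zero count
print(usage)  # {'input_tokens': 0, 'output_tokens': 0, 'reasoning_tokens': 64}
```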

File tree

14 files changed (+372 additions, -256 deletions)

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
```diff
@@ -1,3 +1,7 @@
+# 6.7.3 - 2025-09-04
+
+- fix: missing usage tokens in Gemini
+
 # 6.7.2 - 2025-09-03
 
 - fix: tool call results in streaming providers
```

posthog/ai/anthropic/anthropic.py

Lines changed: 4 additions & 5 deletions
```diff
@@ -10,7 +10,7 @@
 import uuid
 from typing import Any, Dict, List, Optional
 
-from posthog.ai.types import StreamingContentBlock, ToolInProgress
+from posthog.ai.types import StreamingContentBlock, TokenUsage, ToolInProgress
 from posthog.ai.utils import (
     call_llm_and_track_usage,
     merge_usage_stats,
@@ -126,7 +126,7 @@ def _create_streaming(
         **kwargs: Any,
     ):
         start_time = time.time()
-        usage_stats: Dict[str, int] = {"input_tokens": 0, "output_tokens": 0}
+        usage_stats: TokenUsage = TokenUsage(input_tokens=0, output_tokens=0)
         accumulated_content = ""
         content_blocks: List[StreamingContentBlock] = []
         tools_in_progress: Dict[str, ToolInProgress] = {}
@@ -210,14 +210,13 @@ def _capture_streaming_event(
         posthog_privacy_mode: bool,
         posthog_groups: Optional[Dict[str, Any]],
         kwargs: Dict[str, Any],
-        usage_stats: Dict[str, int],
+        usage_stats: TokenUsage,
         latency: float,
         content_blocks: List[StreamingContentBlock],
         accumulated_content: str,
     ):
         from posthog.ai.types import StreamingEventData
         from posthog.ai.anthropic.anthropic_converter import (
-            standardize_anthropic_usage,
            format_anthropic_streaming_input,
            format_anthropic_streaming_output_complete,
        )
@@ -236,7 +235,7 @@
             formatted_output=format_anthropic_streaming_output_complete(
                 content_blocks, accumulated_content
             ),
-            usage_stats=standardize_anthropic_usage(usage_stats),
+            usage_stats=usage_stats,
             latency=latency,
             distinct_id=posthog_distinct_id,
             trace_id=posthog_trace_id,
```
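
The streaming path now seeds a `TokenUsage` and folds each event's usage into it before capture. The real code delegates that folding to `posthog.ai.utils.merge_usage_stats` (imported in the first hunk), whose implementation is not part of this diff; a sketch of the accumulation idea, reusing the `TokenUsage` stand-in from the sketch above:

```python
# Sketch only: inlines the per-event merge that the real code delegates to
# posthog.ai.utils.merge_usage_stats (exact behavior assumed, not shown here).
def merge_usage(into: TokenUsage, new: TokenUsage) -> None:
    for key, value in new.items():
        into[key] = into.get(key, 0) + value  # sum counters key by key


usage_stats = TokenUsage(input_tokens=0, output_tokens=0)
for event_usage in (TokenUsage(input_tokens=120), TokenUsage(output_tokens=48)):
    merge_usage(usage_stats, event_usage)
print(usage_stats)  # {'input_tokens': 120, 'output_tokens': 48}
```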

posthog/ai/anthropic/anthropic_async.py

Lines changed: 3 additions & 3 deletions
```diff
@@ -11,7 +11,7 @@
 from typing import Any, Dict, List, Optional
 
 from posthog import setup
-from posthog.ai.types import StreamingContentBlock, ToolInProgress
+from posthog.ai.types import StreamingContentBlock, TokenUsage, ToolInProgress
 from posthog.ai.utils import (
     call_llm_and_track_usage_async,
     extract_available_tool_calls,
@@ -131,7 +131,7 @@ async def _create_streaming(
         **kwargs: Any,
     ):
         start_time = time.time()
-        usage_stats: Dict[str, int] = {"input_tokens": 0, "output_tokens": 0}
+        usage_stats: TokenUsage = TokenUsage(input_tokens=0, output_tokens=0)
         accumulated_content = ""
         content_blocks: List[StreamingContentBlock] = []
         tools_in_progress: Dict[str, ToolInProgress] = {}
@@ -215,7 +215,7 @@ async def _capture_streaming_event(
         posthog_privacy_mode: bool,
         posthog_groups: Optional[Dict[str, Any]],
         kwargs: Dict[str, Any],
-        usage_stats: Dict[str, int],
+        usage_stats: TokenUsage,
         latency: float,
         content_blocks: List[StreamingContentBlock],
         accumulated_content: str,
```

posthog/ai/anthropic/anthropic_converter.py

Lines changed: 33 additions & 23 deletions
```diff
@@ -14,7 +14,6 @@
     FormattedMessage,
     FormattedTextContent,
     StreamingContentBlock,
-    StreamingUsageStats,
     TokenUsage,
     ToolInProgress,
 )
@@ -164,7 +163,38 @@ def format_anthropic_streaming_content(
     return formatted
 
 
-def extract_anthropic_usage_from_event(event: Any) -> StreamingUsageStats:
+def extract_anthropic_usage_from_response(response: Any) -> TokenUsage:
+    """
+    Extract usage from a full Anthropic response (non-streaming).
+
+    Args:
+        response: The complete response from Anthropic API
+
+    Returns:
+        TokenUsage with standardized usage
+    """
+    if not hasattr(response, "usage"):
+        return TokenUsage(input_tokens=0, output_tokens=0)
+
+    result = TokenUsage(
+        input_tokens=getattr(response.usage, "input_tokens", 0),
+        output_tokens=getattr(response.usage, "output_tokens", 0),
+    )
+
+    if hasattr(response.usage, "cache_read_input_tokens"):
+        cache_read = response.usage.cache_read_input_tokens
+        if cache_read and cache_read > 0:
+            result["cache_read_input_tokens"] = cache_read
+
+    if hasattr(response.usage, "cache_creation_input_tokens"):
+        cache_creation = response.usage.cache_creation_input_tokens
+        if cache_creation and cache_creation > 0:
+            result["cache_creation_input_tokens"] = cache_creation
+
+    return result
+
+
+def extract_anthropic_usage_from_event(event: Any) -> TokenUsage:
     """
     Extract usage statistics from an Anthropic streaming event.
 
@@ -175,7 +205,7 @@ def extract_anthropic_usage_from_event(event: Any) -> StreamingUsageStats:
         Dictionary of usage statistics
     """
 
-    usage: StreamingUsageStats = {}
+    usage: TokenUsage = TokenUsage()
 
     # Handle usage stats from message_start event
     if hasattr(event, "type") and event.type == "message_start":
@@ -329,26 +359,6 @@ def finalize_anthropic_tool_input(
         del tools_in_progress[block["id"]]
 
 
-def standardize_anthropic_usage(usage: Dict[str, Any]) -> TokenUsage:
-    """
-    Standardize Anthropic usage statistics to common TokenUsage format.
-
-    Anthropic already uses standard field names, so this mainly structures the data.
-
-    Args:
-        usage: Raw usage statistics from Anthropic
-
-    Returns:
-        Standardized TokenUsage dict
-    """
-    return TokenUsage(
-        input_tokens=usage.get("input_tokens", 0),
-        output_tokens=usage.get("output_tokens", 0),
-        cache_read_input_tokens=usage.get("cache_read_input_tokens"),
-        cache_creation_input_tokens=usage.get("cache_creation_input_tokens"),
-    )
-
-
 def format_anthropic_streaming_input(kwargs: Dict[str, Any]) -> Any:
     """
     Format Anthropic streaming input using system prompt merging.
```
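
A quick check of the new non-streaming path added above, using `types.SimpleNamespace` to mock an Anthropic response; note that zero-valued cache counters are deliberately left out of the result:

```python
from types import SimpleNamespace

from posthog.ai.anthropic.anthropic_converter import (
    extract_anthropic_usage_from_response,
)

# Mock object standing in for a real Anthropic API response.
response = SimpleNamespace(
    usage=SimpleNamespace(
        input_tokens=120,
        output_tokens=48,
        cache_read_input_tokens=100,
        cache_creation_input_tokens=0,  # dropped: only non-zero counts are kept
    )
)

usage = extract_anthropic_usage_from_response(response)
print(usage)
# {'input_tokens': 120, 'output_tokens': 48, 'cache_read_input_tokens': 100}
```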

posthog/ai/gemini/gemini.py

Lines changed: 5 additions & 4 deletions
```diff
@@ -3,6 +3,8 @@
 import uuid
 from typing import Any, Dict, Optional
 
+from posthog.ai.types import TokenUsage
+
 try:
     from google import genai
 except ImportError:
@@ -294,7 +296,7 @@ def _generate_content_streaming(
         **kwargs: Any,
     ):
         start_time = time.time()
-        usage_stats: Dict[str, int] = {"input_tokens": 0, "output_tokens": 0}
+        usage_stats: TokenUsage = TokenUsage(input_tokens=0, output_tokens=0)
         accumulated_content = []
 
         kwargs_without_stream = {"model": model, "contents": contents, **kwargs}
@@ -350,12 +352,11 @@ def _capture_streaming_event(
         privacy_mode: bool,
         groups: Optional[Dict[str, Any]],
         kwargs: Dict[str, Any],
-        usage_stats: Dict[str, int],
+        usage_stats: TokenUsage,
         latency: float,
         output: Any,
     ):
         from posthog.ai.types import StreamingEventData
-        from posthog.ai.gemini.gemini_converter import standardize_gemini_usage
 
         # Prepare standardized event data
         formatted_input = self._format_input(contents)
@@ -368,7 +369,7 @@ def _capture_streaming_event(
             kwargs=kwargs,
             formatted_input=sanitized_input,
             formatted_output=format_gemini_streaming_output(output),
-            usage_stats=standardize_gemini_usage(usage_stats),
+            usage_stats=usage_stats,
             latency=latency,
             distinct_id=distinct_id,
             trace_id=trace_id,
```

posthog/ai/gemini/gemini_converter.py

Lines changed: 52 additions & 30 deletions
```diff
@@ -10,7 +10,6 @@
 from posthog.ai.types import (
     FormattedContentItem,
     FormattedMessage,
-    StreamingUsageStats,
     TokenUsage,
 )
 
@@ -283,29 +282,71 @@ def format_gemini_input(contents: Any) -> List[FormattedMessage]:
     return [_format_object_message(contents)]
 
 
-def extract_gemini_usage_from_chunk(chunk: Any) -> StreamingUsageStats:
+def _extract_usage_from_metadata(metadata: Any) -> TokenUsage:
+    """
+    Common logic to extract usage from Gemini metadata.
+    Used by both streaming and non-streaming paths.
+
+    Args:
+        metadata: usage_metadata from Gemini response or chunk
+
+    Returns:
+        TokenUsage with standardized usage
+    """
+    usage = TokenUsage(
+        input_tokens=getattr(metadata, "prompt_token_count", 0),
+        output_tokens=getattr(metadata, "candidates_token_count", 0),
+    )
+
+    # Add cache tokens if present (don't add if 0)
+    if hasattr(metadata, "cached_content_token_count"):
+        cache_tokens = metadata.cached_content_token_count
+        if cache_tokens and cache_tokens > 0:
+            usage["cache_read_input_tokens"] = cache_tokens
+
+    # Add reasoning tokens if present (don't add if 0)
+    if hasattr(metadata, "thoughts_token_count"):
+        reasoning_tokens = metadata.thoughts_token_count
+        if reasoning_tokens and reasoning_tokens > 0:
+            usage["reasoning_tokens"] = reasoning_tokens
+
+    return usage
+
+
+def extract_gemini_usage_from_response(response: Any) -> TokenUsage:
+    """
+    Extract usage statistics from a full Gemini response (non-streaming).
+
+    Args:
+        response: The complete response from Gemini API
+
+    Returns:
+        TokenUsage with standardized usage statistics
+    """
+    if not hasattr(response, "usage_metadata") or not response.usage_metadata:
+        return TokenUsage(input_tokens=0, output_tokens=0)
+
+    return _extract_usage_from_metadata(response.usage_metadata)
+
+
+def extract_gemini_usage_from_chunk(chunk: Any) -> TokenUsage:
     """
     Extract usage statistics from a Gemini streaming chunk.
 
     Args:
         chunk: Streaming chunk from Gemini API
 
     Returns:
-        Dictionary of usage statistics
+        TokenUsage with standardized usage statistics
     """
 
-    usage: StreamingUsageStats = {}
+    usage: TokenUsage = TokenUsage()
 
     if not hasattr(chunk, "usage_metadata") or not chunk.usage_metadata:
         return usage
 
-    # Gemini uses prompt_token_count and candidates_token_count
-    usage["input_tokens"] = getattr(chunk.usage_metadata, "prompt_token_count", 0)
-    usage["output_tokens"] = getattr(chunk.usage_metadata, "candidates_token_count", 0)
-
-    # Calculate total if both values are defined (including 0)
-    if "input_tokens" in usage and "output_tokens" in usage:
-        usage["total_tokens"] = usage["input_tokens"] + usage["output_tokens"]
+    # Use the shared helper to extract usage
+    usage = _extract_usage_from_metadata(chunk.usage_metadata)
 
     return usage
@@ -417,22 +458,3 @@ def format_gemini_streaming_output(
 
     # Fallback for empty or unexpected input
     return [{"role": "assistant", "content": [{"type": "text", "text": ""}]}]
-
-
-def standardize_gemini_usage(usage: Dict[str, Any]) -> TokenUsage:
-    """
-    Standardize Gemini usage statistics to common TokenUsage format.
-
-    Gemini already uses standard field names (input_tokens/output_tokens).
-
-    Args:
-        usage: Raw usage statistics from Gemini
-
-    Returns:
-        Standardized TokenUsage dict
-    """
-    return TokenUsage(
-        input_tokens=usage.get("input_tokens", 0),
-        output_tokens=usage.get("output_tokens", 0),
-        # Gemini doesn't currently support cache or reasoning tokens
-    )
```
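
This converter change is the heart of the fix: Gemini's `cached_content_token_count` and `thoughts_token_count`, previously dropped by `standardize_gemini_usage`, now land in `TokenUsage`. A mock `usage_metadata` stands in for a real Gemini response here:

```python
from types import SimpleNamespace

from posthog.ai.gemini.gemini_converter import extract_gemini_usage_from_response

# Mock object standing in for a real GenerateContentResponse.
response = SimpleNamespace(
    usage_metadata=SimpleNamespace(
        prompt_token_count=200,
        candidates_token_count=80,
        cached_content_token_count=150,  # previously lost
        thoughts_token_count=64,  # previously lost
    )
)

usage = extract_gemini_usage_from_response(response)
print(usage)
# {'input_tokens': 200, 'output_tokens': 80,
#  'cache_read_input_tokens': 150, 'reasoning_tokens': 64}
```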

posthog/ai/openai/openai.py

Lines changed: 8 additions & 8 deletions
```diff
@@ -2,6 +2,8 @@
 import uuid
 from typing import Any, Dict, List, Optional
 
+from posthog.ai.types import TokenUsage
+
 try:
     import openai
 except ImportError:
@@ -120,7 +122,7 @@ def _create_streaming(
         **kwargs: Any,
     ):
         start_time = time.time()
-        usage_stats: Dict[str, int] = {}
+        usage_stats: TokenUsage = TokenUsage()
         final_content = []
         response = self._original.create(**kwargs)
 
@@ -171,14 +173,13 @@ def _capture_streaming_event(
         posthog_privacy_mode: bool,
         posthog_groups: Optional[Dict[str, Any]],
         kwargs: Dict[str, Any],
-        usage_stats: Dict[str, int],
+        usage_stats: TokenUsage,
         latency: float,
         output: Any,
         available_tool_calls: Optional[List[Dict[str, Any]]] = None,
     ):
         from posthog.ai.types import StreamingEventData
         from posthog.ai.openai.openai_converter import (
-            standardize_openai_usage,
             format_openai_streaming_input,
             format_openai_streaming_output,
         )
@@ -195,7 +196,7 @@ def _capture_streaming_event(
             kwargs=kwargs,
             formatted_input=sanitized_input,
             formatted_output=format_openai_streaming_output(output, "responses"),
-            usage_stats=standardize_openai_usage(usage_stats, "responses"),
+            usage_stats=usage_stats,
             latency=latency,
             distinct_id=posthog_distinct_id,
             trace_id=posthog_trace_id,
@@ -316,7 +317,7 @@ def _create_streaming(
         **kwargs: Any,
     ):
         start_time = time.time()
-        usage_stats: Dict[str, int] = {}
+        usage_stats: TokenUsage = TokenUsage()
         accumulated_content = []
         accumulated_tool_calls: Dict[int, Dict[str, Any]] = {}
         if "stream_options" not in kwargs:
@@ -387,15 +388,14 @@ def _capture_streaming_event(
         posthog_privacy_mode: bool,
         posthog_groups: Optional[Dict[str, Any]],
         kwargs: Dict[str, Any],
-        usage_stats: Dict[str, int],
+        usage_stats: TokenUsage,
         latency: float,
         output: Any,
         tool_calls: Optional[List[Dict[str, Any]]] = None,
         available_tool_calls: Optional[List[Dict[str, Any]]] = None,
     ):
         from posthog.ai.types import StreamingEventData
         from posthog.ai.openai.openai_converter import (
-            standardize_openai_usage,
             format_openai_streaming_input,
             format_openai_streaming_output,
         )
@@ -412,7 +412,7 @@ def _capture_streaming_event(
             kwargs=kwargs,
             formatted_input=sanitized_input,
             formatted_output=format_openai_streaming_output(output, "chat", tool_calls),
-            usage_stats=standardize_openai_usage(usage_stats, "chat"),
+            usage_stats=usage_stats,
             latency=latency,
             distinct_id=posthog_distinct_id,
             trace_id=posthog_trace_id,
```
