
Commit 9c1aedd

fix: reasoning and cached tokens
1 parent 9db1b7e commit 9c1aedd


posthog/ai/langchain/callbacks.py

Lines changed: 56 additions & 9 deletions
@@ -14,7 +14,6 @@
     List,
     Optional,
     Sequence,
-    Tuple,
     Union,
     cast,
 )
@@ -569,9 +568,14 @@ def _capture_generation(
             event_properties["$ai_is_error"] = True
         else:
             # Add usage
-            input_tokens, output_tokens = _parse_usage(output)
-            event_properties["$ai_input_tokens"] = input_tokens
-            event_properties["$ai_output_tokens"] = output_tokens
+            usage = _parse_usage(output)
+            event_properties["$ai_input_tokens"] = usage.input_tokens
+            event_properties["$ai_output_tokens"] = usage.output_tokens
+            event_properties["$ai_cache_creation_input_tokens"] = (
+                usage.cache_write_tokens
+            )
+            event_properties["$ai_cache_read_input_tokens"] = usage.cache_read_tokens
+            event_properties["$ai_reasoning_tokens"] = usage.reasoning_tokens
 
             # Generation results
             generation_result = output.generations[-1]
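
Note: after this hunk, each generation event carries cache and reasoning token counts next to the existing input/output counts. A minimal sketch of the resulting usage-related properties, with illustrative values that are not from this commit:

    event_properties = {
        "$ai_input_tokens": 1200,
        "$ai_output_tokens": 350,
        "$ai_cache_creation_input_tokens": 0,  # usage.cache_write_tokens
        "$ai_cache_read_input_tokens": 1024,   # usage.cache_read_tokens
        "$ai_reasoning_tokens": 128,           # usage.reasoning_tokens
    }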
@@ -647,25 +651,42 @@ def _convert_message_to_dict(message: BaseMessage) -> Dict[str, Any]:
     return message_dict
 
 
+@dataclass
+class ModelUsage:
+    input_tokens: Optional[int]
+    output_tokens: Optional[int]
+    cache_write_tokens: Optional[int]
+    cache_read_tokens: Optional[int]
+    reasoning_tokens: Optional[int]
+
+
 def _parse_usage_model(
-    usage: Union[BaseModel, Dict],
-) -> Tuple[Union[int, None], Union[int, None]]:
+    usage: Union[BaseModel, dict],
+) -> ModelUsage:
     if isinstance(usage, BaseModel):
         usage = usage.__dict__
 
     conversion_list = [
         # https://pypi.org/project/langchain-anthropic/ (works also for Bedrock-Anthropic)
         ("input_tokens", "input"),
         ("output_tokens", "output"),
+        ("cache_creation_input_tokens", "cache_write"),
+        ("cache_read_input_tokens", "cache_read"),
         # https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/get-token-count
         ("prompt_token_count", "input"),
         ("candidates_token_count", "output"),
+        ("cached_content_token_count", "cache_read"),
+        ("thoughts_token_count", "reasoning"),
         # Bedrock: https://docs.aws.amazon.com/bedrock/latest/userguide/monitoring-cw.html#runtime-cloudwatch-metrics
         ("inputTokenCount", "input"),
         ("outputTokenCount", "output"),
+        ("cacheCreationInputTokenCount", "cache_write"),
+        ("cacheReadInputTokenCount", "cache_read"),
         # Bedrock Anthropic
         ("prompt_tokens", "input"),
         ("completion_tokens", "output"),
+        ("cache_creation_input_tokens", "cache_write"),
+        ("cache_read_input_tokens", "cache_read"),
         # langchain-ibm https://pypi.org/project/langchain-ibm/
         ("input_token_count", "input"),
         ("generated_token_count", "output"),
@@ -683,13 +704,39 @@ def _parse_usage_model(
 
             parsed_usage[type_key] = final_count
 
-    return parsed_usage.get("input"), parsed_usage.get("output")
+    # Caching (OpenAI & langchain 0.3.9+)
+    if "input_token_details" in usage and isinstance(
+        usage["input_token_details"], dict
+    ):
+        parsed_usage["cache_write"] = usage["input_token_details"].get("cache_creation")
+        parsed_usage["cache_read"] = usage["input_token_details"].get("cache_read")
+
+    # Reasoning (OpenAI & langchain 0.3.9+)
+    if "output_token_details" in usage and isinstance(
+        usage["output_token_details"], dict
+    ):
+        parsed_usage["reasoning"] = usage["output_token_details"].get("reasoning")
+
+    return ModelUsage(
+        input_tokens=parsed_usage.get("input"),
+        output_tokens=parsed_usage.get("output"),
+        cache_write_tokens=parsed_usage.get("cache_write"),
+        cache_read_tokens=parsed_usage.get("cache_read"),
+        reasoning_tokens=parsed_usage.get("reasoning"),
+    )
 
 
-def _parse_usage(response: LLMResult):
+def _parse_usage(response: LLMResult) -> ModelUsage:
     # langchain-anthropic uses the usage field
     llm_usage_keys = ["token_usage", "usage"]
-    llm_usage: Tuple[Union[int, None], Union[int, None]] = (None, None)
+    llm_usage: ModelUsage = ModelUsage(
+        input_tokens=None,
+        output_tokens=None,
+        cache_write_tokens=None,
+        cache_read_tokens=None,
+        reasoning_tokens=None,
+    )
+
     if response.llm_output is not None:
         for key in llm_usage_keys:
             if response.llm_output.get(key):
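
For reference, a minimal sketch of how the reworked parser behaves on an OpenAI-style payload. The payload values and the direct call to the private helper are illustrative only, assuming the langchain 0.3.9+ token-detail shape handled in the hunk above:

    from posthog.ai.langchain.callbacks import ModelUsage, _parse_usage_model

    # Hypothetical token_usage payload (OpenAI / langchain 0.3.9+ shape).
    token_usage = {
        "prompt_tokens": 1200,
        "completion_tokens": 350,
        "input_token_details": {"cache_read": 1024, "cache_creation": 0},
        "output_token_details": {"reasoning": 128},
    }

    usage = _parse_usage_model(token_usage)
    assert usage == ModelUsage(
        input_tokens=1200,             # via ("prompt_tokens", "input")
        output_tokens=350,             # via ("completion_tokens", "output")
        cache_write_tokens=0,          # from input_token_details["cache_creation"]
        cache_read_tokens=1024,        # from input_token_details["cache_read"]
        reasoning_tokens=128,          # from output_token_details["reasoning"]
    )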
