@@ -14,7 +14,6 @@
     List,
     Optional,
     Sequence,
-    Tuple,
     Union,
     cast,
 )
@@ -569,9 +568,14 @@ def _capture_generation(
             event_properties["$ai_is_error"] = True
         else:
             # Add usage
-            input_tokens, output_tokens = _parse_usage(output)
-            event_properties["$ai_input_tokens"] = input_tokens
-            event_properties["$ai_output_tokens"] = output_tokens
+            usage = _parse_usage(output)
+            event_properties["$ai_input_tokens"] = usage.input_tokens
+            event_properties["$ai_output_tokens"] = usage.output_tokens
+            event_properties["$ai_cache_creation_input_tokens"] = (
+                usage.cache_write_tokens
+            )
+            event_properties["$ai_cache_read_input_tokens"] = usage.cache_read_tokens
+            event_properties["$ai_reasoning_tokens"] = usage.reasoning_tokens
 
             # Generation results
             generation_result = output.generations[-1]
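Note: a minimal sketch of the properties the hunk above now emits, with invented token counts and a hand-built `ModelUsage` (in the real code the values come from `_parse_usage(output)`, and `ModelUsage` is the dataclass introduced later in this diff):

```python
# Illustration only: all token counts are invented.
usage = ModelUsage(
    input_tokens=1200,
    output_tokens=340,
    cache_write_tokens=1000,
    cache_read_tokens=0,
    reasoning_tokens=128,
)
event_properties = {
    "$ai_input_tokens": usage.input_tokens,                       # 1200
    "$ai_output_tokens": usage.output_tokens,                     # 340
    "$ai_cache_creation_input_tokens": usage.cache_write_tokens,  # 1000
    "$ai_cache_read_input_tokens": usage.cache_read_tokens,       # 0
    "$ai_reasoning_tokens": usage.reasoning_tokens,               # 128
}
```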
@@ -647,25 +651,42 @@ def _convert_message_to_dict(message: BaseMessage) -> Dict[str, Any]:
     return message_dict
 
 
+@dataclass
+class ModelUsage:
+    input_tokens: Optional[int]
+    output_tokens: Optional[int]
+    cache_write_tokens: Optional[int]
+    cache_read_tokens: Optional[int]
+    reasoning_tokens: Optional[int]
+
+
 def _parse_usage_model(
-    usage: Union[BaseModel, Dict],
-) -> Tuple[Union[int, None], Union[int, None]]:
+    usage: Union[BaseModel, dict],
+) -> ModelUsage:
     if isinstance(usage, BaseModel):
         usage = usage.__dict__
 
     conversion_list = [
         # https://pypi.org/project/langchain-anthropic/ (works also for Bedrock-Anthropic)
         ("input_tokens", "input"),
         ("output_tokens", "output"),
+        ("cache_creation_input_tokens", "cache_write"),
+        ("cache_read_input_tokens", "cache_read"),
         # https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/get-token-count
         ("prompt_token_count", "input"),
         ("candidates_token_count", "output"),
+        ("cached_content_token_count", "cache_read"),
+        ("thoughts_token_count", "reasoning"),
         # Bedrock: https://docs.aws.amazon.com/bedrock/latest/userguide/monitoring-cw.html#runtime-cloudwatch-metrics
         ("inputTokenCount", "input"),
         ("outputTokenCount", "output"),
+        ("cacheCreationInputTokenCount", "cache_write"),
+        ("cacheReadInputTokenCount", "cache_read"),
         # Bedrock Anthropic
         ("prompt_tokens", "input"),
         ("completion_tokens", "output"),
+        ("cache_creation_input_tokens", "cache_write"),
+        ("cache_read_input_tokens", "cache_read"),
         # langchain-ibm https://pypi.org/project/langchain-ibm/
         ("input_token_count", "input"),
         ("generated_token_count", "output"),
@@ -683,13 +704,39 @@ def _parse_usage_model(
 
             parsed_usage[type_key] = final_count
 
-    return parsed_usage.get("input"), parsed_usage.get("output")
+    # Caching (OpenAI & langchain 0.3.9+)
+    if "input_token_details" in usage and isinstance(
+        usage["input_token_details"], dict
+    ):
+        parsed_usage["cache_write"] = usage["input_token_details"].get("cache_creation")
+        parsed_usage["cache_read"] = usage["input_token_details"].get("cache_read")
+
+    # Reasoning (OpenAI & langchain 0.3.9+)
+    if "output_token_details" in usage and isinstance(
+        usage["output_token_details"], dict
+    ):
+        parsed_usage["reasoning"] = usage["output_token_details"].get("reasoning")
+
+    return ModelUsage(
+        input_tokens=parsed_usage.get("input"),
+        output_tokens=parsed_usage.get("output"),
+        cache_write_tokens=parsed_usage.get("cache_write"),
+        cache_read_tokens=parsed_usage.get("cache_read"),
+        reasoning_tokens=parsed_usage.get("reasoning"),
+    )
 
 
-def _parse_usage(response: LLMResult):
+def _parse_usage(response: LLMResult) -> ModelUsage:
     # langchain-anthropic uses the usage field
     llm_usage_keys = ["token_usage", "usage"]
-    llm_usage: Tuple[Union[int, None], Union[int, None]] = (None, None)
+    llm_usage: ModelUsage = ModelUsage(
+        input_tokens=None,
+        output_tokens=None,
+        cache_write_tokens=None,
+        cache_read_tokens=None,
+        reasoning_tokens=None,
+    )
+
     if response.llm_output is not None:
         for key in llm_usage_keys:
             if response.llm_output.get(key):
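Note: an end-to-end sketch of `_parse_usage_model` on an OpenAI-style payload (the langchain 0.3.9+ shape; counts invented), exercising the new `input_token_details`/`output_token_details` branches:

```python
# Invented OpenAI-style usage dict: "prompt_tokens"/"completion_tokens"
# map through the conversion list, the detail dicts feed the new branches.
openai_usage = {
    "prompt_tokens": 900,
    "completion_tokens": 250,
    "input_token_details": {"cache_read": 512},
    "output_token_details": {"reasoning": 64},
}
result = _parse_usage_model(openai_usage)
# Expected under these assumptions:
# ModelUsage(input_tokens=900, output_tokens=250, cache_write_tokens=None,
#            cache_read_tokens=512, reasoning_tokens=64)
```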