@@ -1586,13 +1586,58 @@ def test_anthropic_cache_write_and_read_tokens(mock_client):
     generation_props = generation_args["properties"]
 
     assert generation_args["event"] == "$ai_generation"
-    assert generation_props["$ai_input_tokens"] == 400
+    assert (
+        generation_props["$ai_input_tokens"] == 1200
+    )  # No provider metadata, no subtraction
     assert generation_props["$ai_output_tokens"] == 30
     assert generation_props["$ai_cache_creation_input_tokens"] == 0
     assert generation_props["$ai_cache_read_input_tokens"] == 800
     assert generation_props["$ai_reasoning_tokens"] == 0
 
 
+def test_anthropic_provider_subtracts_cache_tokens(mock_client):
+    """Test that the Anthropic provider correctly subtracts cache tokens from input tokens."""
+    from langchain_core.outputs import LLMResult, ChatGeneration
+    from langchain_core.messages import AIMessage
+    from uuid import uuid4
+
+    cb = CallbackHandler(mock_client)
+    run_id = uuid4()
+
+    # Set up with the Anthropic provider
+    cb._set_llm_metadata(
+        serialized={},
+        run_id=run_id,
+        messages=[{"role": "user", "content": "test"}],
+        metadata={"ls_provider": "anthropic", "ls_model_name": "claude-3-sonnet"},
+    )
+
+    # Response with cache tokens: 1200 input tokens (includes 800 cached)
+    response = LLMResult(
+        generations=[
+            [
+                ChatGeneration(
+                    message=AIMessage(content="Response"),
+                    generation_info={
+                        "usage_metadata": {
+                            "input_tokens": 1200,
+                            "output_tokens": 50,
+                            "cache_read_input_tokens": 800,
+                        }
+                    },
+                )
+            ]
+        ],
+        llm_output={},
+    )
+
+    cb._pop_run_and_capture_generation(run_id, None, response)
+
+    generation_args = mock_client.capture.call_args_list[0][1]
+    assert generation_args["properties"]["$ai_input_tokens"] == 400  # 1200 - 800
+    assert generation_args["properties"]["$ai_cache_read_input_tokens"] == 800
+
+
 def test_openai_cache_read_tokens(mock_client):
     """Test that OpenAI cache read tokens are captured correctly."""
     prompt = ChatPromptTemplate.from_messages(
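Taken together, the assertions in this hunk pin down the intended rule: cache-read tokens are subtracted from `$ai_input_tokens` only when `ls_provider` identifies Anthropic, whose usage metadata counts cached tokens inside `input_tokens` (the new test's 1200 includes its 800 cached); with no provider metadata, the raw count is reported. A minimal sketch of that rule, assuming a hypothetical helper (`normalize_input_tokens` is illustrative, not the handler's actual internals):

```python
from typing import Optional


def normalize_input_tokens(
    provider: Optional[str], input_tokens: int, cache_read_tokens: int
) -> int:
    """Hypothetical sketch of the provider-gated rule these tests exercise."""
    if provider == "anthropic":
        # Anthropic-style usage counts cached tokens inside input_tokens,
        # so subtract them, clamping at zero.
        return max(input_tokens - cache_read_tokens, 0)
    # No provider metadata (or another provider, e.g. OpenAI): report as-is.
    return input_tokens


assert normalize_input_tokens("anthropic", 1200, 800) == 400  # new Anthropic test
assert normalize_input_tokens(None, 1200, 800) == 1200  # no provider metadata
```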
@@ -1628,7 +1673,7 @@ def test_openai_cache_read_tokens(mock_client):
     generation_props = generation_args["properties"]
 
     assert generation_args["event"] == "$ai_generation"
-    assert generation_props["$ai_input_tokens"] == 50
+    assert generation_props["$ai_input_tokens"] == 150  # No subtraction for OpenAI
     assert generation_props["$ai_output_tokens"] == 40
     assert generation_props["$ai_cache_read_input_tokens"] == 100
     assert generation_props["$ai_cache_creation_input_tokens"] == 0
@@ -1710,15 +1755,15 @@ def test_combined_reasoning_and_cache_tokens(mock_client):
     generation_props = generation_args["properties"]
 
     assert generation_args["event"] == "$ai_generation"
-    assert generation_props["$ai_input_tokens"] == 200
+    assert generation_props["$ai_input_tokens"] == 500  # No subtraction for OpenAI
     assert generation_props["$ai_output_tokens"] == 100
     assert generation_props["$ai_cache_read_input_tokens"] == 300
     assert generation_props["$ai_cache_creation_input_tokens"] == 0
     assert generation_props["$ai_reasoning_tokens"] == 60
 
 
 @pytest.mark.skipif(not OPENAI_API_KEY, reason="OPENAI_API_KEY is not set")
-def test_openai_reasoning_tokens(mock_client):
+def test_openai_reasoning_tokens_o4_mini(mock_client):
     model = ChatOpenAI(
         api_key=OPENAI_API_KEY, model="o4-mini", max_completion_tokens=10
     )
@@ -1919,8 +1964,8 @@ def test_cache_read_tokens_subtraction_from_input_tokens(mock_client):
     generation_props = generation_args["properties"]
 
     assert generation_args["event"] == "$ai_generation"
-    # Input tokens should be reduced: 150 - 100 = 50
-    assert generation_props["$ai_input_tokens"] == 50
+    # Input tokens not reduced without provider metadata
+    assert generation_props["$ai_input_tokens"] == 150
     assert generation_props["$ai_output_tokens"] == 40
     assert generation_props["$ai_cache_read_input_tokens"] == 100
 
@@ -1961,8 +2006,8 @@ def test_cache_read_tokens_subtraction_prevents_negative(mock_client):
     generation_props = generation_args["properties"]
 
     assert generation_args["event"] == "$ai_generation"
-    # Input tokens should be 0, not negative: max(80 - 100, 0) = 0
-    assert generation_props["$ai_input_tokens"] == 0
+    # Input tokens not reduced without provider metadata
+    assert generation_props["$ai_input_tokens"] == 80
     assert generation_props["$ai_output_tokens"] == 20
     assert generation_props["$ai_cache_read_input_tokens"] == 100
 
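If the sketch after the first hunk reflects the handler, the zero-clamp the old assertion verified (`max(80 - 100, 0) = 0`) would now apply only on the Anthropic path; without provider metadata the raw value is reported, as the updated assertion shows:

```python
# Hypothetical, continuing the normalize_input_tokens sketch from above.
assert normalize_input_tokens("anthropic", 80, 100) == 0  # clamped, never negative
assert normalize_input_tokens(None, 80, 100) == 80  # raw value without metadata
```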