@@ -1564,9 +1564,9 @@ def test_anthropic_cache_write_and_read_tokens(mock_client):
             AIMessage(
                 content="Using cached analysis to provide quick response.",
                 usage_metadata={
-                    "input_tokens": 200,
+                    "input_tokens": 1200,
                     "output_tokens": 30,
-                    "total_tokens": 1030,
+                    "total_tokens": 1230,
                     "cache_read_input_tokens": 800,  # Anthropic cache read
                 },
             )
@@ -1583,7 +1583,7 @@ def test_anthropic_cache_write_and_read_tokens(mock_client):
     generation_props = generation_args["properties"]
 
     assert generation_args["event"] == "$ai_generation"
-    assert generation_props["$ai_input_tokens"] == 200
+    assert generation_props["$ai_input_tokens"] == 400
     assert generation_props["$ai_output_tokens"] == 30
     assert generation_props["$ai_cache_creation_input_tokens"] == 0
     assert generation_props["$ai_cache_read_input_tokens"] == 800
@@ -1625,7 +1625,7 @@ def test_openai_cache_read_tokens(mock_client):
     generation_props = generation_args["properties"]
 
     assert generation_args["event"] == "$ai_generation"
-    assert generation_props["$ai_input_tokens"] == 150
+    assert generation_props["$ai_input_tokens"] == 50
     assert generation_props["$ai_output_tokens"] == 40
     assert generation_props["$ai_cache_read_input_tokens"] == 100
     assert generation_props["$ai_cache_creation_input_tokens"] == 0
@@ -1707,7 +1707,7 @@ def test_combined_reasoning_and_cache_tokens(mock_client):
     generation_props = generation_args["properties"]
 
     assert generation_args["event"] == "$ai_generation"
-    assert generation_props["$ai_input_tokens"] == 500
+    assert generation_props["$ai_input_tokens"] == 200
    assert generation_props["$ai_output_tokens"] == 100
     assert generation_props["$ai_cache_read_input_tokens"] == 300
     assert generation_props["$ai_cache_creation_input_tokens"] == 0
@@ -1876,3 +1876,207 @@ def test_tool_definition(mock_client):
     assert props["$ai_latency"] == 1.0
     # Verify that tools are captured in the $ai_tools property
     assert props["$ai_tools"] == tools
+
+
+def test_cache_read_tokens_subtraction_from_input_tokens(mock_client):
+    """Test that cache_read_tokens are properly subtracted from input_tokens.
+
+    This tests the logic in callbacks.py lines 757-758:
+    if normalized_usage.input_tokens and normalized_usage.cache_read_tokens:
+        normalized_usage.input_tokens = max(normalized_usage.input_tokens - normalized_usage.cache_read_tokens, 0)
+    """
+    prompt = ChatPromptTemplate.from_messages(
+        [("user", "Use the cached prompt for this request")]
+    )
+
+    # Scenario 1: input_tokens includes cache_read_tokens (typical case)
+    # input_tokens=150 includes 100 cache_read tokens, so actual input is 50
+    model = FakeMessagesListChatModel(
+        responses=[
+            AIMessage(
+                content="Response using cached prompt context.",
+                usage_metadata={
+                    "input_tokens": 150,  # Total includes cache reads
+                    "output_tokens": 40,
+                    "total_tokens": 190,
+                    "cache_read_input_tokens": 100,  # 100 tokens read from cache
+                },
+            )
+        ]
+    )
+
+    callbacks = [CallbackHandler(mock_client)]
+    chain = prompt | model
+    result = chain.invoke({}, config={"callbacks": callbacks})
+
+    assert result.content == "Response using cached prompt context."
+    assert mock_client.capture.call_count == 3
+
+    generation_args = mock_client.capture.call_args_list[1][1]
+    generation_props = generation_args["properties"]
+
+    assert generation_args["event"] == "$ai_generation"
+    # Input tokens should be reduced: 150 - 100 = 50
+    assert generation_props["$ai_input_tokens"] == 50
+    assert generation_props["$ai_output_tokens"] == 40
+    assert generation_props["$ai_cache_read_input_tokens"] == 100
+
+
+def test_cache_read_tokens_subtraction_prevents_negative(mock_client):
+    """Test that cache_read_tokens subtraction doesn't result in negative input_tokens.
+
+    This tests the max(..., 0) part of the logic in callbacks.py lines 757-758.
+    """
+    prompt = ChatPromptTemplate.from_messages(
+        [("user", "Edge case with large cache read")]
+    )
+
+    # Edge case: cache_read_tokens >= input_tokens
+    # This could happen in some API responses where accounting differs
+    model = FakeMessagesListChatModel(
+        responses=[
+            AIMessage(
+                content="Response with edge case token counts.",
+                usage_metadata={
+                    "input_tokens": 80,
+                    "output_tokens": 20,
+                    "total_tokens": 100,
+                    "cache_read_input_tokens": 100,  # More than input_tokens
+                },
+            )
+        ]
+    )
+
+    callbacks = [CallbackHandler(mock_client)]
+    chain = prompt | model
+    result = chain.invoke({}, config={"callbacks": callbacks})
+
+    assert result.content == "Response with edge case token counts."
+    assert mock_client.capture.call_count == 3
+
+    generation_args = mock_client.capture.call_args_list[1][1]
+    generation_props = generation_args["properties"]
+
+    assert generation_args["event"] == "$ai_generation"
+    # Input tokens should be 0, not negative: max(80 - 100, 0) = 0
+    assert generation_props["$ai_input_tokens"] == 0
+    assert generation_props["$ai_output_tokens"] == 20
+    assert generation_props["$ai_cache_read_input_tokens"] == 100
+
+
+def test_no_cache_read_tokens_no_subtraction(mock_client):
+    """Test that when there are no cache_read_tokens, input_tokens remain unchanged.
+
+    This tests the conditional check before the subtraction in callbacks.py line 757.
+    """
+    prompt = ChatPromptTemplate.from_messages([("user", "Normal request without cache")])
+
+    # No cache usage - input_tokens should remain as-is
+    model = FakeMessagesListChatModel(
+        responses=[
+            AIMessage(
+                content="Response without cache.",
+                usage_metadata={
+                    "input_tokens": 100,
+                    "output_tokens": 30,
+                    "total_tokens": 130,
+                    # No cache_read_input_tokens
+                },
+            )
+        ]
+    )
+
+    callbacks = [CallbackHandler(mock_client)]
+    chain = prompt | model
+    result = chain.invoke({}, config={"callbacks": callbacks})
+
+    assert result.content == "Response without cache."
+    assert mock_client.capture.call_count == 3
+
+    generation_args = mock_client.capture.call_args_list[1][1]
+    generation_props = generation_args["properties"]
+
+    assert generation_args["event"] == "$ai_generation"
+    # Input tokens should remain unchanged at 100
+    assert generation_props["$ai_input_tokens"] == 100
+    assert generation_props["$ai_output_tokens"] == 30
+    assert generation_props["$ai_cache_read_input_tokens"] == 0
+
+
+def test_zero_input_tokens_with_cache_read(mock_client):
+    """Test edge case where input_tokens is 0 but cache_read_tokens exist.
+
+    This tests the falsy check in the conditional (line 757).
+    """
+    prompt = ChatPromptTemplate.from_messages([("user", "Edge case query")])
+
+    # Edge case: input_tokens is 0 (falsy), should skip subtraction
+    model = FakeMessagesListChatModel(
+        responses=[
+            AIMessage(
+                content="Response.",
+                usage_metadata={
+                    "input_tokens": 0,
+                    "output_tokens": 10,
+                    "total_tokens": 10,
+                    "cache_read_input_tokens": 50,
+                },
+            )
+        ]
+    )
+
+    callbacks = [CallbackHandler(mock_client)]
+    chain = prompt | model
+    result = chain.invoke({}, config={"callbacks": callbacks})
+
+    assert result.content == "Response."
+    assert mock_client.capture.call_count == 3
+
+    generation_args = mock_client.capture.call_args_list[1][1]
+    generation_props = generation_args["properties"]
+
+    assert generation_args["event"] == "$ai_generation"
+    # Input tokens should remain 0 (no subtraction because input_tokens is falsy)
+    assert generation_props["$ai_input_tokens"] == 0
+    assert generation_props["$ai_output_tokens"] == 10
+    assert generation_props["$ai_cache_read_input_tokens"] == 50
+
+
+def test_cache_write_tokens_not_subtracted_from_input(mock_client):
+    """Test that cache_creation_input_tokens (cache write) do NOT affect input_tokens.
+
+    Only cache_read_tokens should be subtracted from input_tokens, not cache_write_tokens.
+    """
+    prompt = ChatPromptTemplate.from_messages([("user", "Create cache")])
+
+    # Cache creation without cache read
+    model = FakeMessagesListChatModel(
+        responses=[
+            AIMessage(
+                content="Creating cache.",
+                usage_metadata={
+                    "input_tokens": 1000,
+                    "output_tokens": 20,
+                    "total_tokens": 1020,
+                    "cache_creation_input_tokens": 800,  # Cache write, not read
+                },
+            )
+        ]
+    )
+
+    callbacks = [CallbackHandler(mock_client)]
+    chain = prompt | model
+    result = chain.invoke({}, config={"callbacks": callbacks})
+
+    assert result.content == "Creating cache."
+    assert mock_client.capture.call_count == 3
+
+    generation_args = mock_client.capture.call_args_list[1][1]
+    generation_props = generation_args["properties"]
+
+    assert generation_args["event"] == "$ai_generation"
+    # Input tokens should NOT be reduced by cache_creation_input_tokens
+    assert generation_props["$ai_input_tokens"] == 1000
+    assert generation_props["$ai_output_tokens"] == 20
+    assert generation_props["$ai_cache_creation_input_tokens"] == 800
+    assert generation_props["$ai_cache_read_input_tokens"] == 0
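
A minimal sketch of the normalization step these tests pin down, assuming the handler keeps a usage record with input and cache counters. The names UsageSketch and subtract_cache_reads are hypothetical; the real logic lives in callbacks.py around lines 757-758 and is not part of this diff.

from dataclasses import dataclass


@dataclass
class UsageSketch:
    """Hypothetical stand-in for the handler's normalized usage record."""
    input_tokens: int = 0
    output_tokens: int = 0
    cache_read_tokens: int = 0
    cache_write_tokens: int = 0


def subtract_cache_reads(usage: UsageSketch) -> UsageSketch:
    # Subtract cache reads only when both counts are truthy; clamp at zero so
    # providers reporting cache_read_tokens >= input_tokens never go negative.
    # Cache writes are deliberately excluded from the subtraction.
    if usage.input_tokens and usage.cache_read_tokens:
        usage.input_tokens = max(usage.input_tokens - usage.cache_read_tokens, 0)
    return usage


assert subtract_cache_reads(UsageSketch(150, 40, 100)).input_tokens == 50        # typical case
assert subtract_cache_reads(UsageSketch(80, 20, 100)).input_tokens == 0          # clamped, never negative
assert subtract_cache_reads(UsageSketch(0, 10, 50)).input_tokens == 0            # falsy input: no subtraction
assert subtract_cache_reads(UsageSketch(1000, 20, 0, 800)).input_tokens == 1000  # cache writes not subtracted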