
Commit 434a756

test: new flows
1 parent 9c1aedd commit 434a756

File tree

2 files changed: +254 -11 lines changed


posthog/test/ai/langchain/test_callbacks.py

Lines changed: 249 additions & 7 deletions
@@ -1378,11 +1378,11 @@ def get_weather(city: Literal["nyc", "sf"]):
     )
     graph.invoke(inputs, config={"callbacks": [cb]})
     calls = [call[1] for call in mock_client.capture.call_args_list]
-    assert len(calls) == 21
+    assert len(calls) == 15
     for call in calls:
         assert call["properties"]["$ai_trace_id"] == "test-trace-id"
     assert len([call for call in calls if call["event"] == "$ai_generation"]) == 2
-    assert len([call for call in calls if call["event"] == "$ai_span"]) == 18
+    assert len([call for call in calls if call["event"] == "$ai_span"]) == 12
     assert len([call for call in calls if call["event"] == "$ai_trace"]) == 1


@@ -1435,11 +1435,13 @@ def span_3(_):

     assert mock_client.capture.call_count == 3

-    span2, span1, trace = [
-        call[1]["properties"] for call in mock_client.capture.call_args_list
-    ]
-    assert span2["$ai_parent_id"] == span1["$ai_span_id"]
-    assert span1["$ai_parent_id"] == trace["$ai_trace_id"]
+    calls = mock_client.capture.call_args_list
+    span_props_2 = calls[0][1]["properties"]
+    span_props_1 = calls[1][1]["properties"]
+    trace_props = calls[2][1]["properties"]
+
+    assert span_props_2["$ai_parent_id"] == span_props_1["$ai_span_id"]
+    assert span_props_1["$ai_parent_id"] == trace_props["$ai_trace_id"]


 def test_captures_error_with_details_in_span(mock_client):
@@ -1478,3 +1480,243 @@ def span(_):
         == "ValueError"
     )
     assert mock_client.capture.call_args_list[1][1]["properties"]["$ai_is_error"]
+
+
+def test_openai_reasoning_tokens_o1_mini(mock_client):
+    """Test that OpenAI reasoning tokens (o1-mini) are captured correctly."""
+    prompt = ChatPromptTemplate.from_messages(
+        [("user", "Think step by step about this problem")]
+    )
+
+    # Mock response with reasoning tokens in output_token_details
+    model = FakeMessagesListChatModel(
+        responses=[
+            AIMessage(
+                content="Let me think through this step by step...",
+                usage_metadata={
+                    "input_tokens": 10,
+                    "output_tokens": 25,
+                    "total_tokens": 35,
+                    "output_token_details": {"reasoning": 15},  # 15 reasoning tokens
+                },
+            )
+        ]
+    )
+
+    callbacks = [CallbackHandler(mock_client)]
+    chain = prompt | model
+    result = chain.invoke({}, config={"callbacks": callbacks})
+
+    assert result.content == "Let me think through this step by step..."
+    assert mock_client.capture.call_count == 3
+
+    generation_args = mock_client.capture.call_args_list[1][1]
+    generation_props = generation_args["properties"]
+
+    assert generation_args["event"] == "$ai_generation"
+    assert generation_props["$ai_input_tokens"] == 10
+    assert generation_props["$ai_output_tokens"] == 25
+    assert generation_props["$ai_reasoning_tokens"] == 15
+
+
+def test_anthropic_cache_write_and_read_tokens(mock_client):
+    """Test that Anthropic cache creation and read tokens are captured correctly."""
+    prompt = ChatPromptTemplate.from_messages([("user", "Analyze this large document")])
+
+    # First call with cache creation
+    model_write = FakeMessagesListChatModel(
+        responses=[
+            AIMessage(
+                content="I've analyzed the document and cached the context.",
+                usage_metadata={
+                    "total_tokens": 1050,
+                    "input_tokens": 1000,
+                    "output_tokens": 50,
+                    "cache_creation_input_tokens": 800,  # Anthropic cache write
+                },
+            )
+        ]
+    )
+
+    callbacks = [CallbackHandler(mock_client)]
+    chain = prompt | model_write
+    result = chain.invoke({}, config={"callbacks": callbacks})
+
+    assert result.content == "I've analyzed the document and cached the context."
+    assert mock_client.capture.call_count == 3
+
+    generation_args = mock_client.capture.call_args_list[1][1]
+    generation_props = generation_args["properties"]
+
+    assert generation_args["event"] == "$ai_generation"
+    assert generation_props["$ai_input_tokens"] == 1000
+    assert generation_props["$ai_output_tokens"] == 50
+    assert generation_props["$ai_cache_creation_input_tokens"] == 800
+    assert generation_props["$ai_cache_read_input_tokens"] is None
+
+    # Reset mock for second call
+    mock_client.reset_mock()
+
+    # Second call with cache read
+    model_read = FakeMessagesListChatModel(
+        responses=[
+            AIMessage(
+                content="Using cached analysis to provide quick response.",
+                usage_metadata={
+                    "input_tokens": 200,
+                    "output_tokens": 30,
+                    "cache_read_input_tokens": 800,  # Anthropic cache read
+                },
+            )
+        ]
+    )
+
+    chain = prompt | model_read
+    result = chain.invoke({}, config={"callbacks": callbacks})
+
+    assert result.content == "Using cached analysis to provide quick response."
+    assert mock_client.capture.call_count == 3
+
+    generation_args = mock_client.capture.call_args_list[1][1]
+    generation_props = generation_args["properties"]
+
+    assert generation_args["event"] == "$ai_generation"
+    assert generation_props["$ai_input_tokens"] == 200
+    assert generation_props["$ai_output_tokens"] == 30
+    assert generation_props["$ai_cache_creation_input_tokens"] is None
+    assert generation_props["$ai_cache_read_input_tokens"] == 800
+
+
+def test_openai_cache_read_tokens(mock_client):
+    """Test that OpenAI cache read tokens are captured correctly."""
+    prompt = ChatPromptTemplate.from_messages(
+        [("user", "Use the cached prompt for this request")]
+    )
+
+    # Mock response with cache read tokens in input_token_details
+    model = FakeMessagesListChatModel(
+        responses=[
+            AIMessage(
+                content="Response using cached prompt context.",
+                usage_metadata={
+                    "input_tokens": 150,
+                    "output_tokens": 40,
+                    "total_tokens": 190,
+                    "input_token_details": {
+                        "cache_read": 100,  # 100 tokens read from cache
+                        "cache_creation": 0,
+                    },
+                },
+            )
+        ]
+    )
+
+    callbacks = [CallbackHandler(mock_client)]
+    chain = prompt | model
+    result = chain.invoke({}, config={"callbacks": callbacks})
+
+    assert result.content == "Response using cached prompt context."
+    assert mock_client.capture.call_count == 3
+
+    generation_args = mock_client.capture.call_args_list[1][1]
+    generation_props = generation_args["properties"]
+
+    assert generation_args["event"] == "$ai_generation"
+    assert generation_props["$ai_input_tokens"] == 150
+    assert generation_props["$ai_output_tokens"] == 40
+    assert generation_props["$ai_cache_read_input_tokens"] == 100
+    assert generation_props["$ai_cache_creation_input_tokens"] == 0
+
+
+def test_openai_cache_creation_tokens(mock_client):
+    """Test that OpenAI cache creation tokens are captured correctly."""
+    prompt = ChatPromptTemplate.from_messages(
+        [("user", "Create a cache for this large prompt context")]
+    )
+
+    # Mock response with cache creation tokens in input_token_details
+    model = FakeMessagesListChatModel(
+        responses=[
+            AIMessage(
+                content="Created cache for the prompt context.",
+                usage_metadata={
+                    "input_tokens": 2000,
+                    "output_tokens": 25,
+                    "total_tokens": 2025,
+                    "input_token_details": {
+                        "cache_creation": 1500,  # 1500 tokens written to cache
+                        "cache_read": 0,
+                    },
+                },
+            )
+        ]
+    )
+
+    callbacks = [CallbackHandler(mock_client)]
+    chain = prompt | model
+    result = chain.invoke({}, config={"callbacks": callbacks})
+
+    assert result.content == "Created cache for the prompt context."
+    assert mock_client.capture.call_count == 3
+
+    generation_args = mock_client.capture.call_args_list[1][1]
+    generation_props = generation_args["properties"]
+
+    assert generation_args["event"] == "$ai_generation"
+    assert generation_props["$ai_input_tokens"] == 2000
+    assert generation_props["$ai_output_tokens"] == 25
+    assert generation_props["$ai_cache_creation_input_tokens"] == 1500
+    assert generation_props["$ai_cache_read_input_tokens"] == 0
+
+
+def test_combined_reasoning_and_cache_tokens(mock_client):
+    """Test that both reasoning tokens and cache tokens can be captured together."""
+    prompt = ChatPromptTemplate.from_messages(
+        [("user", "Think through this cached problem")]
+    )
+
+    # Mock response with both reasoning and cache tokens
+    model = FakeMessagesListChatModel(
+        responses=[
+            AIMessage(
+                content="Let me reason through this using cached context...",
+                usage_metadata={
+                    "input_tokens": 500,
+                    "output_tokens": 100,
+                    "total_tokens": 600,
+                    "input_token_details": {"cache_read": 300, "cache_creation": 0},
+                    "output_token_details": {"reasoning": 60},  # 60 reasoning tokens
+                },
+            )
+        ]
+    )
+
+    callbacks = [CallbackHandler(mock_client)]
+    chain = prompt | model
+    result = chain.invoke({}, config={"callbacks": callbacks})
+
+    assert result.content == "Let me reason through this using cached context..."
+    assert mock_client.capture.call_count == 3
+
+    generation_args = mock_client.capture.call_args_list[1][1]
+    generation_props = generation_args["properties"]
+
+    assert generation_args["event"] == "$ai_generation"
+    assert generation_props["$ai_input_tokens"] == 500
+    assert generation_props["$ai_output_tokens"] == 100
+    assert generation_props["$ai_cache_read_input_tokens"] == 300
+    assert generation_props["$ai_cache_creation_input_tokens"] == 0
+    assert generation_props["$ai_reasoning_tokens"] == 60
+
+
+@pytest.mark.skipif(not OPENAI_API_KEY, reason="OPENAI_API_KEY is not set")
+def test_openai_reasoning_tokens(mock_client):
+    model = ChatOpenAI(api_key=OPENAI_API_KEY, model="o4-mini", max_tokens=10)
+    cb = CallbackHandler(
+        mock_client, trace_id="test-trace-id", distinct_id="test-distinct-id"
+    )
+    model.invoke("what is the weather in sf", config={"callbacks": [cb]})
+    call = mock_client.capture.call_args_list[0][1]
+    assert call["properties"]["$ai_reasoning_tokens"] is not None
+    assert call["properties"]["$ai_input_tokens"] is not None
+    assert call["properties"]["$ai_output_tokens"] is not None
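
For context, the new tests above all follow the same pattern: build a chain from a fake chat model whose usage_metadata carries reasoning or cache token details, attach the PostHog CallbackHandler, and inspect the captured $ai_generation event. Below is a minimal standalone sketch of that pattern; the posthog.ai.langchain import path is taken from this repository, while the MagicMock client and the token values are illustrative assumptions and not part of this commit.

from unittest.mock import MagicMock

from langchain_core.language_models.fake_chat_models import FakeMessagesListChatModel
from langchain_core.messages import AIMessage
from langchain_core.prompts import ChatPromptTemplate

from posthog.ai.langchain import CallbackHandler

# Stand-in for a configured posthog.Posthog client (illustrative assumption).
mock_client = MagicMock()

# Fake model whose single response advertises reasoning tokens via usage_metadata.
model = FakeMessagesListChatModel(
    responses=[
        AIMessage(
            content="ok",
            usage_metadata={
                "input_tokens": 10,
                "output_tokens": 5,
                "total_tokens": 15,
                "output_token_details": {"reasoning": 3},
            },
        )
    ]
)

chain = ChatPromptTemplate.from_messages([("user", "hi")]) | model
chain.invoke({}, config={"callbacks": [CallbackHandler(mock_client)]})

# Each capture() call carries an event name and its $ai_* properties;
# the generation event is where the reasoning token count should surface.
for _, kwargs in mock_client.capture.call_args_list:
    if kwargs["event"] == "$ai_generation":
        print(kwargs["properties"].get("$ai_reasoning_tokens"))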

pyproject.toml

Lines changed: 5 additions & 4 deletions
@@ -68,10 +68,11 @@ test = [
     "django",
     "openai",
     "anthropic",
-    "langgraph",
-    "langchain-community>=0.2.0",
-    "langchain-openai>=0.2.0",
-    "langchain-anthropic>=0.2.0",
+    "langgraph>=0.4.8",
+    "langchain-core>=0.3.65",
+    "langchain-community>=0.3.25",
+    "langchain-openai>=0.3.22",
+    "langchain-anthropic>=0.3.15",
     "google-genai",
     "pydantic",
     "parameterized>=0.8.1",
