
Commit 8abc90f

Merge branch 'master' into feat/ph-ai/posthogai-pricing-metadata
2 parents: b68fd09 + 499d545

File tree: 9 files changed, +2526 / -23 lines


.github/workflows/ci.yml

Lines changed: 4 additions & 0 deletions
@@ -36,6 +36,10 @@ jobs:
         run: |
           ruff format --check .

+      - name: Lint with ruff
+        run: |
+          ruff check .
+
       - name: Check types with mypy
         run: |
           mypy --no-site-packages --config-file mypy.ini . | mypy-baseline filter
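Note: the new step runs ruff's linter (ruff check .) alongside the existing ruff format --check . step. The sketch below is a hypothetical local helper, not part of this repository, that mirrors those two checks before pushing; it assumes ruff is installed in the active environment.

# Hypothetical local helper mirroring the workflow's ruff steps; assumes ruff is
# installed ("pip install ruff"). Not part of the repository.
import subprocess
import sys

CHECKS = [
    ["ruff", "format", "--check", "."],  # existing formatting check
    ["ruff", "check", "."],              # lint check added in this commit
]


def run_checks() -> int:
    for cmd in CHECKS:
        print("$ " + " ".join(cmd))
        result = subprocess.run(cmd)
        if result.returncode != 0:
            # Fail fast, the same way a failing CI step stops the job.
            return result.returncode
    return 0


if __name__ == "__main__":
    sys.exit(run_checks())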

CHANGELOG.md

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 # 6.9.2 - 2025-11-10

-- feat(ph-ai): PostHog properties dict in GenerationMetadata
+- fix(llma): fix cache token double subtraction in Langchain for non-Anthropic providers causing negative costs

 # 6.9.1 - 2025-11-07

posthog/__init__.py

Lines changed: 9 additions & 2 deletions
@@ -18,8 +18,15 @@
     DEFAULT_CODE_VARIABLES_IGNORE_PATTERNS,
     DEFAULT_CODE_VARIABLES_MASK_PATTERNS,
 )
-from posthog.feature_flags import InconclusiveMatchError, RequiresServerEvaluation
-from posthog.types import FeatureFlag, FlagsAndPayloads, FeatureFlagResult
+from posthog.feature_flags import (
+    InconclusiveMatchError as InconclusiveMatchError,
+    RequiresServerEvaluation as RequiresServerEvaluation,
+)
+from posthog.types import (
+    FeatureFlag,
+    FlagsAndPayloads,
+    FeatureFlagResult as FeatureFlagResult,
+)
 from posthog.version import VERSION

 __version__ = VERSION
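Note: the redundant-alias form ("from m import X as X") is the conventional way to mark an explicit re-export, so strict settings such as ruff's F401 in an __init__.py and mypy's no_implicit_reexport keep the names instead of flagging or stripping them as unused. A minimal runnable illustration using stdlib names (not repository code):

# The redundant alias ("as path") marks an intentional re-export; under strict
# settings only aliased (or __all__-listed) names count as the module's public
# surface, while plain imports are treated as internal.
from os import path as path   # explicit re-export: kept under strict settings
from os import getcwd         # plain import: flagged if it is never used below

print(path.join("a", "b"))
print(getcwd())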

posthog/ai/langchain/callbacks.py

Lines changed: 29 additions & 9 deletions
@@ -582,7 +582,7 @@ def _capture_generation(
             event_properties["$ai_is_error"] = True
         else:
             # Add usage
-            usage = _parse_usage(output)
+            usage = _parse_usage(output, run.provider, run.model)
             event_properties["$ai_input_tokens"] = usage.input_tokens
             event_properties["$ai_output_tokens"] = usage.output_tokens
             event_properties["$ai_cache_creation_input_tokens"] = (
@@ -703,6 +703,8 @@ class ModelUsage:

 def _parse_usage_model(
     usage: Union[BaseModel, dict],
+    provider: Optional[str] = None,
+    model: Optional[str] = None,
 ) -> ModelUsage:
     if isinstance(usage, BaseModel):
         usage = usage.__dict__
@@ -771,16 +773,30 @@ def _parse_usage_model(
             for mapped_key, dataclass_key in field_mapping.items()
         },
     )
-    # In LangChain, input_tokens is the sum of input and cache read tokens.
-    # Our cost calculation expects them to be separate, for Anthropic.
-    if normalized_usage.input_tokens and normalized_usage.cache_read_tokens:
+    # For Anthropic providers, LangChain reports input_tokens as the sum of input and cache read tokens.
+    # Our cost calculation expects them to be separate for Anthropic, so we subtract cache tokens.
+    # For other providers (OpenAI, etc.), input_tokens already includes cache tokens as expected.
+    # Match logic consistent with plugin-server: exact match on provider OR substring match on model
+    is_anthropic = False
+    if provider and provider.lower() == "anthropic":
+        is_anthropic = True
+    elif model and "anthropic" in model.lower():
+        is_anthropic = True
+
+    if (
+        is_anthropic
+        and normalized_usage.input_tokens
+        and normalized_usage.cache_read_tokens
+    ):
         normalized_usage.input_tokens = max(
             normalized_usage.input_tokens - normalized_usage.cache_read_tokens, 0
         )
     return normalized_usage


-def _parse_usage(response: LLMResult) -> ModelUsage:
+def _parse_usage(
+    response: LLMResult, provider: Optional[str] = None, model: Optional[str] = None
+) -> ModelUsage:
     # langchain-anthropic uses the usage field
     llm_usage_keys = ["token_usage", "usage"]
     llm_usage: ModelUsage = ModelUsage(
@@ -794,21 +810,25 @@ def _parse_usage(response: LLMResult) -> ModelUsage:
     if response.llm_output is not None:
         for key in llm_usage_keys:
             if response.llm_output.get(key):
-                llm_usage = _parse_usage_model(response.llm_output[key])
+                llm_usage = _parse_usage_model(
+                    response.llm_output[key], provider, model
+                )
                 break

     if hasattr(response, "generations"):
         for generation in response.generations:
             if "usage" in generation:
-                llm_usage = _parse_usage_model(generation["usage"])
+                llm_usage = _parse_usage_model(generation["usage"], provider, model)
                 break

             for generation_chunk in generation:
                 if generation_chunk.generation_info and (
                     "usage_metadata" in generation_chunk.generation_info
                 ):
                     llm_usage = _parse_usage_model(
-                        generation_chunk.generation_info["usage_metadata"]
+                        generation_chunk.generation_info["usage_metadata"],
+                        provider,
+                        model,
                     )
                     break

@@ -835,7 +855,7 @@ def _parse_usage(response: LLMResult) -> ModelUsage:
                 bedrock_anthropic_usage or bedrock_titan_usage or ollama_usage
             )
             if chunk_usage:
-                llm_usage = _parse_usage_model(chunk_usage)
+                llm_usage = _parse_usage_model(chunk_usage, provider, model)
                 break

     return llm_usage
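Note: the sketch below restates the normalization these hunks implement as a standalone script; Usage and normalize() are simplified stand-ins for the SDK's ModelUsage and _parse_usage_model, not the library's actual code. Cache-read tokens are subtracted from input tokens only when the provider or model identifies Anthropic, which is what prevents the double subtraction (and the resulting negative costs) for other providers.

# Simplified, self-contained restatement of the logic above.
from dataclasses import dataclass
from typing import Optional


@dataclass
class Usage:
    input_tokens: int = 0
    output_tokens: int = 0
    cache_read_tokens: int = 0


def normalize(usage: Usage, provider: Optional[str] = None, model: Optional[str] = None) -> Usage:
    # LangChain reports Anthropic input_tokens as fresh input + cache reads, so the
    # cached portion is subtracted once. Other providers already report the value
    # the cost calculation expects; subtracting again is what caused negative costs.
    is_anthropic = bool(
        (provider and provider.lower() == "anthropic")
        or (model and "anthropic" in model.lower())
    )
    if is_anthropic and usage.input_tokens and usage.cache_read_tokens:
        usage.input_tokens = max(usage.input_tokens - usage.cache_read_tokens, 0)
    return usage


# Anthropic run: 1200 reported input includes 800 cached reads -> 400 remain.
print(normalize(Usage(1200, 50, 800), provider="anthropic").input_tokens)  # 400
# OpenAI-style run: value is already in the expected form -> left at 150.
print(normalize(Usage(150, 40, 100), provider="openai").input_tokens)      # 150
# No provider/model metadata -> no subtraction, matching the updated tests below.
print(normalize(Usage(1200, 30, 800)).input_tokens)                        # 1200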

posthog/exception_utils.py

Lines changed: 1 addition & 1 deletion
@@ -929,7 +929,7 @@ def _compile_patterns(patterns):
     for pattern in patterns:
         try:
             compiled.append(re.compile(pattern))
-        except:
+        except Exception:
             pass
     return compiled
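Note: replacing the bare except with except Exception keeps the skip-invalid-pattern behaviour while no longer swallowing BaseException subclasses such as KeyboardInterrupt and SystemExit (bare except is what lint rule E722 flags). Illustrative sketch, not repository code:

# Invalid regexes raise re.error, an Exception subclass, so they are still skipped;
# Ctrl-C and interpreter shutdown now propagate instead of being silenced.
import re


def compile_patterns(patterns):
    compiled = []
    for pattern in patterns:
        try:
            compiled.append(re.compile(pattern))
        except Exception:  # bad patterns are dropped; BaseException passes through
            pass
    return compiled


print(len(compile_patterns([r"\d+", "(", "[a-z]+"])))  # 2 -- the unbalanced "(" is skipped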

posthog/test/ai/langchain/test_callbacks.py

Lines changed: 53 additions & 8 deletions
@@ -1586,13 +1586,58 @@ def test_anthropic_cache_write_and_read_tokens(mock_client):
     generation_props = generation_args["properties"]

     assert generation_args["event"] == "$ai_generation"
-    assert generation_props["$ai_input_tokens"] == 400
+    assert (
+        generation_props["$ai_input_tokens"] == 1200
+    )  # No provider metadata, no subtraction
     assert generation_props["$ai_output_tokens"] == 30
     assert generation_props["$ai_cache_creation_input_tokens"] == 0
     assert generation_props["$ai_cache_read_input_tokens"] == 800
     assert generation_props["$ai_reasoning_tokens"] == 0


+def test_anthropic_provider_subtracts_cache_tokens(mock_client):
+    """Test that Anthropic provider correctly subtracts cache tokens from input tokens."""
+    from langchain_core.outputs import LLMResult, ChatGeneration
+    from langchain_core.messages import AIMessage
+    from uuid import uuid4
+
+    cb = CallbackHandler(mock_client)
+    run_id = uuid4()
+
+    # Set up with Anthropic provider
+    cb._set_llm_metadata(
+        serialized={},
+        run_id=run_id,
+        messages=[{"role": "user", "content": "test"}],
+        metadata={"ls_provider": "anthropic", "ls_model_name": "claude-3-sonnet"},
+    )
+
+    # Response with cache tokens: 1200 input (includes 800 cached)
+    response = LLMResult(
+        generations=[
+            [
+                ChatGeneration(
+                    message=AIMessage(content="Response"),
+                    generation_info={
+                        "usage_metadata": {
+                            "input_tokens": 1200,
+                            "output_tokens": 50,
+                            "cache_read_input_tokens": 800,
+                        }
+                    },
+                )
+            ]
+        ],
+        llm_output={},
+    )
+
+    cb._pop_run_and_capture_generation(run_id, None, response)
+
+    generation_args = mock_client.capture.call_args_list[0][1]
+    assert generation_args["properties"]["$ai_input_tokens"] == 400  # 1200 - 800
+    assert generation_args["properties"]["$ai_cache_read_input_tokens"] == 800
+
+
 def test_openai_cache_read_tokens(mock_client):
     """Test that OpenAI cache read tokens are captured correctly."""
     prompt = ChatPromptTemplate.from_messages(
@@ -1628,7 +1673,7 @@ def test_openai_cache_read_tokens(mock_client):
     generation_props = generation_args["properties"]

     assert generation_args["event"] == "$ai_generation"
-    assert generation_props["$ai_input_tokens"] == 50
+    assert generation_props["$ai_input_tokens"] == 150  # No subtraction for OpenAI
     assert generation_props["$ai_output_tokens"] == 40
     assert generation_props["$ai_cache_read_input_tokens"] == 100
     assert generation_props["$ai_cache_creation_input_tokens"] == 0
@@ -1710,15 +1755,15 @@ def test_combined_reasoning_and_cache_tokens(mock_client):
     generation_props = generation_args["properties"]

     assert generation_args["event"] == "$ai_generation"
-    assert generation_props["$ai_input_tokens"] == 200
+    assert generation_props["$ai_input_tokens"] == 500  # No subtraction for OpenAI
     assert generation_props["$ai_output_tokens"] == 100
     assert generation_props["$ai_cache_read_input_tokens"] == 300
     assert generation_props["$ai_cache_creation_input_tokens"] == 0
     assert generation_props["$ai_reasoning_tokens"] == 60


 @pytest.mark.skipif(not OPENAI_API_KEY, reason="OPENAI_API_KEY is not set")
-def test_openai_reasoning_tokens(mock_client):
+def test_openai_reasoning_tokens_o4_mini(mock_client):
     model = ChatOpenAI(
         api_key=OPENAI_API_KEY, model="o4-mini", max_completion_tokens=10
     )
@@ -1919,8 +1964,8 @@ def test_cache_read_tokens_subtraction_from_input_tokens(mock_client):
     generation_props = generation_args["properties"]

     assert generation_args["event"] == "$ai_generation"
-    # Input tokens should be reduced: 150 - 100 = 50
-    assert generation_props["$ai_input_tokens"] == 50
+    # Input tokens not reduced without provider metadata
+    assert generation_props["$ai_input_tokens"] == 150
     assert generation_props["$ai_output_tokens"] == 40
     assert generation_props["$ai_cache_read_input_tokens"] == 100

@@ -1961,8 +2006,8 @@ def test_cache_read_tokens_subtraction_prevents_negative(mock_client):
     generation_props = generation_args["properties"]

     assert generation_args["event"] == "$ai_generation"
-    # Input tokens should be 0, not negative: max(80 - 100, 0) = 0
-    assert generation_props["$ai_input_tokens"] == 0
+    # Input tokens not reduced without provider metadata
+    assert generation_props["$ai_input_tokens"] == 80
     assert generation_props["$ai_output_tokens"] == 20
     assert generation_props["$ai_cache_read_input_tokens"] == 100
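Note: every revised expectation above follows from one rule: cache-read tokens are subtracted only when provider/model metadata identifies Anthropic. A quick arithmetic check of the numbers used in these tests (illustration only, with a hypothetical helper):

# expected_input_tokens is a hypothetical helper, not part of the test suite.
def expected_input_tokens(reported_input: int, cache_read: int, is_anthropic: bool) -> int:
    # Subtract cached reads only for Anthropic; never go below zero.
    return max(reported_input - cache_read, 0) if is_anthropic else reported_input


assert expected_input_tokens(1200, 800, is_anthropic=True) == 400    # new Anthropic test
assert expected_input_tokens(1200, 800, is_anthropic=False) == 1200  # no provider metadata
assert expected_input_tokens(150, 100, is_anthropic=False) == 150    # OpenAI cache-read test
assert expected_input_tokens(80, 100, is_anthropic=False) == 80      # no longer clamped to 0
print("expectations consistent")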

posthog/test/integrations/test_middleware.py

Lines changed: 3 additions & 1 deletion
@@ -315,7 +315,9 @@ def test_sync_middleware_with_filter(self):
         get_response = Mock(return_value=mock_response)

         # Create middleware with request filter that filters all requests
-        request_filter = lambda req: False
+        def request_filter(req):
+            return False
+
         middleware = PosthogContextMiddleware.__new__(PosthogContextMiddleware)
         middleware.get_response = get_response
         middleware._is_coroutine = False
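Note: assigning a lambda to a name is what lint rule E731 ("do not assign a lambda expression, use a def") flags; the def form also gives the callable a meaningful __name__ in failure output. Illustrative snippet, not repository code:

# Both callables reject every request, as in the test above.
reject_all = lambda req: False        # E731: assigned lambda, reported as "<lambda>"


def request_filter(req):
    """Reject every request."""
    return False


print(reject_all.__name__)       # <lambda>
print(request_filter.__name__)   # request_filter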
