Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "agent-eval"
version = "0.1.45"
version = "0.1.46"
description = "Agent evaluation toolkit"
readme = "README.md"
requires-python = ">=3.10"
Expand Down
21 changes: 21 additions & 0 deletions src/agenteval/local_cost.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from litellm.utils import CostPerToken
from pydantic import BaseModel

# even where these exist in https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json
# calling cost_per_token does not return a cost, perhaps due to the associated provider
Expand Down Expand Up @@ -33,3 +34,23 @@
input_cost_per_token=1.8e-07, output_cost_per_token=1.8e-07
),
}


class CostPerTokenWithCache(BaseModel):
    """Per-token pricing rates for a model that bills cached prompt reads at a discount.

    Used as the value type of ``CUSTOM_PRICING_WITH_CACHE``; consumed when
    computing model cost manually for models litellm cannot price.
    """

    # Rate charged per fresh (non-cached) prompt token.
    input_cost_per_token: float
    # Rate charged per completion/output token.
    output_cost_per_token: float
    # Discounted rate charged per prompt token served from the provider's cache.
    cache_read_input_token_cost: float


# Counterpart of CUSTOM_PRICING for models that additionally discount
# cache reads. litellm 1.75.8's cost_per_token with a usage_object does
# not price these models, so compute_model_cost applies these rates
# manually. Keys are model names exactly as reported in inspect model_usage.
CUSTOM_PRICING_WITH_CACHE: dict[str, CostPerTokenWithCache] = {
    # Rates published at https://platform.moonshot.ai/docs/guide/kimi-k2-5-quickstart
    "moonshotai/kimi-k2.5-0127": CostPerTokenWithCache(
        input_cost_per_token=6e-07,
        output_cost_per_token=3e-06,
        cache_read_input_token_cost=1e-07,
    ),
}
13 changes: 12 additions & 1 deletion src/agenteval/log.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from litellm.types.utils import PromptTokensDetailsWrapper, Usage
from pydantic import BaseModel

from .local_cost import CUSTOM_PRICING
from .local_cost import CUSTOM_PRICING, CUSTOM_PRICING_WITH_CACHE

logger = getLogger(__name__)

Expand Down Expand Up @@ -113,6 +113,17 @@ def compute_model_cost(model_usages: list[ModelUsageWithName]) -> float | None:
custom_cost_per_token=CUSTOM_PRICING[model_usage.model],
)

elif model_usage.model in CUSTOM_PRICING_WITH_CACHE.keys():

pricing = CUSTOM_PRICING_WITH_CACHE[model_usage.model]
cache_read_tokens = model_usage.usage.input_tokens_cache_read or 0
text_tokens = input_tokens - cache_read_tokens
prompt_cost = (
text_tokens * pricing.input_cost_per_token
+ cache_read_tokens * pricing.cache_read_input_token_cost
)
completion_cost = output_tokens * pricing.output_cost_per_token

else:
total_tokens = model_usage.usage.total_tokens

Expand Down
Loading