Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "agent-eval"
version = "0.1.45"
version = "0.1.46"
description = "Agent evaluation toolkit"
readme = "README.md"
requires-python = ">=3.10"
Expand All @@ -15,7 +15,7 @@ dependencies = [
"inspect-ai>=0.3.104,<0.3.137",
# pin litellm so that we know what model costs we're using
# see the Development.md doc before changing
"litellm>=1.67.4.post1,<=1.75.8",
"litellm>=1.67.4.post1,<=1.82.3",
"pydantic>=2.0.0",
# For leaderboard
"huggingface_hub",
Expand Down
2 changes: 1 addition & 1 deletion src/agenteval/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ def prep_litellm_cost_map():
# This snippet is mostly lifted from
# https://github.com/BerriAI/litellm/blob/b9621c760d3355e06dd17ec89b9eb6776755392e/litellm/litellm_core_utils/get_model_cost_map.py#L16
# See the Development.md before changing.
desired_model_costs_url = "https://raw.githubusercontent.com/BerriAI/litellm/eb66daeef740947c0326826817cf68fb56a8b931/litellm/model_prices_and_context_window_backup.json"
desired_model_costs_url = "https://raw.githubusercontent.com/BerriAI/litellm/9a5c778f1824641fe9f6c8dcc1d096fd9d8ef9f0/litellm/model_prices_and_context_window_backup.json"
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How'd you choose this one? I ended up in the same place when running some other cost calculations. I think we should take whatever the latest is.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the latest release marked "stable".
Screenshot 2026-03-25 at 4 28 44 PM

response = httpx.get(desired_model_costs_url, timeout=5)
response.raise_for_status()
desired_model_costs = response.json()
Expand Down
21 changes: 21 additions & 0 deletions src/agenteval/local_cost.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from litellm.utils import CostPerToken
from pydantic import BaseModel

# even where these exist in https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json
# calling cost_per_token does not return a cost, perhaps due to the associated provider
Expand Down Expand Up @@ -33,3 +34,23 @@
input_cost_per_token=1.8e-07, output_cost_per_token=1.8e-07
),
}


class CostPerTokenWithCache(BaseModel):
    """Per-token pricing for a model whose provider charges a discounted
    rate for prompt tokens served from its prompt cache.

    All rates are per single token (e.g. 6e-07 == $0.60 per 1M tokens,
    assuming USD — confirm against the provider's pricing page).
    """

    # Rate charged for uncached (regular) prompt/input tokens.
    input_cost_per_token: float
    # Rate charged for completion/output tokens.
    output_cost_per_token: float
    # Discounted rate charged for prompt tokens read from the provider's cache.
    cache_read_input_token_cost: float


# Like CUSTOM_PRICING, but for models that also have a cache read discount.
# cost_per_token with usage_object doesn't work for these models in litellm 1.75.8,
# so costs are computed manually in compute_model_cost.
# key represents model name as found in inspect model_usage
CUSTOM_PRICING_WITH_CACHE = {
    # costs from https://platform.moonshot.ai/docs/guide/kimi-k2-5-quickstart
    "moonshotai/kimi-k2.5-0127": CostPerTokenWithCache(
        input_cost_per_token=6e-07,  # uncached prompt tokens ($0.60 / 1M)
        output_cost_per_token=3e-06,  # completion tokens ($3.00 / 1M)
        cache_read_input_token_cost=1e-07,  # cache-hit prompt tokens ($0.10 / 1M)
    ),
}
13 changes: 12 additions & 1 deletion src/agenteval/log.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from litellm.types.utils import PromptTokensDetailsWrapper, Usage
from pydantic import BaseModel

from .local_cost import CUSTOM_PRICING
from .local_cost import CUSTOM_PRICING, CUSTOM_PRICING_WITH_CACHE

logger = getLogger(__name__)

Expand Down Expand Up @@ -113,6 +113,17 @@ def compute_model_cost(model_usages: list[ModelUsageWithName]) -> float | None:
custom_cost_per_token=CUSTOM_PRICING[model_usage.model],
)

elif model_usage.model in CUSTOM_PRICING_WITH_CACHE.keys():

pricing = CUSTOM_PRICING_WITH_CACHE[model_usage.model]
cache_read_tokens = model_usage.usage.input_tokens_cache_read or 0
text_tokens = input_tokens - cache_read_tokens
prompt_cost = (
text_tokens * pricing.input_cost_per_token
+ cache_read_tokens * pricing.cache_read_input_token_cost
)
completion_cost = output_tokens * pricing.output_cost_per_token

else:
total_tokens = model_usage.usage.total_tokens

Expand Down
Loading