Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "agent-eval"
version = "0.1.45"
version = "0.1.46"
description = "Agent evaluation toolkit"
readme = "README.md"
requires-python = ">=3.10"
Expand Down
21 changes: 21 additions & 0 deletions src/agenteval/local_cost.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from litellm.utils import CostPerToken
from pydantic import BaseModel

# even where these exist in https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json
# calling cost_per_token does not return a cost, perhaps due to the associated provider
Expand Down Expand Up @@ -33,3 +34,23 @@
input_cost_per_token=1.8e-07, output_cost_per_token=1.8e-07
),
}


class CostPerTokenWithCache(BaseModel):
    """Per-token pricing rates for a model that bills cached prompt reads at a discount.

    Used as the value type of ``CUSTOM_PRICING_WITH_CACHE``; consumed when
    computing model cost manually for models litellm cannot price.
    """

    # Rate charged per fresh (non-cached) prompt token.
    input_cost_per_token: float
    # Rate charged per completion/output token.
    output_cost_per_token: float
    # Discounted rate charged per prompt token served from the provider's cache.
    cache_read_input_token_cost: float


# Counterpart of CUSTOM_PRICING for models that additionally discount
# cache reads. litellm 1.75.8's cost_per_token with a usage_object does
# not price these models, so compute_model_cost applies these rates
# manually. Keys are model names exactly as reported in inspect model_usage.
CUSTOM_PRICING_WITH_CACHE: dict[str, CostPerTokenWithCache] = {
    # Rates published at https://platform.moonshot.ai/docs/guide/kimi-k2-5-quickstart
    "moonshotai/kimi-k2.5-0127": CostPerTokenWithCache(
        input_cost_per_token=6e-07,
        output_cost_per_token=3e-06,
        cache_read_input_token_cost=1e-07,
    ),
}
13 changes: 12 additions & 1 deletion src/agenteval/log.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from litellm.types.utils import PromptTokensDetailsWrapper, Usage
from pydantic import BaseModel

from .local_cost import CUSTOM_PRICING
from .local_cost import CUSTOM_PRICING, CUSTOM_PRICING_WITH_CACHE

logger = getLogger(__name__)

Expand Down Expand Up @@ -113,6 +113,17 @@ def compute_model_cost(model_usages: list[ModelUsageWithName]) -> float | None:
custom_cost_per_token=CUSTOM_PRICING[model_usage.model],
)

elif model_usage.model in CUSTOM_PRICING_WITH_CACHE.keys():

pricing = CUSTOM_PRICING_WITH_CACHE[model_usage.model]
cache_read_tokens = model_usage.usage.input_tokens_cache_read or 0
text_tokens = input_tokens - cache_read_tokens
prompt_cost = (
text_tokens * pricing.input_cost_per_token
+ cache_read_tokens * pricing.cache_read_input_token_cost
)
completion_cost = output_tokens * pricing.output_cost_per_token

else:
total_tokens = model_usage.usage.total_tokens

Expand Down
Loading