Commit b1dc69c

Added effective cost using cache per api call in stats
1 parent 99595a9 commit b1dc69c

File tree

1 file changed: +72 −1 lines changed


src/agentlab/llm/tracking.py

Lines changed: 72 additions & 1 deletion
@@ -14,6 +14,16 @@

 TRACKER = threading.local()

+ANTHROPHIC_CACHE_PRICING_FACTOR = {
+    "cache_read_tokens": 0.1,  # Pricing factor for the 5 min ephemeral cache. See pricing here: https://docs.anthropic.com/en/docs/about-claude/pricing#model-pricing
+    "cache_write_tokens": 1.25,
+}
+
+OPENAI_CACHE_PRICING_FACTOR = {
+    "cache_read_tokens": 0.5,  # This is an upper bound. See pricing here: https://platform.openai.com/docs/pricing
+    "cache_write_tokens": 1,
+}
+

 class LLMTracker:
     def __init__(self, suffix=""):
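These constants are multiplicative factors applied to the model's base per-token input price, not absolute prices. A minimal sketch of the arithmetic, not part of the commit, assuming a purely hypothetical input price of $3.00 per million tokens:

# Hypothetical example: base input price of $3.00 per 1M tokens (illustrative value only).
input_cost = 3.00 / 1_000_000  # dollars per input token

# Anthropic: cache reads are billed at 10% of the input price,
# cache writes at 125% of the input price (5 min ephemeral cache).
anthropic_cache_read_cost = input_cost * 0.1    # ~$0.30 per 1M cache-read tokens
anthropic_cache_write_cost = input_cost * 1.25  # ~$3.75 per 1M cache-write tokens

# OpenAI: cached input tokens cost at most 50% of the input price,
# and there is no separate charge for writing to the cache.
openai_cache_read_cost = input_cost * 0.5       # ~$1.50 per 1M cached tokens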
@@ -156,8 +166,8 @@ def __call__(self, *args, **kwargs):
         usage = dict(getattr(response, "usage", {}))
         usage = {f"usage_{k}": v for k, v in usage.items() if isinstance(v, (int, float))}
         usage |= {"n_api_calls": 1}
+        usage |= {"effective_cost": self.get_effective_cost(response)}
         self.stats.increment_stats_dict(usage)
-
         self.update_pricing_tracker(response)
         return self._parse_response(response)
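With this change, each API call contributes one extra numeric field to the per-call usage dict before it is accumulated by increment_stats_dict. A rough sketch of what such a dict might look like; the usage_* keys and all numbers are made up, only n_api_calls and effective_cost are fixed by this commit:

usage = {
    "usage_prompt_tokens": 1200,     # illustrative; key names mirror the provider's usage object
    "usage_completion_tokens": 350,  # illustrative
    "n_api_calls": 1,
    "effective_cost": 0.0042,        # dollars, as returned by get_effective_cost(response)
}

Assuming increment_stats_dict sums numeric fields across calls, the tracker's totals now carry a cache-aware effective_cost alongside the raw token counts.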

@@ -229,6 +239,67 @@ def get_tokens_counts_from_response(self, response) -> tuple:
             )
         return 0, 0

+    def get_effective_cost(self, response):
+        """Get the effective cost of the response based on the provider."""
+        if self._pricing_api == "anthropic":
+            return self.get_effective_cost_from_antrophic_api(response)
+        elif self._pricing_api == "openai":
+            return self.get_effective_cost_from_openai_api(response)
+        else:
+            logging.warning(
+                f"Unsupported provider: {self._pricing_api}. No effective cost calculated."
+            )
+            return 0.0
+
+    def get_effective_cost_from_antrophic_api(self, response):
+        """Get the effective cost from the Anthropic API response.
+
+        Anthropic's usage 'input_tokens' counts only new input tokens (tokens that are not cached).
+        Anthropic prices cache write and cache read tokens differently.
+        See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching#tracking-cache-performance
+        """
+        usage = getattr(response, "usage", {})
+        new_input_tokens = getattr(usage, "input_tokens", 0)  # new (non-cached) input tokens
+        output_tokens = getattr(usage, "output_tokens", 0)
+        cache_read_tokens = getattr(usage, "cache_read_input_tokens", 0)
+        cache_write_tokens = getattr(usage, "cache_creation_input_tokens", 0)
+
+        # Per-token prices for cached tokens, derived from the base input price.
+        cache_read_cost = self.input_cost * ANTHROPHIC_CACHE_PRICING_FACTOR["cache_read_tokens"]
+        cache_write_cost = self.input_cost * ANTHROPHIC_CACHE_PRICING_FACTOR["cache_write_tokens"]
+
+        # Calculate the effective cost
+        effective_cost = (
+            new_input_tokens * self.input_cost
+            + output_tokens * self.output_cost
+            + cache_read_tokens * cache_read_cost
+            + cache_write_tokens * cache_write_cost
+        )
+        return effective_cost
+
+    def get_effective_cost_from_openai_api(self, response):
+        """Get the effective cost from the OpenAI API response.
+
+        OpenAI's usage 'prompt_tokens' is the total input tokens (cache read tokens + new input tokens).
+        See https://openai.com/index/api-prompt-caching/
+        OpenAI has a single price for cached tokens, i.e. the cache read price (generally 50% cheaper),
+        and no extra charge for cache write tokens.
+        See pricing here: https://platform.openai.com/docs/pricing
+        """
+        usage = getattr(response, "usage", {})
+        prompt_token_details = getattr(usage, "prompt_tokens_details", {})
+
+        total_input_tokens = getattr(usage, "prompt_tokens", 0)  # cache read tokens + new input tokens
+        output_tokens = getattr(usage, "completion_tokens", 0)
+        cache_read_tokens = getattr(prompt_token_details, "cached_tokens", 0)
+
+        non_cached_input_tokens = total_input_tokens - cache_read_tokens
+        cache_read_cost = self.input_cost * OPENAI_CACHE_PRICING_FACTOR["cache_read_tokens"]
+
+        effective_cost = (
+            self.input_cost * non_cached_input_tokens
+            + cache_read_tokens * cache_read_cost
+            + self.output_cost * output_tokens
+        )
+        return effective_cost
+

 @dataclass
 class Stats:
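To make the cache-aware accounting concrete, here is a small worked example, not part of the commit, that applies the Anthropic formula above with invented token counts and per-token prices:

# Hypothetical prices: $3/M input, $15/M output (illustrative, not from the commit).
input_cost = 3.00 / 1_000_000
output_cost = 15.00 / 1_000_000

new_input_tokens = 500        # billed at the full input price
cache_write_tokens = 4_000    # billed at 1.25x the input price
cache_read_tokens = 4_000     # billed at 0.1x the input price
output_tokens = 800

effective_cost = (
    new_input_tokens * input_cost
    + output_tokens * output_cost
    + cache_read_tokens * input_cost * 0.1
    + cache_write_tokens * input_cost * 1.25
)
print(f"${effective_cost:.4f}")  # $0.0297, vs $0.0375 if all 8,500 input tokens were billed at full price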

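A similar sketch for the OpenAI formula; since OpenAI folds cache reads into prompt_tokens, the uncached share is obtained by subtraction (again, all numbers are invented):

# Hypothetical prices: $2.50/M input, $10/M output (illustrative only).
input_cost = 2.50 / 1_000_000
output_cost = 10.00 / 1_000_000

total_input_tokens = 8_000   # usage.prompt_tokens: cached + new input tokens
cache_read_tokens = 6_000    # usage.prompt_tokens_details.cached_tokens, billed at 0.5x
output_tokens = 500

non_cached_input_tokens = total_input_tokens - cache_read_tokens
effective_cost = (
    non_cached_input_tokens * input_cost
    + cache_read_tokens * input_cost * 0.5
    + output_tokens * output_cost
)
print(f"${effective_cost:.4f}")  # $0.0175, vs $0.0250 if none of the input tokens had been cached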