|
14 | 14 |
|
15 | 15 | TRACKER = threading.local() |
16 | 16 |
|
# Anthropic prompt-cache pricing multipliers, expressed relative to the base
# input-token price (5-minute ephemeral cache: reads cost 10%, writes 125%).
# Pricing reference: https://docs.anthropic.com/en/docs/about-claude/pricing#model-pricing
ANTHROPHIC_CACHE_PRICING_FACTOR = {
    "cache_read_tokens": 0.1,
    "cache_write_tokens": 1.25,
}
| 21 | + |
# OpenAI prompt-cache pricing multipliers, relative to the base input-token
# price. The read factor is an upper bound (cached reads are generally >=50%
# cheaper); OpenAI charges nothing extra for cache writes.
# Pricing reference: https://platform.openai.com/docs/pricing
OPENAI_CACHE_PRICING_FACTOR = {
    "cache_read_tokens": 0.5,
    "cache_write_tokens": 1,
}
| 26 | + |
17 | 27 |
|
18 | 28 | class LLMTracker: |
19 | 29 | def __init__(self, suffix=""): |
@@ -156,8 +166,8 @@ def __call__(self, *args, **kwargs): |
156 | 166 | usage = dict(getattr(response, "usage", {})) |
157 | 167 | usage = {f"usage_{k}": v for k, v in usage.items() if isinstance(v, (int, float))} |
158 | 168 | usage |= {"n_api_calls": 1} |
| 169 | + usage |= {"effective_cost": self.get_effective_cost(response)} |
159 | 170 | self.stats.increment_stats_dict(usage) |
160 | | - |
161 | 171 | self.update_pricing_tracker(response) |
162 | 172 | return self._parse_response(response) |
163 | 173 |
|
@@ -229,6 +239,67 @@ def get_tokens_counts_from_response(self, response) -> tuple: |
229 | 239 | ) |
230 | 240 | return 0, 0 |
231 | 241 |
|
| 242 | + def get_effective_cost(self, response): |
| 243 | + """Get the effective cost from the response based on the provider.""" |
| 244 | + if self._pricing_api == "anthropic": |
| 245 | + return self.get_effective_cost_from_antrophic_api(response) |
| 246 | + elif self._pricing_api == "openai": |
| 247 | + return self.get_effective_cost_from_openai_api(response) |
| 248 | + else: |
| 249 | + logging.warning( |
| 250 | + f"Unsupported provider: {self._pricing_api}. No effective cost calculated." |
| 251 | + ) |
| 252 | + return 0.0 |
| 253 | + |
| 254 | + def get_effective_cost_from_antrophic_api(self, response): |
| 255 | + """Get the effective cost from the Anthropic API response. |
| 256 | + ## Anthropic usage 'input_tokens' are new input tokens (tokens that are not cached). |
| 257 | + ## Anthorphic has different pricing for cache write and cache read tokens. |
| 258 | + ## See https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching#tracking-cache-performance |
| 259 | + """ |
| 260 | + usage = getattr(response, "usage", {}) |
| 261 | + new_input_tokens = getattr(usage, "input_tokens", 0) # new input tokens |
| 262 | + output_tokens = getattr(usage, "output_tokens", 0) |
| 263 | + cache_read_tokens = getattr(usage, "cache_input_tokens", 0) |
| 264 | + cache_write_tokens = getattr(usage, "cache_creation_input_tokens", 0) |
| 265 | + |
| 266 | + cache_read_cost = self.input_cost * ANTHROPHIC_CACHE_PRICING_FACTOR["cache_read_tokens"] |
| 267 | + cache_write_cost = self.input_cost * ANTHROPHIC_CACHE_PRICING_FACTOR["cache_write_tokens"] |
| 268 | + |
| 269 | + # Calculate the effective cost |
| 270 | + effective_cost = ( |
| 271 | + new_input_tokens * self.input_cost |
| 272 | + + output_tokens * self.output_cost |
| 273 | + + cache_read_tokens * cache_read_cost |
| 274 | + + cache_write_tokens * cache_write_cost |
| 275 | + ) |
| 276 | + return effective_cost |
| 277 | + |
| 278 | + def get_effective_cost_from_openai_api(self, response): |
| 279 | + """Get the effective cost from the OpenAI API response. |
| 280 | + ## OpenAI usage 'prompt_tokens' are the total input tokens (cache read tokens + new input tokens). |
| 281 | + ## See https://openai.com/index/api-prompt-caching/ |
| 282 | + ## OpenAI has only one price for cache tokens i.e. cache read price. (Generally 50% cheaper) |
| 283 | + ## OpenAI had no extra charge for cache write tokens. |
| 284 | + ## See Pricing Here: https://platform.openai.com/docs/pricing |
| 285 | + """ |
| 286 | + usage = getattr(response, "usage", {}) |
| 287 | + prompt_token_details = getattr(response, "prompt_tokens_details", {}) |
| 288 | + |
| 289 | + total_input_tokens = getattr(prompt_token_details, "prompt_tokens", 0) # Cache read tokens + new input tokens |
| 290 | + output_tokens = getattr(usage, "completion_tokens", 0) |
| 291 | + cache_read_tokens = getattr(prompt_token_details, "cached_tokens", 0) |
| 292 | + |
| 293 | + non_cached_input_tokens = total_input_tokens - cache_read_tokens |
| 294 | + cache_read_cost = self.input_cost * OPENAI_CACHE_PRICING_FACTOR["cache_read_tokens"] |
| 295 | + |
| 296 | + effective_cost = ( |
| 297 | + self.input_cost * non_cached_input_tokens |
| 298 | + + cache_read_tokens * cache_read_cost |
| 299 | + + self.output_cost * output_tokens |
| 300 | + ) |
| 301 | + return effective_cost |
| 302 | + |
232 | 303 |
|
233 | 304 | @dataclass |
234 | 305 | class Stats: |
|
0 commit comments