1313
1414TRACKER = threading .local ()
1515
16- ANTHROPHIC_CACHE_PRICING_FACTOR = {
16+ ANTHROPIC_CACHE_PRICING_FACTOR = {
1717 "cache_read_tokens" : 0.1 , # Multipliers applied to the base input cost, for the 5 min ephemeral cache. See pricing: https://docs.anthropic.com/en/docs/about-claude/pricing#model-pricing
1818 "cache_write_tokens" : 1.25 ,
1919}
@@ -274,8 +274,8 @@ def get_effective_cost_from_antrophic_api(self, response) -> float:
274274 cache_read_tokens = getattr (usage , "cache_input_tokens" , 0 )
275275 cache_write_tokens = getattr (usage , "cache_creation_input_tokens" , 0 )
276276
277- cache_read_cost = self .input_cost * ANTHROPHIC_CACHE_PRICING_FACTOR ["cache_read_tokens" ]
278- cache_write_cost = self .input_cost * ANTHROPHIC_CACHE_PRICING_FACTOR ["cache_write_tokens" ]
277+ cache_read_cost = self .input_cost * ANTHROPIC_CACHE_PRICING_FACTOR ["cache_read_tokens" ]
278+ cache_write_cost = self .input_cost * ANTHROPIC_CACHE_PRICING_FACTOR ["cache_write_tokens" ]
279279
280280 # Calculate the effective cost
281281 effective_cost = (
@@ -284,6 +284,10 @@ def get_effective_cost_from_antrophic_api(self, response) -> float:
284284 + cache_read_tokens * cache_read_cost
285285 + cache_write_tokens * cache_write_cost
286286 )
287+ if effective_cost < 0 :
288+ logging .warning (
289+ "Anthropic: Negative effective cost detected.(Impossible! Likely a bug)"
290+ )
287291 return effective_cost
288292
289293 def get_effective_cost_from_openai_api (self , response ) -> float :
@@ -308,25 +312,29 @@ def get_effective_cost_from_openai_api(self, response) -> float:
308312 return 0.0
309313 api_type = "chatcompletion" if hasattr (usage , "prompt_tokens_details" ) else "response"
310314 if api_type == "chatcompletion" :
311- total_input_tokens = usage .prompt_tokens
315+ total_input_tokens = usage .prompt_tokens # (cache read tokens + new input tokens)
312316 output_tokens = usage .completion_tokens
313317 cached_input_tokens = usage .prompt_tokens_details .cached_tokens
314- non_cached_input_tokens = total_input_tokens - cached_input_tokens
318+ new_input_tokens = total_input_tokens - cached_input_tokens
315319 elif api_type == "response" :
316- total_input_tokens = usage .input_tokens
320+ total_input_tokens = usage .input_tokens # (cache read tokens + new input tokens)
317321 output_tokens = usage .output_tokens
318322 cached_input_tokens = usage .input_tokens_details .cached_tokens
319- non_cached_input_tokens = total_input_tokens - cached_input_tokens
323+ new_input_tokens = total_input_tokens - cached_input_tokens
320324 else :
321325 logging .warning (f"Unsupported API type: { api_type } . Defaulting cost to 0.0." )
322326 return 0.0
323-
324327 cache_read_cost = self .input_cost * OPENAI_CACHE_PRICING_FACTOR ["cache_read_tokens" ]
325328 effective_cost = (
326- self .input_cost * non_cached_input_tokens
329+ self .input_cost * new_input_tokens
327330 + cached_input_tokens * cache_read_cost
328331 + self .output_cost * output_tokens
329332 )
333+ if effective_cost < 0 :
334+ logging .warning (
335+ f"OpenAI: Negative effective cost detected.(Impossible! Likely a bug). "
336+ f"New input tokens: { total_input_tokens } "
337+ )
330338 return effective_cost
331339
332340
0 commit comments