@@ -163,6 +163,10 @@ def __call__(self, *args, **kwargs):
163163 response = self ._call_api (* args , ** kwargs )
164164
165165 usage = dict (getattr (response , "usage" , {}))
166+ if usage.get("prompt_tokens_details") is not None:  # Chat Completions; details may be None
167+     usage["cached_tokens"] = usage["prompt_tokens_details"].cached_tokens
168+ if usage.get("input_tokens_details") is not None:  # Responses API; details may be None
169+     usage["cached_tokens"] = usage["input_tokens_details"].cached_tokens
166170 usage = {f"usage_{ k } " : v for k , v in usage .items () if isinstance (v , (int , float ))}
167171 usage |= {"n_api_calls" : 1 }
168172 usage |= {"effective_cost" : self .get_effective_cost (response )}
@@ -298,21 +302,29 @@ def get_effective_cost_from_openai_api(self, response) -> float:
298302 Returns:
299303 float: The effective cost calculated from the response.
300304 """
301- usage = getattr (response , "usage" , {})
302- prompt_token_details = getattr (response , "prompt_tokens_details" , {})
303-
304- total_input_tokens = getattr (
305- prompt_token_details , "prompt_tokens" , 0
306- ) # Cache read tokens + new input tokens
307- output_tokens = getattr (usage , "completion_tokens" , 0 )
308- cache_read_tokens = getattr (prompt_token_details , "cached_tokens" , 0 )
305+ usage = getattr(response, "usage", None)
306+ if usage is None:
307+     logging.warning("No usage information found in the response. Defaulting cost to 0.0.")
308+     return 0.0
309+ # Chat Completions usage reports prompt_tokens; Responses usage reports input_tokens.
310+ if hasattr(usage, "prompt_tokens"):
311+     total_input_tokens = usage.prompt_tokens
312+     output_tokens = usage.completion_tokens
313+     token_details = getattr(usage, "prompt_tokens_details", None)
314+ elif hasattr(usage, "input_tokens"):
315+     total_input_tokens = usage.input_tokens
316+     output_tokens = usage.output_tokens
317+     token_details = getattr(usage, "input_tokens_details", None)
318+ else:
319+     logging.warning("Unsupported usage format: expected Chat Completions or Responses API usage. Defaulting cost to 0.0.")
320+     return 0.0
321+ cached_input_tokens = getattr(token_details, "cached_tokens", 0) or 0  # details or count may be None
322+ non_cached_input_tokens = total_input_tokens - cached_input_tokens
309323
310- non_cached_input_tokens = total_input_tokens - cache_read_tokens
311324 cache_read_cost = self .input_cost * OPENAI_CACHE_PRICING_FACTOR ["cache_read_tokens" ]
312-
313325 effective_cost = (
314326 self .input_cost * non_cached_input_tokens
315- + cache_read_tokens * cache_read_cost
327+ + cached_input_tokens * cache_read_cost
316328 + self .output_cost * output_tokens
317329 )
318330 return effective_cost
0 commit comments