
Commit 8523f83

Merge pull request #260 from ServiceNow/enhance-cost-tracking
Update cost tracking for the OpenAI Chat Completions and Responses APIs
2 parents: f272fbb + 36b092a

1 file changed: +23, -11 lines

src/agentlab/llm/tracking.py

Lines changed: 23 additions & 11 deletions
@@ -163,6 +163,10 @@ def __call__(self, *args, **kwargs):
         response = self._call_api(*args, **kwargs)
 
         usage = dict(getattr(response, "usage", {}))
+        if "prompt_tokens_details" in usage:
+            usage["cached_tokens"] = usage["prompt_tokens_details"].cached_tokens
+        if "input_tokens_details" in usage:
+            usage["cached_tokens"] = usage["input_tokens_details"].cached_tokens
         usage = {f"usage_{k}": v for k, v in usage.items() if isinstance(v, (int, float))}
         usage |= {"n_api_calls": 1}
         usage |= {"effective_cost": self.get_effective_cost(response)}
@@ -298,21 +302,29 @@ def get_effective_cost_from_openai_api(self, response) -> float:
         Returns:
             float: The effective cost calculated from the response.
         """
-        usage = getattr(response, "usage", {})
-        prompt_token_details = getattr(response, "prompt_tokens_details", {})
-
-        total_input_tokens = getattr(
-            prompt_token_details, "prompt_tokens", 0
-        )  # Cache read tokens + new input tokens
-        output_tokens = getattr(usage, "completion_tokens", 0)
-        cache_read_tokens = getattr(prompt_token_details, "cached_tokens", 0)
+        usage = getattr(response, "usage", None)
+        if usage is None:
+            logging.warning("No usage information found in the response. Defaulting cost to 0.0.")
+            return 0.0
+        api_type = "chatcompletion" if hasattr(usage, "prompt_tokens_details") else "response"
+        if api_type == "chatcompletion":
+            total_input_tokens = usage.prompt_tokens
+            output_tokens = usage.completion_tokens
+            cached_input_tokens = usage.prompt_tokens_details.cached_tokens
+            non_cached_input_tokens = total_input_tokens - cached_input_tokens
+        elif api_type == "response":
+            total_input_tokens = usage.input_tokens
+            output_tokens = usage.output_tokens
+            cached_input_tokens = usage.input_tokens_details.cached_tokens
+            non_cached_input_tokens = total_input_tokens - cached_input_tokens
+        else:
+            logging.warning(f"Unsupported API type: {api_type}. Defaulting cost to 0.0.")
+            return 0.0
 
-        non_cached_input_tokens = total_input_tokens - cache_read_tokens
         cache_read_cost = self.input_cost * OPENAI_CACHE_PRICING_FACTOR["cache_read_tokens"]
-
         effective_cost = (
             self.input_cost * non_cached_input_tokens
-            + cache_read_tokens * cache_read_cost
+            + cached_input_tokens * cache_read_cost
             + self.output_cost * output_tokens
         )
         return effective_cost
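
For concreteness, here is a worked instance of the pricing formula above. All numbers are assumptions for illustration: the per-token prices and the 0.5 cache-read factor are hypothetical, since the actual OPENAI_CACHE_PRICING_FACTOR values are not shown in this diff.

    input_cost = 2.50 / 1e6    # $ per input token (assumed $2.50 / 1M tokens)
    output_cost = 10.00 / 1e6  # $ per output token (assumed $10.00 / 1M tokens)
    cache_factor = 0.5         # assumed: cached reads billed at half the input price

    total_input_tokens = 1200
    cached_input_tokens = 1000
    output_tokens = 300

    non_cached_input_tokens = total_input_tokens - cached_input_tokens  # 200
    cache_read_cost = input_cost * cache_factor  # 1.25e-6 $/token

    effective_cost = (
        input_cost * non_cached_input_tokens     # 200  * 2.50e-6 = 0.000500
        + cached_input_tokens * cache_read_cost  # 1000 * 1.25e-6 = 0.001250
        + output_cost * output_tokens            # 300  * 1.00e-5 = 0.003000
    )
    print(f"{effective_cost:.6f}")  # 0.004750

Under these assumed prices, caching 1000 of the 1200 input tokens saves 1000 * 2.5e-6 * (1 - 0.5) = $0.00125 per call relative to an uncached request.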
