|
1 | 1 | from __future__ import annotations |
2 | 2 | import os |
3 | | -from typing import Tuple |
| 3 | +from typing import Tuple, Any, Dict |
4 | 4 |
|
5 | 5 | def _try_tiktoken(model: str): |
6 | 6 | try: |
@@ -63,3 +63,25 @@ def get_prices_per_1k(model: str, provider: str) -> Tuple[float, float]: |
def estimate_cost_usd(input_tokens: int, output_tokens: int, model: str, provider: str) -> float:
    """Convert token counts into an estimated USD cost.

    Looks up the per-1K-token input/output prices for the given
    model/provider pair and applies them to the supplied counts.
    """
    price_in_1k, price_out_1k = get_prices_per_1k(model, provider)
    input_cost = (input_tokens / 1000.0) * price_in_1k
    output_cost = (output_tokens / 1000.0) * price_out_1k
    return input_cost + output_cost
| 66 | + |
def estimate_prediction_cost(question: str, trace: Any, answer: str, usage: Dict[str, Any]) -> Dict[str, Any]:
    """Estimate token usage and USD cost for a single prediction.

    Heuristic: input tokens ~= lm_calls * tokens(question) + tokens(str(trace))
               output tokens ~= tokens(answer)
    """
    # Treat a missing/None usage mapping as empty; fall back to defaults.
    meta = usage or {}
    provider = meta.get("provider") or "openai"
    model = meta.get("model") or "gpt-4o-mini"
    # "or 0" guards against lm_calls being present but None.
    lm_calls = int(meta.get("lm_calls", 0) or 0)

    # Tokenize each piece; None inputs collapse to the empty string.
    question_tokens = estimate_tokens(str(question or ""), model)
    trace_tokens = estimate_tokens(str(trace or ""), model)
    answer_tokens = estimate_tokens(str(answer or ""), model)

    input_tokens = lm_calls * question_tokens + trace_tokens
    output_tokens = answer_tokens
    cost = estimate_cost_usd(input_tokens, output_tokens, model=model, provider=provider)
    return {
        "input_tokens": input_tokens,
        "output_tokens": output_tokens,
        "cost_usd": cost,
    }
0 commit comments