|
1 | 1 | from __future__ import annotations |
2 | 2 | import os |
3 | | -from typing import Tuple |
| 3 | +from typing import Tuple, Any, Dict |
4 | 4 |
|
5 | 5 | def _try_tiktoken(model: str): |
6 | 6 | try: |
@@ -63,3 +63,25 @@ def get_prices_per_1k(model: str, provider: str) -> Tuple[float, float]: |
def estimate_cost_usd(input_tokens: int, output_tokens: int, model: str, provider: str) -> float:
    """Convert token counts into an estimated USD cost.

    Looks up the per-1K-token input/output prices for the given
    model/provider pair and applies them to the supplied counts.
    """
    price_in_1k, price_out_1k = get_prices_per_1k(model, provider)
    input_cost = (input_tokens / 1000.0) * price_in_1k
    output_cost = (output_tokens / 1000.0) * price_out_1k
    return input_cost + output_cost
| 66 | + |
def estimate_prediction_cost(question: str, trace: Any, answer: str, usage: Dict[str, Any]) -> Dict[str, Any]:
    """Estimate token usage and USD cost for a single prediction.

    Heuristic: input tokens ~= lm_calls * tokens(question) + tokens(str(trace))
               output tokens ~= tokens(answer)
    """
    # Treat a missing/None usage mapping as empty; fall back to defaults.
    meta = usage or {}
    provider = meta.get("provider") or "openai"
    model = meta.get("model") or "gpt-4o-mini"
    # "or 0" guards against lm_calls being present but None.
    lm_calls = int(meta.get("lm_calls", 0) or 0)

    # Tokenize each piece; None inputs collapse to the empty string.
    question_tokens = estimate_tokens(str(question or ""), model)
    trace_tokens = estimate_tokens(str(trace or ""), model)
    answer_tokens = estimate_tokens(str(answer or ""), model)

    input_tokens = lm_calls * question_tokens + trace_tokens
    output_tokens = answer_tokens
    cost = estimate_cost_usd(input_tokens, output_tokens, model=model, provider=provider)
    return {
        "input_tokens": input_tokens,
        "output_tokens": output_tokens,
        "cost_usd": cost,
    }
0 commit comments