Skip to content

Commit c1a85a2

Browse files
committed
feat(evals): approximate cost using token estimates; ci(release): add PyPI publish workflow on tags; docs: note cost env vars in Evals section
1 parent e60034e commit c1a85a2

File tree

2 files changed

+40
-2
lines changed

2 files changed

+40
-2
lines changed

.github/workflows/release.yml

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
name: Release
2+
3+
on:
4+
push:
5+
tags:
6+
- 'v*'
7+
8+
jobs:
9+
build-and-publish:
10+
runs-on: ubuntu-latest
11+
permissions:
12+
contents: read
13+
id-token: write
14+
steps:
15+
- uses: actions/checkout@v4
16+
- uses: actions/setup-python@v5
17+
with:
18+
python-version: '3.12'
19+
- name: Build sdist and wheel
20+
run: |
21+
python -m pip install --upgrade pip build
22+
python -m build
23+
- name: Publish to PyPI
24+
uses: pypa/gh-action-pypi-publish@release/v1
25+
with:
26+
password: ${{ secrets.PYPI_API_TOKEN }}

evals/run_evals.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from micro_agent.config import configure_lm
55
from micro_agent.agent import MicroAgent
66
from micro_agent.runtime import new_trace_id, dump_trace
7+
from micro_agent.costs import estimate_tokens, estimate_cost_usd
78

89
def load_yaml(path: str):
910
with open(path, "r", encoding="utf-8") as f:
@@ -70,10 +71,21 @@ def main():
7071
latencies.append(dt)
7172
# Basic usage tracking (provided by MicroAgent)
7273
usage = getattr(pred, "usage", {}) or {}
73-
lm_calls_list.append(int(usage.get("lm_calls", 0) or 0))
74+
lm_calls = int(usage.get("lm_calls", 0) or 0)
75+
lm_calls_list.append(lm_calls)
7476
tool_calls_list.append(int(usage.get("tool_calls", 0) or 0))
7577
steps_list.append(len(pred.trace or []))
76-
costs_list.append(float(usage.get("cost", 0.0) or 0.0))
78+
79+
# Approximate cost (USD) per run, derived from heuristic token estimates
80+
provider = usage.get("provider") or "openai"
81+
model = usage.get("model") or "gpt-4o-mini"
82+
q_text = str(q)
83+
trace_text = json.dumps(pred.trace, ensure_ascii=False)
84+
ans_text = str(pred.answer or "")
85+
# Rough input tokens ~ (lm_calls * question tokens) + tokens of the JSON-serialized trace
86+
in_tokens = lm_calls * estimate_tokens(q_text, model=model) + estimate_tokens(trace_text, model=model)
87+
out_tokens = estimate_tokens(ans_text, model=model)
88+
costs_list.append(estimate_cost_usd(in_tokens, out_tokens, model=model, provider=provider))
7789

7890
print(f"[{i}/{len(dataset)}] s={s} t={dt:.2f}s q={q!r}")
7991

0 commit comments

Comments
 (0)