Skip to content

Commit 53ab5cb

Browse files
amabitoamabito
andauthored
feat(agent-sre): enforce org_monthly_budget in CostGuard.check_task()
org_monthly_budget was tracked in record_cost() but never checked in check_task(). 50 agents at $100/day could blow a $5,000 monthly budget on day one with zero enforcement.

Changes:
- check_task() now rejects tasks that would exceed org_monthly_budget
- record_cost() emits a CRITICAL kill alert when org spend crosses kill_switch_threshold (matching the per-agent kill pattern)

Closes #238

Co-authored-by: amabito <amabito@local>
1 parent 23a323a commit 53ab5cb

File tree

2 files changed

+258
-0
lines changed

2 files changed

+258
-0
lines changed

packages/agent-sre/src/agent_sre/cost/guard.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,13 @@ def check_task(self, agent_id: str, estimated_cost: float = 0.0) -> tuple[bool,
176176
if budget.spent_today_usd + estimated_cost > budget.daily_limit_usd:
177177
return False, f"Would exceed daily budget (${budget.remaining_today_usd:.2f} remaining)"
178178

179+
if self.org_monthly_budget > 0:
180+
if self._org_spent_month + estimated_cost > self.org_monthly_budget:
181+
return False, (
182+
f"Would exceed org monthly budget "
183+
f"(${self.org_remaining_month:.2f} remaining)"
184+
)
185+
179186
return True, "ok"
180187

181188
def record_cost(
@@ -251,6 +258,28 @@ def record_cost(
251258
action=BudgetAction.THROTTLE,
252259
))
253260

261+
# Org budget kill alert
262+
if self.auto_throttle and self.org_monthly_budget > 0:
263+
org_util = self._org_spent_month / self.org_monthly_budget
264+
prev_org_util = (
265+
(self._org_spent_month - cost_usd) / self.org_monthly_budget
266+
if self.org_monthly_budget > 0 else 0.0
267+
)
268+
if prev_org_util < self.kill_switch_threshold <= org_util:
269+
for b in self._budgets.values():
270+
b.killed = True
271+
alerts.append(CostAlert(
272+
severity=CostAlertSeverity.CRITICAL,
273+
message=(
274+
f"Org budget kill switch triggered -- "
275+
f"{org_util * 100:.0f}% of monthly budget consumed"
276+
),
277+
agent_id=agent_id,
278+
current_value=self._org_spent_month,
279+
threshold=self.org_monthly_budget * self.kill_switch_threshold,
280+
action=BudgetAction.KILL,
281+
))
282+
254283
# Anomaly detection
255284
if self.anomaly_detection and len(self._cost_history) >= 10:
256285
anomaly_alert = self._check_anomaly(agent_id, cost_usd)

packages/agent-sre/tests/unit/test_cost.py

Lines changed: 229 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,3 +125,232 @@ def test_summary(self) -> None:
125125
s = guard.summary()
126126
assert s["total_records"] == 1
127127
assert "bot-1" in s["agents"]
128+
129+
def test_check_task_exceeds_org_budget(self) -> None:
    """Reject a task whose estimate would push org spend past the monthly cap."""
    guard = CostGuard(
        org_monthly_budget=50.0,
        per_agent_daily_limit=1000.0,
        per_task_limit=100.0,
    )
    # Two different agents together consume 45 of the 50 budget.
    for agent, task, cost in (("bot-1", "t1", 30.0), ("bot-2", "t2", 15.0)):
        guard.record_cost(agent, task, cost)

    # A third agent asking for 10 more would bring the total to 55 > 50.
    allowed, reason = guard.check_task("bot-3", estimated_cost=10.0)

    assert allowed is False
    assert "org monthly budget" in reason.lower()
141+
142+
def test_check_task_within_org_budget(self) -> None:
    """Allow a task when recorded spend plus the estimate stays under the cap."""
    guard = CostGuard(
        org_monthly_budget=100.0,
        per_agent_daily_limit=1000.0,
        per_task_limit=100.0,
    )
    guard.record_cost("bot-1", "t1", 30.0)

    # 30 already spent + 10 estimated = 40, below the 100 budget.
    allowed, _reason = guard.check_task("bot-2", estimated_cost=10.0)

    assert allowed is True
151+
152+
def test_org_budget_kill_alert(self) -> None:
    """Crossing kill_switch_threshold in a single record emits an org kill alert."""

    def is_org_kill(alert) -> bool:
        # Same message filter as the other org-kill tests: both phrases must appear.
        text = alert.message.lower()
        return "org budget" in text and "kill" in text

    guard = CostGuard(
        per_task_limit=1000.0,
        per_agent_daily_limit=10000.0,
        org_monthly_budget=100.0,
        auto_throttle=True,
        kill_switch_threshold=0.95,
    )

    # 96.0 of 100.0 is 96% of the org budget, above the 95% threshold.
    emitted = guard.record_cost("bot-1", "t1", 96.0)

    assert any(is_org_kill(alert) for alert in emitted)
163+
164+
def test_org_budget_multi_agent_aggregate(self) -> None:
    """Org spend is the sum over agents, even when each agent is within its own limit."""
    guard = CostGuard(
        org_monthly_budget=50.0,
        per_agent_daily_limit=100.0,
        per_task_limit=100.0,
    )
    # Every agent stays far below its 100.0 daily limit...
    spend_by_agent = {"bot-1": 20.0, "bot-2": 20.0, "bot-3": 15.0}
    for index, (agent, cost) in enumerate(spend_by_agent.items(), start=1):
        guard.record_cost(agent, f"t{index}", cost)

    # ...but the 55 total is already over the 50 org budget, so a new agent is blocked.
    allowed, reason = guard.check_task("bot-4", estimated_cost=1.0)

    assert allowed is False
    assert "org monthly budget" in reason.lower()
178+
179+
180+
import pytest
181+
182+
183+
class TestCostGuardOrgBudgetAdversarial:
    """Adversarial tests for org_monthly_budget enforcement."""

    # Rule 11: Boundary Triple (limit-1, limit, limit+1)
    # NOTE: Implementation uses strict inequality (spent + estimated > budget),
    # so estimated==budget with spent==0 is allowed (equal is not exceeding).
    # The boundary that denies is any value strictly above the budget.
    @pytest.mark.parametrize("estimated,expected", [
        (49.99, True),   # below limit -- allowed
        (50.00, True),   # at limit exactly -- allowed (0 + 50.0 > 50.0 is False)
        (50.01, False),  # above limit -- denied
    ])
    def test_org_budget_boundary_triple(self, estimated: float, expected: bool) -> None:
        """Check the allow/deny decision just below, at, and just above the budget."""
        guard = CostGuard(
            per_task_limit=100.0,
            per_agent_daily_limit=1000.0,
            org_monthly_budget=50.0,
        )
        allowed, _ = guard.check_task("bot-1", estimated_cost=estimated)
        assert allowed is expected

    # Rule 17: NaN/Inf bypass -- org_monthly_budget as bad value
    # nan > 0 is False in IEEE 754, so org check is skipped entirely -> allowed
    # inf > 0 is True, but spent + cost > inf is always False -> allowed
    # -inf > 0 is False, so org check is skipped -> allowed
    # NOTE(review): this only pins "no crash"; per the reasoning above a
    # non-finite budget disables enforcement -- confirm that is intended.
    @pytest.mark.parametrize("bad_budget", [float("nan"), float("inf"), float("-inf")])
    def test_nan_inf_budget_does_not_crash(self, bad_budget: float) -> None:
        """A non-finite org budget must not make check_task raise."""
        guard = CostGuard(
            per_task_limit=100.0,
            per_agent_daily_limit=1000.0,
            org_monthly_budget=bad_budget,
        )
        # Must not raise; behavior may vary but no crash
        allowed, reason = guard.check_task("bot-1", estimated_cost=1.0)
        assert isinstance(allowed, bool)
        assert isinstance(reason, str)

    # Rule 17: NaN/Inf bypass -- estimated_cost as bad value
    # nan > per_task_limit is False, nan comparisons are always False -> check_task allows nan
    # inf > per_task_limit(100) is True -> denied at per-task check (not org)
    # -inf passes all greater-than checks -> allowed
    # NOTE(review): this only pins "no crash"; per the reasoning above a NaN or
    # -inf estimate passes every limit -- confirm that is intended.
    @pytest.mark.parametrize("bad_cost", [float("nan"), float("inf"), float("-inf")])
    def test_nan_inf_estimated_cost_does_not_crash(self, bad_cost: float) -> None:
        """A non-finite estimated cost must not make check_task raise."""
        guard = CostGuard(
            per_task_limit=100.0,
            per_agent_daily_limit=1000.0,
            org_monthly_budget=50.0,
        )
        allowed, reason = guard.check_task("bot-1", estimated_cost=bad_cost)
        assert isinstance(allowed, bool)
        assert isinstance(reason, str)

    # Rule 23: Zero-semantics (0 = unlimited, not zero budget)
    # Implementation: `if self.org_monthly_budget > 0` gates the org check.
    # org_monthly_budget=0.0 means the guard is disabled, not "zero budget allowed".
    def test_zero_org_budget_means_no_limit(self) -> None:
        """org_monthly_budget=0.0 disables the org check regardless of spend."""
        guard = CostGuard(
            per_task_limit=100.0,
            per_agent_daily_limit=10000.0,
            org_monthly_budget=0.0,
        )
        guard.record_cost("bot-1", "t1", 99999.0)
        allowed, _ = guard.check_task("bot-2", estimated_cost=1.0)
        # org_monthly_budget=0 means disabled (guard checks > 0), so org check is skipped
        assert allowed is True

    # Rule 6: Compound state (per-agent killed + org budget exceeded simultaneously)
    # When agent is killed, the kill check fires first in check_task.
    def test_agent_killed_and_org_exceeded(self) -> None:
        """A killed agent is denied with the kill reason even when the org is over budget."""
        guard = CostGuard(
            per_task_limit=100.0,
            per_agent_daily_limit=10.0,
            org_monthly_budget=50.0,
            auto_throttle=True,
            kill_switch_threshold=0.95,
        )
        guard.record_cost("bot-1", "t1", 9.6)   # 96% daily -> killed
        guard.record_cost("bot-2", "t2", 45.0)  # org total = 54.6 > 50
        # bot-1 denied for agent kill, not org budget -- kill check runs first
        allowed, reason = guard.check_task("bot-1", estimated_cost=0.01)
        assert allowed is False
        assert "killed" in reason.lower()

    # Rule 7: Side-effect verification
    # record_cost must update both org_spent_month and org_remaining_month.
    def test_record_cost_updates_org_spent(self) -> None:
        """Recorded costs from different agents accumulate into the org totals."""
        guard = CostGuard(
            per_task_limit=100.0,
            per_agent_daily_limit=1000.0,
            org_monthly_budget=100.0,
        )
        guard.record_cost("bot-1", "t1", 10.0)
        guard.record_cost("bot-2", "t2", 20.0)
        assert guard.org_spent_month == 30.0
        assert guard.org_remaining_month == 70.0

    # Rule 14: Concurrent access
    # Multiple threads recording costs simultaneously must not corrupt state or crash.
    def test_concurrent_record_cost_no_crash(self) -> None:
        """Ten threads recording costs in parallel must not raise or grossly corrupt totals."""
        import threading

        guard = CostGuard(
            per_task_limit=100.0,
            per_agent_daily_limit=10000.0,
            org_monthly_budget=10000.0,
        )
        errors: list[str] = []

        def spend(agent_id: str) -> None:
            try:
                for i in range(100):
                    guard.record_cost(agent_id, f"t{i}", 0.01)
            except Exception as e:
                # repr() keeps the exception type visible even when the
                # exception carries no message (str() would be empty).
                errors.append(repr(e))

        threads = [threading.Thread(target=spend, args=(f"bot-{i}",)) for i in range(10)]
        for t in threads:
            t.start()
        for t in threads:
            t.join()
        assert errors == []
        # 10 threads x 100 records x $0.01 = $10.00. The 1.0 tolerance is far
        # wider than float rounding, so this only guards against crashes and
        # gross corruption; it would also tolerate a number of lost updates.
        # NOTE(review): confirm whether record_cost is meant to be synchronised.
        assert abs(guard.org_spent_month - 10.0) < 1.0

    # Boundary: negative org budget
    # Implementation: `if self.org_monthly_budget > 0` -- negative fails this check,
    # so the org gate is skipped entirely and the task is allowed.
    def test_negative_org_budget_treated_as_disabled(self) -> None:
        """A negative org budget behaves like a disabled org check."""
        guard = CostGuard(
            per_task_limit=100.0,
            per_agent_daily_limit=1000.0,
            org_monthly_budget=-1.0,
        )
        allowed, _ = guard.check_task("bot-1", estimated_cost=1.0)
        # Negative budget: guard checks > 0, so org check skipped -> allowed
        assert allowed is True

    # Kill alert fires exactly once across threshold
    # Implementation uses: prev_org_util < threshold <= org_util
    # After first crossing, prev_org_util is already above threshold, so condition is False.
    def test_org_kill_alert_fires_once(self) -> None:
        """The org kill alert is raised on the crossing record and not on later ones."""

        def org_kill_alerts(alerts):
            # Same message filter for both calls: both phrases must appear.
            return [
                a for a in alerts
                if "org budget" in a.message.lower() and "kill" in a.message.lower()
            ]

        guard = CostGuard(
            per_task_limit=1000.0,
            per_agent_daily_limit=10000.0,
            org_monthly_budget=100.0,
            auto_throttle=True,
            kill_switch_threshold=0.95,
        )
        # First call crosses threshold (0.0 -> 0.96, crosses 0.95)
        kill1 = org_kill_alerts(guard.record_cost("bot-1", "t1", 96.0))
        # Second call is already above threshold (0.96 -> 0.97, prev >= threshold -> no crossing)
        kill2 = org_kill_alerts(guard.record_cost("bot-1", "t2", 1.0))
        assert len(kill1) >= 1
        assert len(kill2) == 0  # must not fire again

    # WARN-2 fix: org kill must set budget.killed on all agents
    def test_org_kill_sets_killed_on_all_agents(self) -> None:
        """An org-level kill marks every already-tracked agent killed and blocks each one."""
        guard = CostGuard(
            per_task_limit=1000.0,
            per_agent_daily_limit=10000.0,
            org_monthly_budget=100.0,
            auto_throttle=True,
            kill_switch_threshold=0.95,
        )
        guard.record_cost("bot-1", "t1", 50.0)
        guard.record_cost("bot-2", "t2", 46.0)  # org total = 96% -> kill
        # Every registered agent must be killed AND blocked via check_task
        # (agent kill gate) -- not only the first one.
        for agent_id in ("bot-1", "bot-2"):
            assert guard.get_budget(agent_id).killed is True
            allowed, reason = guard.check_task(agent_id, estimated_cost=0.01)
            assert allowed is False
            assert "killed" in reason.lower()
        # NOTE(review): agents first seen after the org kill are not covered
        # here; verify how CostGuard treats them.

0 commit comments

Comments
 (0)