@@ -125,3 +125,232 @@ def test_summary(self) -> None:
125125 s = guard .summary ()
126126 assert s ["total_records" ] == 1
127127 assert "bot-1" in s ["agents" ]
128+
def test_check_task_exceeds_org_budget(self) -> None:
    """check_task must deny a task that would push total org spend past the monthly budget."""
    guard = CostGuard(per_task_limit=100.0, per_agent_daily_limit=1000.0, org_monthly_budget=50.0)

    # Two different agents together consume 45 of the 50 budget.
    prior_spend = (("bot-1", "t1", 30.0), ("bot-2", "t2", 15.0))
    for agent_id, task_id, cost in prior_spend:
        guard.record_cost(agent_id, task_id, cost)

    # A third agent asks for 10 more: 45 + 10 = 55, which is above 50.
    permitted, why = guard.check_task("bot-3", estimated_cost=10.0)

    assert permitted is False
    assert "org monthly budget" in why.lower()
141+
def test_check_task_within_org_budget(self) -> None:
    """check_task must allow a task while recorded plus estimated spend stays under the org budget."""
    limits = {
        "per_task_limit": 100.0,
        "per_agent_daily_limit": 1000.0,
        "org_monthly_budget": 100.0,
    }
    guard = CostGuard(**limits)

    # 30 already spent; 10 more keeps the total (40) well below 100.
    guard.record_cost("bot-1", "t1", 30.0)
    permitted, _why = guard.check_task("bot-2", estimated_cost=10.0)

    assert permitted is True
151+
def test_org_budget_kill_alert(self) -> None:
    """Recording a cost that reaches 96% of the org budget must return an org-budget kill alert."""
    guard = CostGuard(
        per_task_limit=1000.0,
        per_agent_daily_limit=10000.0,
        org_monthly_budget=100.0,
        auto_throttle=True,
        kill_switch_threshold=0.95,
    )

    # 96.0 out of 100.0 is above the 0.95 kill threshold.
    raised = guard.record_cost("bot-1", "t1", 96.0)

    # Keep only alerts whose message mentions both "org budget" and "kill".
    org_kill_alerts = []
    for alert in raised:
        text = alert.message.lower()
        if "org budget" in text and "kill" in text:
            org_kill_alerts.append(alert)

    assert len(org_kill_alerts) >= 1
163+
def test_org_budget_multi_agent_aggregate(self) -> None:
    """Org budget is checked against the sum over all agents, not per agent."""
    guard = CostGuard(per_task_limit=100.0, per_agent_daily_limit=100.0, org_monthly_budget=50.0)

    # Every agent stays under its own daily limit of 100, yet together
    # they spend 20 + 20 + 15 = 55, which is over the org budget of 50.
    spend_by_agent = {"bot-1": ("t1", 20.0), "bot-2": ("t2", 20.0), "bot-3": ("t3", 15.0)}
    for agent_id, (task_id, cost) in spend_by_agent.items():
        guard.record_cost(agent_id, task_id, cost)

    # A fresh agent with a tiny request must still be refused.
    permitted, why = guard.check_task("bot-4", estimated_cost=1.0)

    assert permitted is False
    assert "org monthly budget" in why.lower()
178+
179+
180+ import pytest
181+
182+
class TestCostGuardOrgBudgetAdversarial:
    """Edge-case and adversarial scenarios for CostGuard's org_monthly_budget enforcement.

    Comments describing *why* an outcome is expected reflect the test author's
    stated understanding of CostGuard; the implementation itself lives elsewhere.
    """

    @staticmethod
    def _org_kill_alerts(alerts):
        """Return the alerts whose message mentions both "org budget" and "kill" (case-insensitive)."""
        hits = []
        for alert in alerts:
            text = alert.message.lower()
            if "org budget" in text and "kill" in text:
                hits.append(alert)
        return hits

    # Rule 11: boundary triple (just below, exactly at, just above the limit).
    # The expectations encode a strict "greater than" comparison: with nothing
    # spent yet, an estimate equal to the budget is still allowed and only a
    # value strictly above the budget is denied.
    @pytest.mark.parametrize("estimated,expected", [
        (49.99, True),   # just under the budget -> allowed
        (50.00, True),   # exactly the budget -> allowed (equal is not exceeding)
        (50.01, False),  # just over the budget -> denied
    ])
    def test_org_budget_boundary_triple(self, estimated: float, expected: bool) -> None:
        """check_task's verdict flips only once the estimate is strictly above the org budget."""
        guard = CostGuard(per_task_limit=100.0, per_agent_daily_limit=1000.0, org_monthly_budget=50.0)
        verdict, _why = guard.check_task("bot-1", estimated_cost=estimated)
        assert verdict is expected

    # Rule 17: NaN/Inf as the org budget itself.
    # The author's expectation is that all three values end up allowing the
    # task (NaN and -inf fail a "> 0" enable check, +inf can never be
    # exceeded); this test only pins the weaker "no crash, sane types" contract.
    @pytest.mark.parametrize("bad_budget", [float("nan"), float("inf"), float("-inf")])
    def test_nan_inf_budget_does_not_crash(self, bad_budget: float) -> None:
        """A non-finite org budget must not make check_task raise or return odd types."""
        guard = CostGuard(
            per_task_limit=100.0,
            per_agent_daily_limit=1000.0,
            org_monthly_budget=bad_budget,
        )
        # The verdict itself may vary; only the return types are checked.
        verdict, why = guard.check_task("bot-1", estimated_cost=1.0)
        assert isinstance(verdict, bool)
        assert isinstance(why, str)

    # Rule 17: NaN/Inf as the estimated cost.
    # Per the author's notes: NaN compares False against every limit, +inf is
    # stopped by the per-task limit, -inf slips under every "greater than"
    # check. Again only "no crash, sane types" is asserted.
    @pytest.mark.parametrize("bad_cost", [float("nan"), float("inf"), float("-inf")])
    def test_nan_inf_estimated_cost_does_not_crash(self, bad_cost: float) -> None:
        """A non-finite estimated cost must not make check_task raise or return odd types."""
        guard = CostGuard(per_task_limit=100.0, per_agent_daily_limit=1000.0, org_monthly_budget=50.0)
        verdict, why = guard.check_task("bot-1", estimated_cost=bad_cost)
        assert isinstance(verdict, bool)
        assert isinstance(why, str)

    # Rule 23: zero semantics -- a budget of 0 means "no org limit", not
    # "nothing may be spent".
    def test_zero_org_budget_means_no_limit(self) -> None:
        """With org_monthly_budget=0.0, huge recorded spend must not block further tasks."""
        guard = CostGuard(
            per_task_limit=100.0,
            per_agent_daily_limit=10000.0,
            org_monthly_budget=0.0,
        )
        guard.record_cost("bot-1", "t1", 99999.0)
        # The org check is expected to be skipped entirely when the budget is 0.
        verdict, _why = guard.check_task("bot-2", estimated_cost=1.0)
        assert verdict is True

    # Rule 6: compound state -- the agent is killed AND the org budget is
    # exceeded at the same time. The agent-kill reason is expected to win.
    def test_agent_killed_and_org_exceeded(self) -> None:
        """A killed agent is reported as killed even when the org budget is also blown."""
        guard = CostGuard(
            per_task_limit=100.0,
            per_agent_daily_limit=10.0,
            org_monthly_budget=50.0,
            auto_throttle=True,
            kill_switch_threshold=0.95,
        )
        guard.record_cost("bot-1", "t1", 9.6)   # 9.6 / 10 = 96% of the daily limit
        guard.record_cost("bot-2", "t2", 45.0)  # org total 9.6 + 45 = 54.6, over 50
        verdict, why = guard.check_task("bot-1", estimated_cost=0.01)
        assert verdict is False
        assert "killed" in why.lower()

    # Rule 7: side-effect verification -- recording costs must be reflected in
    # both the spent and the remaining org figures.
    def test_record_cost_updates_org_spent(self) -> None:
        """org_spent_month and org_remaining_month track the recorded costs."""
        guard = CostGuard(per_task_limit=100.0, per_agent_daily_limit=1000.0, org_monthly_budget=100.0)
        for agent_id, task_id, cost in (("bot-1", "t1", 10.0), ("bot-2", "t2", 20.0)):
            guard.record_cost(agent_id, task_id, cost)
        assert guard.org_spent_month == 30.0
        assert guard.org_remaining_month == 70.0

    # Rule 14: concurrent access -- many threads recording at once must neither
    # raise nor leave the org total far from the expected sum.
    def test_concurrent_record_cost_no_crash(self) -> None:
        """Ten threads each record 100 costs of 0.01; no errors and a total near 10.0."""
        import threading

        guard = CostGuard(
            per_task_limit=100.0,
            per_agent_daily_limit=10000.0,
            org_monthly_budget=10000.0,
        )
        failures: list[str] = []

        def worker(agent_id: str) -> None:
            # An exception inside a thread would not fail the test by itself,
            # so it is captured here and asserted on in the main thread.
            try:
                for n in range(100):
                    guard.record_cost(agent_id, f"t{n}", 0.01)
            except Exception as exc:
                failures.append(str(exc))

        workers = []
        for idx in range(10):
            workers.append(threading.Thread(target=worker, args=(f"bot-{idx}",)))
        for w in workers:
            w.start()
        for w in workers:
            w.join()

        assert failures == []
        # 10 threads x 100 records x 0.01 = 10.00 expected.
        # NOTE(review): the 1.0 tolerance would also hide roughly 100 lost
        # updates; consider tightening if record_cost is meant to be thread-safe.
        expected_total = 10.0
        assert abs(guard.org_spent_month - expected_total) < 1.0

    # Boundary: negative org budget -- expected to behave like a disabled org
    # limit, the same as zero.
    def test_negative_org_budget_treated_as_disabled(self) -> None:
        """A negative org_monthly_budget must not block tasks."""
        guard = CostGuard(per_task_limit=100.0, per_agent_daily_limit=1000.0, org_monthly_budget=-1.0)
        verdict, _why = guard.check_task("bot-1", estimated_cost=1.0)
        assert verdict is True

    # The org kill alert is edge-triggered: it is expected on the call that
    # crosses the threshold and not on later calls that start above it.
    def test_org_kill_alert_fires_once(self) -> None:
        """Org kill alert appears when the threshold is crossed and is not repeated afterwards."""
        guard = CostGuard(
            per_task_limit=1000.0,
            per_agent_daily_limit=10000.0,
            org_monthly_budget=100.0,
            auto_throttle=True,
            kill_switch_threshold=0.95,
        )
        # Utilisation goes 0.00 -> 0.96, crossing 0.95.
        on_crossing = self._org_kill_alerts(guard.record_cost("bot-1", "t1", 96.0))
        # Utilisation goes 0.96 -> 0.97, already above the threshold.
        after_crossing = self._org_kill_alerts(guard.record_cost("bot-1", "t2", 1.0))
        assert len(on_crossing) >= 1
        assert len(after_crossing) == 0  # must not fire again

    # WARN-2 fix: an org-level kill must mark every known agent as killed.
    def test_org_kill_sets_killed_on_all_agents(self) -> None:
        """After the org kill threshold is crossed, all registered agents are killed and blocked."""
        guard = CostGuard(
            per_task_limit=1000.0,
            per_agent_daily_limit=10000.0,
            org_monthly_budget=100.0,
            auto_throttle=True,
            kill_switch_threshold=0.95,
        )
        guard.record_cost("bot-1", "t1", 50.0)
        guard.record_cost("bot-2", "t2", 46.0)  # org total 96 of 100 -> kill

        # Both agents that have recorded costs must carry the killed flag.
        for agent_id in ("bot-1", "bot-2"):
            assert guard.get_budget(agent_id).killed is True

        # A killed agent is then refused by check_task with a "killed" reason.
        verdict, why = guard.check_task("bot-1", estimated_cost=0.01)
        assert verdict is False
        assert "killed" in why.lower()
0 commit comments