@@ -22,7 +22,7 @@
     result,
 )
 from .result import ResultData
-from .settings import ModelSettings, merge_model_settings
+from .settings import ModelSettings, UsageLimits, merge_model_settings
 from .tools import (
     AgentDeps,
     RunContext,
@@ -191,6 +191,7 @@ async def run(
         model: models.Model | models.KnownModelName | None = None,
         deps: AgentDeps = None,
         model_settings: ModelSettings | None = None,
+        usage_limits: UsageLimits | None = None,
         infer_name: bool = True,
     ) -> result.RunResult[ResultData]:
         """Run the agent with a user prompt in async mode.
@@ -211,8 +212,9 @@ async def run(
             message_history: History of the conversation so far.
             model: Optional model to use for this run, required if `model` was not set when creating the agent.
             deps: Optional dependencies to use for this run.
-            infer_name: Whether to try to infer the agent name from the call frame if it's not set.
             model_settings: Optional settings to use for this model's request.
+            usage_limits: Optional limits on model request count or token usage.
+            infer_name: Whether to try to infer the agent name from the call frame if it's not set.
 
         Returns:
             The result of the run.
@@ -237,12 +239,14 @@ async def run(
             for tool in self._function_tools.values():
                 tool.current_retry = 0
 
-            usage = result.Usage()
-
+            usage = result.Usage(requests=0)
             model_settings = merge_model_settings(self.model_settings, model_settings)
+            usage_limits = usage_limits or UsageLimits()
 
             run_step = 0
             while True:
+                usage_limits.check_before_request(usage)
+
                 run_step += 1
                 with _logfire.span('preparing model and tools {run_step=}', run_step=run_step):
                     agent_model = await self._prepare_model(model_used, deps, messages)
@@ -254,6 +258,8 @@ async def run(
 
                 messages.append(model_response)
                 usage += request_usage
+                usage.requests += 1
+                usage_limits.check_tokens(request_usage)
 
                 with _logfire.span('handle model response', run_step=run_step) as handle_span:
                     final_result, tool_responses = await self._handle_model_response(model_response, deps, messages)
@@ -284,6 +290,7 @@ def run_sync(
         model: models.Model | models.KnownModelName | None = None,
         deps: AgentDeps = None,
         model_settings: ModelSettings | None = None,
+        usage_limits: UsageLimits | None = None,
         infer_name: bool = True,
     ) -> result.RunResult[ResultData]:
         """Run the agent with a user prompt synchronously.
@@ -308,8 +315,9 @@ async def main():
             message_history: History of the conversation so far.
             model: Optional model to use for this run, required if `model` was not set when creating the agent.
             deps: Optional dependencies to use for this run.
-            infer_name: Whether to try to infer the agent name from the call frame if it's not set.
             model_settings: Optional settings to use for this model's request.
+            usage_limits: Optional limits on model request count or token usage.
+            infer_name: Whether to try to infer the agent name from the call frame if it's not set.
 
         Returns:
             The result of the run.
@@ -322,8 +330,9 @@ async def main():
                 message_history=message_history,
                 model=model,
                 deps=deps,
-                infer_name=False,
                 model_settings=model_settings,
+                usage_limits=usage_limits,
+                infer_name=False,
             )
         )
@@ -336,6 +345,7 @@ async def run_stream(
         model: models.Model | models.KnownModelName | None = None,
         deps: AgentDeps = None,
         model_settings: ModelSettings | None = None,
+        usage_limits: UsageLimits | None = None,
         infer_name: bool = True,
     ) -> AsyncIterator[result.StreamedRunResult[AgentDeps, ResultData]]:
         """Run the agent with a user prompt in async mode, returning a streamed response.
@@ -357,8 +367,9 @@ async def main():
             message_history: History of the conversation so far.
             model: Optional model to use for this run, required if `model` was not set when creating the agent.
             deps: Optional dependencies to use for this run.
-            infer_name: Whether to try to infer the agent name from the call frame if it's not set.
             model_settings: Optional settings to use for this model's request.
+            usage_limits: Optional limits on model request count or token usage.
+            infer_name: Whether to try to infer the agent name from the call frame if it's not set.
 
         Returns:
             The result of the run.
@@ -387,16 +398,19 @@ async def main():
 
             usage = result.Usage()
             model_settings = merge_model_settings(self.model_settings, model_settings)
+            usage_limits = usage_limits or UsageLimits()
 
             run_step = 0
             while True:
                 run_step += 1
+                usage_limits.check_before_request(usage)
 
                 with _logfire.span('preparing model and tools {run_step=}', run_step=run_step):
                     agent_model = await self._prepare_model(model_used, deps, messages)
 
                 with _logfire.span('model request {run_step=}', run_step=run_step) as model_req_span:
                     async with agent_model.request_stream(messages, model_settings) as model_response:
+                        usage.requests += 1
                         model_req_span.set_attribute('response_type', model_response.__class__.__name__)
                         # We want to end the "model request" span here, but we can't exit the context manager
                         # in the traditional way
@@ -435,6 +449,7 @@ async def on_complete():
                         messages,
                         new_message_index,
                         usage,
+                        usage_limits,
                         result_stream,
                         self._result_schema,
                         deps,
@@ -456,7 +471,9 @@ async def on_complete():
                     tool_responses_str = ' '.join(r.part_kind for r in tool_responses)
                     handle_span.message = f'handle model response -> {tool_responses_str}'
                     # the model_response should have been fully streamed by now, we can add its usage
-                    usage += model_response.usage()
+                    model_response_usage = model_response.usage()
+                    usage += model_response_usage
+                    usage_limits.check_tokens(usage)
 
     @contextmanager
     def override(
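
The `UsageLimits` class itself is not shown in this diff, only its import from `.settings` and the two methods the run loops call. As a minimal, self-contained sketch of how such a class might work: the method names (`check_before_request`, `check_tokens`) and the `requests` counter on `Usage` come from the diff above, while the field names (`request_limit`, `total_tokens_limit`), the exception type, and the check logic are assumptions for illustration.

from __future__ import annotations

from dataclasses import dataclass


class UsageLimitExceeded(Exception):
    """Raised when a limit would be exceeded (the real exception type isn't shown in the diff)."""


@dataclass
class Usage:
    """Minimal stand-in for result.Usage, with only the fields the checks need."""

    requests: int = 0
    total_tokens: int | None = None


@dataclass
class UsageLimits:
    """Limits enforced by the agent run loop; field names here are assumptions."""

    request_limit: int | None = 50
    total_tokens_limit: int | None = None

    def check_before_request(self, usage: Usage) -> None:
        # Called at the top of the `while True:` loop, before each model request is made.
        if self.request_limit is not None and usage.requests >= self.request_limit:
            raise UsageLimitExceeded(f'The next request would exceed the request_limit of {self.request_limit}')

    def check_tokens(self, usage: Usage) -> None:
        # Called after a response's usage has been accumulated.
        total = usage.total_tokens or 0
        if self.total_tokens_limit is not None and total > self.total_tokens_limit:
            raise UsageLimitExceeded(f'Exceeded the total_tokens_limit of {self.total_tokens_limit}')

Checking the request count before issuing a request, rather than after, lets a run fail fast instead of paying for a request that would blow the budget, which matches where `check_before_request` sits in both run loops above.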
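From the caller's side, the new keyword threads identically through `run`, `run_sync`, and `run_stream`. A hedged usage sketch, assuming a `request_limit` field and the public `pydantic_ai` package layout, neither of which is confirmed by this diff:

from pydantic_ai import Agent
from pydantic_ai.settings import UsageLimits  # per this diff, UsageLimits lives in .settings

agent = Agent('openai:gpt-4o')  # model name is illustrative

# Cap the run at three model requests; a long tool-calling loop would now raise
# (presumably from check_before_request) instead of making a fourth request.
result = agent.run_sync(
    'What is the capital of France?',
    usage_limits=UsageLimits(request_limit=3),  # field name is an assumption
)
print(result.data)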