31
31
from typing import Annotated , Union
32
32
33
33
import click
34
+ from pydantic import ValidationError
34
35
35
36
try :
36
37
import uvloop
55
56
)
56
57
from guidellm .benchmark .scenario import (
57
58
GenerativeTextScenario ,
59
+ get_builtin_scenarios ,
58
60
)
59
61
from guidellm .mock_server import MockServer , MockServerConfig
60
62
from guidellm .preprocess .dataset import ShortPromptStrategy , process_dataset
@@ -135,6 +137,25 @@ def benchmark():
135
137
help = "Run a benchmark against a generative model using the specified arguments." ,
136
138
context_settings = {"auto_envvar_prefix" : "GUIDELLM" },
137
139
)
140
+ @click .option (
141
+ "--scenario" ,
142
+ type = cli_tools .Union (
143
+ click .Path (
144
+ exists = True ,
145
+ readable = True ,
146
+ file_okay = True ,
147
+ dir_okay = False ,
148
+ path_type = Path ,
149
+ ),
150
+ click .Choice (get_builtin_scenarios ()),
151
+ ),
152
+ default = None ,
153
+ help = (
154
+ "The name of a builtin scenario or path to a config file. "
155
+ "Missing values from the config will use defaults. "
156
+ "Options specified on the commandline will override the scenario."
157
+ ),
158
+ )
138
159
@click .option (
139
160
"--target" ,
140
161
type = str ,
@@ -161,7 +182,7 @@ def benchmark():
161
182
)
162
183
@click .option (
163
184
"--rate" ,
164
- default = None ,
185
+ default = GenerativeTextScenario . get_default ( "rate" ) ,
165
186
help = (
166
187
"The rates to run the benchmark at. "
167
188
"Can be a single number or a comma-separated list of numbers. "
@@ -183,18 +204,18 @@ def benchmark():
183
204
"--backend-type" , # legacy alias
184
205
"backend" ,
185
206
type = click .Choice (list (get_literal_vals (BackendType ))),
207
+ default = GenerativeTextScenario .get_default ("backend" ),
186
208
help = (
187
209
"The type of backend to use to run requests against. Defaults to 'openai_http'."
188
210
f" Supported types: { ', ' .join (get_literal_vals (BackendType ))} "
189
211
),
190
- default = "openai_http" ,
191
212
)
192
213
@click .option (
193
214
"--backend-kwargs" ,
194
215
"--backend-args" , # legacy alias
195
216
"backend_kwargs" ,
196
217
callback = cli_tools .parse_json ,
197
- default = None ,
218
+ default = GenerativeTextScenario . get_default ( "backend_kwargs" ) ,
198
219
help = (
199
220
"A JSON string containing any arguments to pass to the backend as a "
200
221
"dict with **kwargs. Headers can be removed by setting their value to "
@@ -204,7 +225,7 @@ def benchmark():
204
225
)
205
226
@click .option (
206
227
"--model" ,
207
- default = None ,
228
+ default = GenerativeTextScenario . get_default ( "model" ) ,
208
229
type = str ,
209
230
help = (
210
231
"The ID of the model to benchmark within the backend. "
@@ -214,7 +235,7 @@ def benchmark():
214
235
# Data configuration
215
236
@click .option (
216
237
"--processor" ,
217
- default = None ,
238
+ default = GenerativeTextScenario . get_default ( "processor" ) ,
218
239
type = str ,
219
240
help = (
220
241
"The processor or tokenizer to use to calculate token counts for statistics "
@@ -224,7 +245,7 @@ def benchmark():
224
245
)
225
246
@click .option (
226
247
"--processor-args" ,
227
- default = None ,
248
+ default = GenerativeTextScenario . get_default ( "processor_args" ) ,
228
249
callback = cli_tools .parse_json ,
229
250
help = (
230
251
"A JSON string containing any arguments to pass to the processor constructor "
@@ -233,7 +254,7 @@ def benchmark():
233
254
)
234
255
@click .option (
235
256
"--data-args" ,
236
- default = None ,
257
+ default = GenerativeTextScenario . get_default ( "data_args" ) ,
237
258
callback = cli_tools .parse_json ,
238
259
help = (
239
260
"A JSON string containing any arguments to pass to the dataset creation "
@@ -242,7 +263,7 @@ def benchmark():
242
263
)
243
264
@click .option (
244
265
"--data-sampler" ,
245
- default = None ,
266
+ default = GenerativeTextScenario . get_default ( "data_sampler" ) ,
246
267
type = click .Choice (["random" ]),
247
268
help = (
248
269
"The data sampler type to use. 'random' will add a random shuffle on the data. "
@@ -301,7 +322,7 @@ def benchmark():
301
322
"--warmup-percent" , # legacy alias
302
323
"warmup" ,
303
324
type = float ,
304
- default = None ,
325
+ default = GenerativeTextScenario . get_default ( "warmup" ) ,
305
326
help = (
306
327
"The specification around the number of requests to run before benchmarking. "
307
328
"If within (0, 1), then the percent of requests/time to use for warmup. "
@@ -315,7 +336,7 @@ def benchmark():
315
336
"--cooldown-percent" , # legacy alias
316
337
"cooldown" ,
317
338
type = float ,
318
- default = GenerativeTextScenario .get_default ("cooldown_percent " ),
339
+ default = GenerativeTextScenario .get_default ("cooldown " ),
319
340
help = (
320
341
"The specification around the number of requests to run after benchmarking. "
321
342
"If within (0, 1), then the percent of requests/time to use for cooldown. "
@@ -328,19 +349,19 @@ def benchmark():
328
349
"--request-samples" ,
329
350
"--output-sampling" , # legacy alias
330
351
"request_samples" ,
352
+ default = GenerativeTextScenario .get_default ("request_samples" ),
331
353
type = int ,
332
354
help = (
333
355
"The number of samples for each request status and each benchmark to save "
334
356
"in the output file. If None (default), will save all samples. "
335
357
"Defaults to 20."
336
358
),
337
- default = 20 ,
338
359
)
339
360
# Constraints configuration
340
361
@click .option (
341
362
"--max-seconds" ,
342
363
type = float ,
343
- default = None ,
364
+ default = GenerativeTextScenario . get_default ( "max_seconds" ) ,
344
365
help = (
345
366
"The maximum number of seconds each benchmark can run for. "
346
367
"If None, will run until max_requests or the data is exhausted."
@@ -349,7 +370,7 @@ def benchmark():
349
370
@click .option (
350
371
"--max-requests" ,
351
372
type = int ,
352
- default = None ,
373
+ default = GenerativeTextScenario . get_default ( "max_requests" ) ,
353
374
help = (
354
375
"The maximum number of requests each benchmark can run for. "
355
376
"If None, will run until max_seconds or the data is exhausted."
@@ -358,55 +379,22 @@ def benchmark():
358
379
@click .option (
359
380
"--max-errors" ,
360
381
type = int ,
361
- default = None ,
382
+ default = GenerativeTextScenario . get_default ( "max_errors" ) ,
362
383
help = "Maximum number of errors allowed before stopping the benchmark" ,
363
384
)
364
385
@click .option (
365
386
"--max-error-rate" ,
366
387
type = float ,
367
- default = None ,
388
+ default = GenerativeTextScenario . get_default ( "max_error_rate" ) ,
368
389
help = "Maximum error rate allowed before stopping the benchmark" ,
369
390
)
370
391
@click .option (
371
392
"--max-global-error-rate" ,
372
393
type = float ,
373
- default = None ,
394
+ default = GenerativeTextScenario . get_default ( "max_global_error_rate" ) ,
374
395
help = "Maximum global error rate allowed across all benchmarks" ,
375
396
)
376
- def run (
377
- target ,
378
- data ,
379
- profile ,
380
- rate ,
381
- random_seed ,
382
- # Backend Configuration
383
- backend ,
384
- backend_kwargs ,
385
- model ,
386
- # Data configuration
387
- processor ,
388
- processor_args ,
389
- data_args ,
390
- data_sampler ,
391
- # Output configuration
392
- output_path ,
393
- output_formats ,
394
- # Updates configuration
395
- disable_console_outputs ,
396
- disable_progress ,
397
- display_scheduler_stats ,
398
- # Aggregators configuration
399
- output_extras ,
400
- warmup ,
401
- cooldown ,
402
- request_samples ,
403
- # Constraints configuration
404
- max_seconds ,
405
- max_requests ,
406
- max_errors ,
407
- max_error_rate ,
408
- max_global_error_rate ,
409
- ):
397
+ def run (** kwargs ):
410
398
"""
411
399
Execute a generative text benchmark against a target model backend.
412
400
@@ -415,53 +403,53 @@ def run(
415
403
Supports multiple backends, data sources, output formats, and constraint types
416
404
for flexible benchmark configuration.
417
405
"""
406
+ scenario = kwargs .pop ("scenario" )
407
+ click_ctx = click .get_current_context ()
408
+ overrides = cli_tools .set_if_not_default (click_ctx , ** kwargs )
409
+
410
+ try :
411
+ # Build the scenario from the CLI overrides alone, a scenario file, or a builtin
412
+ if scenario is None :
413
+ _scenario = GenerativeTextScenario .model_validate (overrides )
414
+ elif isinstance (scenario , Path ):
415
+ _scenario = GenerativeTextScenario .from_file (scenario , overrides )
416
+ else : # Only builtins can make it here; click will catch anything else
417
+ _scenario = GenerativeTextScenario .from_builtin (scenario , overrides )
418
+ except ValidationError as e :
419
+ # Translate pydantic validation error to click argument error
420
+ errs = e .errors (include_url = False , include_context = True , include_input = True )
421
+ param_name = "--" + str (errs [0 ]["loc" ][0 ]).replace ("_" , "-" )
422
+ raise click .BadParameter (
423
+ errs [0 ]["msg" ], ctx = click_ctx , param_hint = param_name
424
+ ) from e
425
+
418
426
if HAS_UVLOOP :
419
427
asyncio .set_event_loop_policy (uvloop .EventLoopPolicy ())
420
428
asyncio .run (
421
429
benchmark_generative_text (
422
- target = target ,
423
- data = data ,
424
- profile = profile ,
425
- rate = rate ,
426
- random_seed = random_seed ,
427
- # Backend configuration
428
- backend = backend ,
429
- backend_kwargs = backend_kwargs ,
430
- model = model ,
431
- # Data configuration
432
- processor = processor ,
433
- processor_args = processor_args ,
434
- data_args = data_args ,
435
- data_sampler = data_sampler ,
430
+ scenario = _scenario ,
436
431
# Output configuration
437
- output_path = output_path ,
432
+ output_path = kwargs [ " output_path" ] ,
438
433
output_formats = [
439
434
fmt
440
- for fmt in output_formats
441
- if not disable_console_outputs or fmt != "console"
435
+ for fmt in kwargs [ " output_formats" ]
436
+ if not kwargs [ " disable_console_outputs" ] or fmt != "console"
442
437
],
443
438
# Updates configuration
444
439
progress = (
445
440
[
446
441
GenerativeConsoleBenchmarkerProgress (
447
- display_scheduler_stats = display_scheduler_stats
442
+ display_scheduler_stats = kwargs [ " display_scheduler_stats" ]
448
443
)
449
444
]
450
- if not disable_progress
445
+ if not kwargs [ " disable_progress" ]
451
446
else None
452
447
),
453
- print_updates = not disable_console_outputs ,
448
+ print_updates = not kwargs [ " disable_console_outputs" ] ,
454
449
# Aggregators configuration
455
- add_aggregators = {"extras" : InjectExtrasAggregator (extras = output_extras )},
456
- warmup = warmup ,
457
- cooldown = cooldown ,
458
- request_samples = request_samples ,
459
- # Constraints configuration
460
- max_seconds = max_seconds ,
461
- max_requests = max_requests ,
462
- max_errors = max_errors ,
463
- max_error_rate = max_error_rate ,
464
- max_global_error_rate = max_global_error_rate ,
450
+ add_aggregators = {
451
+ "extras" : InjectExtrasAggregator (extras = kwargs ["output_extras" ])
452
+ },
465
453
)
466
454
)
467
455
0 commit comments