31
31
from typing import Annotated , Union
32
32
33
33
import click
34
+ from pydantic import ValidationError
34
35
35
36
try :
36
37
import uvloop
55
56
)
56
57
from guidellm .benchmark .scenario import (
57
58
GenerativeTextScenario ,
59
+ get_builtin_scenarios ,
58
60
)
59
61
from guidellm .mock_server import MockServer , MockServerConfig
60
62
from guidellm .preprocess .dataset import ShortPromptStrategy , process_dataset
@@ -134,6 +136,25 @@ def benchmark():
134
136
help = "Run a benchmark against a generative model using the specified arguments." ,
135
137
context_settings = {"auto_envvar_prefix" : "GUIDELLM" },
136
138
)
139
+ @click .option (
140
+ "--scenario" ,
141
+ type = cli_tools .Union (
142
+ click .Path (
143
+ exists = True ,
144
+ readable = True ,
145
+ file_okay = True ,
146
+ dir_okay = False ,
147
+ path_type = Path ,
148
+ ),
149
+ click .Choice (get_builtin_scenarios ()),
150
+ ),
151
+ default = None ,
152
+ help = (
153
+ "The name of a builtin scenario or path to a config file. "
154
+ "Missing values from the config will use defaults. "
155
+ "Options specified on the commandline will override the scenario."
156
+ ),
157
+ )
137
158
@click .option (
138
159
"--target" ,
139
160
type = str ,
@@ -160,7 +181,7 @@ def benchmark():
160
181
)
161
182
@click .option (
162
183
"--rate" ,
163
- default = None ,
184
+ default = GenerativeTextScenario . get_default ( "rate" ) ,
164
185
help = (
165
186
"The rates to run the benchmark at. "
166
187
"Can be a single number or a comma-separated list of numbers. "
@@ -182,18 +203,18 @@ def benchmark():
182
203
"--backend-type" , # legacy alias
183
204
"backend" ,
184
205
type = click .Choice (list (get_literal_vals (BackendType ))),
206
+ default = GenerativeTextScenario .get_default ("backend" ),
185
207
help = (
186
208
"The type of backend to use to run requests against. Defaults to 'openai_http'."
187
209
f" Supported types: { ', ' .join (get_literal_vals (BackendType ))} "
188
210
),
189
- default = "openai_http" ,
190
211
)
191
212
@click .option (
192
213
"--backend-kwargs" ,
193
214
"--backend-args" , # legacy alias
194
215
"backend_kwargs" ,
195
216
callback = cli_tools .parse_json ,
196
- default = None ,
217
+ default = GenerativeTextScenario . get_default ( "backend_kwargs" ) ,
197
218
help = (
198
219
"A JSON string containing any arguments to pass to the backend as a "
199
220
"dict with **kwargs. Headers can be removed by setting their value to "
@@ -203,7 +224,7 @@ def benchmark():
203
224
)
204
225
@click .option (
205
226
"--model" ,
206
- default = None ,
227
+ default = GenerativeTextScenario . get_default ( "model" ) ,
207
228
type = str ,
208
229
help = (
209
230
"The ID of the model to benchmark within the backend. "
@@ -213,7 +234,7 @@ def benchmark():
213
234
# Data configuration
214
235
@click .option (
215
236
"--processor" ,
216
- default = None ,
237
+ default = GenerativeTextScenario . get_default ( "processor" ) ,
217
238
type = str ,
218
239
help = (
219
240
"The processor or tokenizer to use to calculate token counts for statistics "
@@ -223,7 +244,7 @@ def benchmark():
223
244
)
224
245
@click .option (
225
246
"--processor-args" ,
226
- default = None ,
247
+ default = GenerativeTextScenario . get_default ( "processor_args" ) ,
227
248
callback = cli_tools .parse_json ,
228
249
help = (
229
250
"A JSON string containing any arguments to pass to the processor constructor "
@@ -232,7 +253,7 @@ def benchmark():
232
253
)
233
254
@click .option (
234
255
"--data-args" ,
235
- default = None ,
256
+ default = GenerativeTextScenario . get_default ( "data_args" ) ,
236
257
callback = cli_tools .parse_json ,
237
258
help = (
238
259
"A JSON string containing any arguments to pass to the dataset creation "
@@ -241,7 +262,7 @@ def benchmark():
241
262
)
242
263
@click .option (
243
264
"--data-sampler" ,
244
- default = None ,
265
+ default = GenerativeTextScenario . get_default ( "data_sampler" ) ,
245
266
type = click .Choice (["random" ]),
246
267
help = (
247
268
"The data sampler type to use. 'random' will add a random shuffle on the data. "
@@ -300,7 +321,7 @@ def benchmark():
300
321
"--warmup-percent" , # legacy alias
301
322
"warmup" ,
302
323
type = float ,
303
- default = None ,
324
+ default = GenerativeTextScenario . get_default ( "warmup" ) ,
304
325
help = (
305
326
"The specification around the number of requests to run before benchmarking. "
306
327
"If within (0, 1), then the percent of requests/time to use for warmup. "
@@ -314,7 +335,7 @@ def benchmark():
314
335
"--cooldown-percent" , # legacy alias
315
336
"cooldown" ,
316
337
type = float ,
317
- default = GenerativeTextScenario .get_default ("cooldown_percent " ),
338
+ default = GenerativeTextScenario .get_default ("cooldown" ),
318
339
help = (
319
340
"The specification around the number of requests to run after benchmarking. "
320
341
"If within (0, 1), then the percent of requests/time to use for cooldown. "
@@ -327,19 +348,19 @@ def benchmark():
327
348
"--request-samples" ,
328
349
"--output-sampling" , # legacy alias
329
350
"request_samples" ,
351
+ default = GenerativeTextScenario .get_default ("request_samples" ),
330
352
type = int ,
331
353
help = (
332
354
"The number of samples for each request status and each benchmark to save "
333
355
"in the output file. If None (default), will save all samples. "
334
356
"Defaults to 20."
335
357
),
336
- default = 20 ,
337
358
)
338
359
# Constraints configuration
339
360
@click .option (
340
361
"--max-seconds" ,
341
362
type = float ,
342
- default = None ,
363
+ default = GenerativeTextScenario . get_default ( "max_seconds" ) ,
343
364
help = (
344
365
"The maximum number of seconds each benchmark can run for. "
345
366
"If None, will run until max_requests or the data is exhausted."
@@ -348,7 +369,7 @@ def benchmark():
348
369
@click .option (
349
370
"--max-requests" ,
350
371
type = int ,
351
- default = None ,
372
+ default = GenerativeTextScenario . get_default ( "max_requests" ) ,
352
373
help = (
353
374
"The maximum number of requests each benchmark can run for. "
354
375
"If None, will run until max_seconds or the data is exhausted."
@@ -357,55 +378,22 @@ def benchmark():
357
378
@click .option (
358
379
"--max-errors" ,
359
380
type = int ,
360
- default = None ,
381
+ default = GenerativeTextScenario . get_default ( "max_errors" ) ,
361
382
help = "Maximum number of errors allowed before stopping the benchmark" ,
362
383
)
363
384
@click .option (
364
385
"--max-error-rate" ,
365
386
type = float ,
366
- default = None ,
387
+ default = GenerativeTextScenario . get_default ( "max_error_rate" ) ,
367
388
help = "Maximum error rate allowed before stopping the benchmark" ,
368
389
)
369
390
@click .option (
370
391
"--max-global-error-rate" ,
371
392
type = float ,
372
- default = None ,
393
+ default = GenerativeTextScenario . get_default ( "max_global_error_rate" ) ,
373
394
help = "Maximum global error rate allowed across all benchmarks" ,
374
395
)
375
- def run (
376
- target ,
377
- data ,
378
- profile ,
379
- rate ,
380
- random_seed ,
381
- # Backend Configuration
382
- backend ,
383
- backend_kwargs ,
384
- model ,
385
- # Data configuration
386
- processor ,
387
- processor_args ,
388
- data_args ,
389
- data_sampler ,
390
- # Output configuration
391
- output_path ,
392
- output_formats ,
393
- # Updates configuration
394
- disable_console_outputs ,
395
- disable_progress ,
396
- display_scheduler_stats ,
397
- # Aggregators configuration
398
- output_extras ,
399
- warmup ,
400
- cooldown ,
401
- request_samples ,
402
- # Constraints configuration
403
- max_seconds ,
404
- max_requests ,
405
- max_errors ,
406
- max_error_rate ,
407
- max_global_error_rate ,
408
- ):
396
+ def run (** kwargs ):
409
397
"""
410
398
Execute a generative text benchmark against a target model backend.
411
399
@@ -414,53 +402,53 @@ def run(
414
402
Supports multiple backends, data sources, output formats, and constraint types
415
403
for flexible benchmark configuration.
416
404
"""
405
+ scenario = kwargs .pop ("scenario" )
406
+ click_ctx = click .get_current_context ()
407
+ overrides = cli_tools .set_if_not_default (click_ctx , ** kwargs )
408
+
409
+ try :
410
+ # If a scenario file was specified read from it
411
+ if scenario is None :
412
+ _scenario = GenerativeTextScenario .model_validate (overrides )
413
+ elif isinstance (scenario , Path ):
414
+ _scenario = GenerativeTextScenario .from_file (scenario , overrides )
415
+ else : # Only builtins can make it here; click will catch anything else
416
+ _scenario = GenerativeTextScenario .from_builtin (scenario , overrides )
417
+ except ValidationError as e :
418
+ # Translate pydantic validation error to click argument error
419
+ errs = e .errors (include_url = False , include_context = True , include_input = True )
420
+ param_name = "--" + str (errs [0 ]["loc" ][0 ]).replace ("_" , "-" )
421
+ raise click .BadParameter (
422
+ errs [0 ]["msg" ], ctx = click_ctx , param_hint = param_name
423
+ ) from e
424
+
417
425
if HAS_UVLOOP :
418
426
asyncio .set_event_loop_policy (uvloop .EventLoopPolicy ())
419
427
asyncio .run (
420
428
benchmark_generative_text (
421
- target = target ,
422
- data = data ,
423
- profile = profile ,
424
- rate = rate ,
425
- random_seed = random_seed ,
426
- # Backend configuration
427
- backend = backend ,
428
- backend_kwargs = backend_kwargs ,
429
- model = model ,
430
- # Data configuration
431
- processor = processor ,
432
- processor_args = processor_args ,
433
- data_args = data_args ,
434
- data_sampler = data_sampler ,
429
+ scenario = _scenario ,
435
430
# Output configuration
436
- output_path = output_path ,
431
+ output_path = kwargs ["output_path" ],
437
432
output_formats = [
438
433
fmt
439
- for fmt in output_formats
440
- if not disable_console_outputs or fmt != "console"
434
+ for fmt in kwargs ["output_formats" ]
435
+ if not kwargs ["disable_console_outputs" ] or fmt != "console"
441
436
],
442
437
# Updates configuration
443
438
progress = (
444
439
[
445
440
GenerativeConsoleBenchmarkerProgress (
446
- display_scheduler_stats = display_scheduler_stats
441
+ display_scheduler_stats = kwargs ["display_scheduler_stats" ]
447
442
)
448
443
]
449
- if not disable_progress
444
+ if not kwargs ["disable_progress" ]
450
445
else None
451
446
),
452
- print_updates = not disable_console_outputs ,
447
+ print_updates = not kwargs ["disable_console_outputs" ],
453
448
# Aggregators configuration
454
- add_aggregators = {"extras" : InjectExtrasAggregator (extras = output_extras )},
455
- warmup = warmup ,
456
- cooldown = cooldown ,
457
- request_samples = request_samples ,
458
- # Constraints configuration
459
- max_seconds = max_seconds ,
460
- max_requests = max_requests ,
461
- max_errors = max_errors ,
462
- max_error_rate = max_error_rate ,
463
- max_global_error_rate = max_global_error_rate ,
449
+ add_aggregators = {
450
+ "extras" : InjectExtrasAggregator (extras = kwargs ["output_extras" ])
451
+ },
464
452
)
465
453
)
466
454
0 commit comments