Commit 27d5702
[GuideLLM Refactor] Scenarios reenablement (#362)
## Summary

## Details

## Test Plan

## Related Issues

- Resolves #

---

- [x] "I certify that all code in this PR is my own, except as noted below."

## Use of AI

- [ ] Includes AI-assisted code completion
- [ ] Includes code generated by an AI application
- [ ] Includes AI-generated tests (NOTE: AI written tests should have a docstring that includes `## WRITTEN BY AI ##`)
2 parents 6d31244 + 3057229 commit 27d5702

9 files changed: +323 −237 lines

src/guidellm/__main__.py

Lines changed: 68 additions & 80 deletions
```diff
@@ -31,6 +31,7 @@
 from typing import Annotated, Union
 
 import click
+from pydantic import ValidationError
 
 try:
     import uvloop
@@ -55,6 +56,7 @@
 )
 from guidellm.benchmark.scenario import (
     GenerativeTextScenario,
+    get_builtin_scenarios,
 )
 from guidellm.mock_server import MockServer, MockServerConfig
 from guidellm.preprocess.dataset import ShortPromptStrategy, process_dataset
```
```diff
@@ -135,6 +137,25 @@ def benchmark():
     help="Run a benchmark against a generative model using the specified arguments.",
     context_settings={"auto_envvar_prefix": "GUIDELLM"},
 )
+@click.option(
+    "--scenario",
+    type=cli_tools.Union(
+        click.Path(
+            exists=True,
+            readable=True,
+            file_okay=True,
+            dir_okay=False,
+            path_type=Path,
+        ),
+        click.Choice(get_builtin_scenarios()),
+    ),
+    default=None,
+    help=(
+        "The name of a builtin scenario or path to a config file. "
+        "Missing values from the config will use defaults. "
+        "Options specified on the commandline will override the scenario."
+    ),
+)
 @click.option(
     "--target",
     type=str,
```
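The new `--scenario` option takes either a builtin scenario name or a path to a config file through `cli_tools.Union`, which combines `click.Path` and `click.Choice` into one parameter type. Presumably the command is then invoked as `guidellm benchmark run --scenario <builtin-or-path> --target ...` (inferred from the command layout above, not verified against the docs). A minimal sketch of the union-type pattern this relies on — a hypothetical `UnionParamType`, not guidellm's actual `cli_tools.Union` — tries each member type in order and fails with the combined errors:

```python
import click


class UnionParamType(click.ParamType):
    """Hypothetical stand-in for cli_tools.Union: accept the first type that converts."""

    name = "union"

    def __init__(self, *types: click.ParamType) -> None:
        self.types = types

    def convert(self, value, param, ctx):
        failures = []
        for member in self.types:
            try:
                # Each member type either converts the raw value or raises a UsageError
                return member.convert(value, param, ctx)
            except click.UsageError as err:
                failures.append(str(err))
        # No member accepted the value; report every failure at once
        self.fail("; ".join(failures), param, ctx)
```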
```diff
@@ -161,7 +182,7 @@ def benchmark():
 )
 @click.option(
     "--rate",
-    default=None,
+    default=GenerativeTextScenario.get_default("rate"),
     help=(
         "The rates to run the benchmark at. "
         "Can be a single number or a comma-separated list of numbers. "
```
```diff
@@ -183,18 +204,18 @@ def benchmark():
     "--backend-type",  # legacy alias
     "backend",
     type=click.Choice(list(get_literal_vals(BackendType))),
+    default=GenerativeTextScenario.get_default("backend"),
     help=(
         "The type of backend to use to run requests against. Defaults to 'openai_http'."
         f" Supported types: {', '.join(get_literal_vals(BackendType))}"
     ),
-    default="openai_http",
 )
 @click.option(
     "--backend-kwargs",
     "--backend-args",  # legacy alias
     "backend_kwargs",
     callback=cli_tools.parse_json,
-    default=None,
+    default=GenerativeTextScenario.get_default("backend_kwargs"),
     help=(
         "A JSON string containing any arguments to pass to the backend as a "
         "dict with **kwargs. Headers can be removed by setting their value to "
@@ -204,7 +225,7 @@ def benchmark():
 )
 @click.option(
     "--model",
-    default=None,
+    default=GenerativeTextScenario.get_default("model"),
     type=str,
     help=(
         "The ID of the model to benchmark within the backend. "
@@ -214,7 +235,7 @@ def benchmark():
 # Data configuration
 @click.option(
     "--processor",
-    default=None,
+    default=GenerativeTextScenario.get_default("processor"),
     type=str,
     help=(
         "The processor or tokenizer to use to calculate token counts for statistics "
@@ -224,7 +245,7 @@ def benchmark():
 )
 @click.option(
     "--processor-args",
-    default=None,
+    default=GenerativeTextScenario.get_default("processor_args"),
     callback=cli_tools.parse_json,
     help=(
         "A JSON string containing any arguments to pass to the processor constructor "
@@ -233,7 +254,7 @@ def benchmark():
 )
 @click.option(
     "--data-args",
-    default=None,
+    default=GenerativeTextScenario.get_default("data_args"),
     callback=cli_tools.parse_json,
     help=(
         "A JSON string containing any arguments to pass to the dataset creation "
@@ -242,7 +263,7 @@ def benchmark():
 )
 @click.option(
     "--data-sampler",
-    default=None,
+    default=GenerativeTextScenario.get_default("data_sampler"),
     type=click.Choice(["random"]),
     help=(
         "The data sampler type to use. 'random' will add a random shuffle on the data. "
@@ -301,7 +322,7 @@ def benchmark():
     "--warmup-percent",  # legacy alias
     "warmup",
     type=float,
-    default=None,
+    default=GenerativeTextScenario.get_default("warmup"),
     help=(
         "The specification around the number of requests to run before benchmarking. "
         "If within (0, 1), then the percent of requests/time to use for warmup. "
@@ -315,7 +336,7 @@ def benchmark():
     "--cooldown-percent",  # legacy alias
     "cooldown",
     type=float,
-    default=GenerativeTextScenario.get_default("cooldown_percent"),
+    default=GenerativeTextScenario.get_default("cooldown"),
     help=(
         "The specification around the number of requests to run after benchmarking. "
         "If within (0, 1), then the percent of requests/time to use for cooldown. "
@@ -328,19 +349,19 @@ def benchmark():
     "--request-samples",
     "--output-sampling",  # legacy alias
     "request_samples",
+    default=GenerativeTextScenario.get_default("request_samples"),
     type=int,
     help=(
         "The number of samples for each request status and each benchmark to save "
         "in the output file. If None (default), will save all samples. "
         "Defaults to 20."
     ),
-    default=20,
 )
 # Constraints configuration
 @click.option(
     "--max-seconds",
     type=float,
-    default=None,
+    default=GenerativeTextScenario.get_default("max_seconds"),
     help=(
         "The maximum number of seconds each benchmark can run for. "
         "If None, will run until max_requests or the data is exhausted."
@@ -349,7 +370,7 @@ def benchmark():
 @click.option(
     "--max-requests",
     type=int,
-    default=None,
+    default=GenerativeTextScenario.get_default("max_requests"),
     help=(
         "The maximum number of requests each benchmark can run for. "
         "If None, will run until max_seconds or the data is exhausted."
```
```diff
@@ -358,55 +379,22 @@
 @click.option(
     "--max-errors",
     type=int,
-    default=None,
+    default=GenerativeTextScenario.get_default("max_errors"),
     help="Maximum number of errors allowed before stopping the benchmark",
 )
 @click.option(
     "--max-error-rate",
     type=float,
-    default=None,
+    default=GenerativeTextScenario.get_default("max_error_rate"),
     help="Maximum error rate allowed before stopping the benchmark",
 )
 @click.option(
     "--max-global-error-rate",
     type=float,
-    default=None,
+    default=GenerativeTextScenario.get_default("max_global_error_rate"),
     help="Maximum global error rate allowed across all benchmarks",
 )
-def run(
-    target,
-    data,
-    profile,
-    rate,
-    random_seed,
-    # Backend Configuration
-    backend,
-    backend_kwargs,
-    model,
-    # Data configuration
-    processor,
-    processor_args,
-    data_args,
-    data_sampler,
-    # Output configuration
-    output_path,
-    output_formats,
-    # Updates configuration
-    disable_console_outputs,
-    disable_progress,
-    display_scheduler_stats,
-    # Aggregators configuration
-    output_extras,
-    warmup,
-    cooldown,
-    request_samples,
-    # Constraints configuration
-    max_seconds,
-    max_requests,
-    max_errors,
-    max_error_rate,
-    max_global_error_rate,
-):
+def run(**kwargs):
     """
     Execute a generative text benchmark against a target model backend.
 
@@ -415,53 +403,53 @@ def run(
     Supports multiple backends, data sources, output formats, and constraint types
     for flexible benchmark configuration.
     """
+    scenario = kwargs.pop("scenario")
+    click_ctx = click.get_current_context()
+    overrides = cli_tools.set_if_not_default(click_ctx, **kwargs)
+
+    try:
+        # If a scenario file was specified read from it
+        if scenario is None:
+            _scenario = GenerativeTextScenario.model_validate(overrides)
+        elif isinstance(scenario, Path):
+            _scenario = GenerativeTextScenario.from_file(scenario, overrides)
+        else:  # Only builtins can make it here; click will catch anything else
+            _scenario = GenerativeTextScenario.from_builtin(scenario, overrides)
+    except ValidationError as e:
+        # Translate pydantic validation error to click argument error
+        errs = e.errors(include_url=False, include_context=True, include_input=True)
+        param_name = "--" + str(errs[0]["loc"][0]).replace("_", "-")
+        raise click.BadParameter(
+            errs[0]["msg"], ctx=click_ctx, param_hint=param_name
+        ) from e
+
     if HAS_UVLOOP:
         asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
     asyncio.run(
         benchmark_generative_text(
-            target=target,
-            data=data,
-            profile=profile,
-            rate=rate,
-            random_seed=random_seed,
-            # Backend configuration
-            backend=backend,
-            backend_kwargs=backend_kwargs,
-            model=model,
-            # Data configuration
-            processor=processor,
-            processor_args=processor_args,
-            data_args=data_args,
-            data_sampler=data_sampler,
+            scenario=_scenario,
             # Output configuration
-            output_path=output_path,
+            output_path=kwargs["output_path"],
             output_formats=[
                 fmt
-                for fmt in output_formats
-                if not disable_console_outputs or fmt != "console"
+                for fmt in kwargs["output_formats"]
+                if not kwargs["disable_console_outputs"] or fmt != "console"
             ],
             # Updates configuration
             progress=(
                 [
                     GenerativeConsoleBenchmarkerProgress(
-                        display_scheduler_stats=display_scheduler_stats
+                        display_scheduler_stats=kwargs["display_scheduler_stats"]
                     )
                 ]
-                if not disable_progress
+                if not kwargs["disable_progress"]
                 else None
             ),
-            print_updates=not disable_console_outputs,
+            print_updates=not kwargs["disable_console_outputs"],
             # Aggregators configuration
-            add_aggregators={"extras": InjectExtrasAggregator(extras=output_extras)},
-            warmup=warmup,
-            cooldown=cooldown,
-            request_samples=request_samples,
-            # Constraints configuration
-            max_seconds=max_seconds,
-            max_requests=max_requests,
-            max_errors=max_errors,
-            max_error_rate=max_error_rate,
-            max_global_error_rate=max_global_error_rate,
+            add_aggregators={
+                "extras": InjectExtrasAggregator(extras=kwargs["output_extras"])
+            },
         )
     )
```

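Taken together, the rewritten `run(**kwargs)` resolves configuration in three steps: `cli_tools.set_if_not_default` keeps only the options the user explicitly set, those overrides are layered onto a scenario (bare defaults, a file via `from_file`, or a builtin via `from_builtin`), and any pydantic `ValidationError` is re-raised as a `click.BadParameter` naming the offending flag. A self-contained sketch of that error translation, with a hypothetical stand-in model in place of `GenerativeTextScenario`:

```python
from typing import Optional

import click
from pydantic import BaseModel, ValidationError


class ScenarioStandIn(BaseModel):
    """Stand-in for GenerativeTextScenario, just to exercise the error path."""

    max_requests: Optional[int] = None


def validate_overrides(overrides: dict) -> ScenarioStandIn:
    try:
        return ScenarioStandIn.model_validate(overrides)
    except ValidationError as e:
        errs = e.errors(include_url=False)
        # Map the failing model field back to its CLI flag spelling
        param_name = "--" + str(errs[0]["loc"][0]).replace("_", "-")
        raise click.BadParameter(errs[0]["msg"], param_hint=param_name) from e


# validate_overrides({"max_requests": "not-a-number"})
# -> click.BadParameter: Invalid value for '--max-requests': ...
```

The upshot is that a bad value in a scenario file or on the command line surfaces as a normal click usage error instead of a raw pydantic traceback.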
src/guidellm/benchmark/__init__.py

Lines changed: 22 additions & 0 deletions
```diff
@@ -40,9 +40,23 @@
     BenchmarkerProgressGroup,
     GenerativeConsoleBenchmarkerProgress,
 )
+from .scenario import (
+    GenerativeTextScenario,
+    Scenario,
+    enable_scenarios,
+    get_builtin_scenarios,
+)
+from .types import (
+    AggregatorInputT,
+    DataInputT,
+    OutputFormatT,
+    ProcessorInputT,
+    ProgressInputT,
+)
 
 __all__ = [
     "Aggregator",
+    "AggregatorInputT",
     "AggregatorState",
     "AsyncProfile",
     "Benchmark",
@@ -54,6 +68,7 @@
     "BenchmarkerProgressGroup",
     "CompilableAggregator",
     "ConcurrentProfile",
+    "DataInputT",
     "GenerativeBenchmark",
     "GenerativeBenchmarkerCSV",
     "GenerativeBenchmarkerConsole",
@@ -65,14 +80,21 @@
     "GenerativeRequestStats",
     "GenerativeRequestsAggregator",
     "GenerativeStatsProgressAggregator",
+    "GenerativeTextScenario",
     "InjectExtrasAggregator",
+    "OutputFormatT",
+    "ProcessorInputT",
     "Profile",
     "ProfileType",
+    "ProgressInputT",
+    "Scenario",
     "SchedulerStatsAggregator",
     "SerializableAggregator",
     "SweepProfile",
     "SynchronousProfile",
     "ThroughputProfile",
     "benchmark_generative_text",
+    "enable_scenarios",
+    "get_builtin_scenarios",
     "reimport_benchmarks_report",
 ]
```
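With these re-exports, downstream code can import the scenario helpers from the package root rather than reaching into submodules. A small usage sketch, assuming a guidellm install that includes this commit:

```python
# These names are re-exported by this commit's benchmark/__init__.py
from guidellm.benchmark import GenerativeTextScenario, get_builtin_scenarios

# get_builtin_scenarios() feeds click.Choice in __main__.py, so it yields the
# scenario names accepted by `--scenario`
for name in get_builtin_scenarios():
    print(name)
```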

src/guidellm/benchmark/benchmarker.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -124,7 +124,7 @@ async def run(
         backend=backend,
         strategy=strategy,
         env=environment,
-        **constraints,
+        **constraints or {},
     ):
         aggregators_update = AggregatorState()
         for key, aggregator in benchmark_aggregators.items():
```
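This small fix matters because unpacking `None` with `**` raises a `TypeError`; in a call, `**constraints or {}` parses as `**(constraints or {})`, so a `None` (or otherwise falsy) constraints mapping degrades to an empty dict. A minimal repro:

```python
def call(**kwargs):
    return kwargs


constraints = None
# call(**constraints)             # TypeError: argument after ** must be a mapping
print(call(**(constraints or {})))  # prints {} -- the `or {}` guard makes None safe
```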
