
Commit ab5466b

[GuideLLM Refactor] Fix from-file (#366)
## Summary

This PR ports the new functionality from `benchmark run` to `benchmark from-file`, and does so in a way that reuses as much code as practical to keep one source of truth.

## Details

- Fixes `from-file` by making it use the new output format.
- Moves code related to the new output formats into separate functions that are called from both benchmark entrypoints.
- Moves additional chunks of code out of the large `benchmark run` entrypoint function for modularity.

## Test Plan

Run a benchmark with a JSON or YAML output, then use `from-file` to re-import and export it. Any output type supported by `benchmark run` can be selected:

- `guidellm benchmark from-file ./result.json --output-formats console`
- `guidellm benchmark from-file ./result.yaml --output-formats yaml`

## Related Issues

---

- [x] "I certify that all code in this PR is my own, except as noted below."

## Use of AI

- [x] Includes AI-assisted code completion
- [ ] Includes code generated by an AI application
- [ ] Includes AI-generated tests (NOTE: AI-written tests should have a docstring that includes `## WRITTEN BY AI ##`)

---------

Signed-off-by: Jared O'Connell <[email protected]>
1 parent 6d0d4c2 commit ab5466b
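For reviewers, here is a minimal sketch of the re-export flow this commit wires up, equivalent to the Test Plan commands above. It is an illustration, not code from this commit: it assumes the module path shown in the `entrypoints.py` diff below and a `./result.json` produced by a prior `guidellm benchmark run` (the function's docstring assumes the input file exists).

```python
# Sketch: re-import a saved report and re-export it through the shared output
# pipeline, mirroring what the `from-file` CLI command now does internally.
import asyncio
from pathlib import Path

from guidellm.benchmark.entrypoints import reimport_benchmarks_report

report, results = asyncio.run(
    reimport_benchmarks_report(
        file=Path("./result.json"),           # report saved by `benchmark run`
        output_path=Path.cwd(),               # directory: file formats saved here
        output_formats=("console", "json"),   # any format `benchmark run` supports
    )
)
```

Because the refactored `reimport_benchmarks_report` is async and returns the report plus per-format results, the CLI wrapper simply calls it through `asyncio.run(...)` (see the `__main__.py` diff below).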

2 files changed: +158 / -83 lines


src/guidellm/__main__.py

Lines changed: 22 additions & 15 deletions
@@ -473,31 +473,38 @@ def run(
 )
 @click.option(
     "--output-path",
-    type=click.Path(file_okay=True, dir_okay=True, exists=False),
-    default=None,
-    is_flag=False,
-    flag_value=Path.cwd() / "benchmarks_reexported.json",
+    type=click.Path(),
+    default=Path.cwd(),
+    help=(
+        "Allows re-exporting the benchmarks to other formats. "
+        "The path to save the output formats to, if the format is a file type. "
+        "If it is a directory, it will save all output formats selected under it. "
+        "If it is a file, it will save the corresponding output format to that file. "
+        "Any output formats that were given that do not match the file extension will "
+        "be saved in the parent directory of the file path. "
+        "Defaults to the current working directory. "
+    ),
+)
+@click.option(
+    "--output-formats",
+    multiple=True,
+    type=str,
+    default=("console", "json"),  # ("console", "json", "html", "csv")
     help=(
-        "Allows re-exporting the benchmarks to another format. "
-        "The path to save the output to. If it is a directory, "
-        "it will save benchmarks.json under it. "
-        "Otherwise, json, yaml, or csv files are supported for output types "
-        "which will be read from the extension for the file path. "
-        "This input is optional. If the output path flag is not provided, "
-        "the benchmarks will not be reexported. If the flag is present but "
-        "no value is specified, it will default to the current directory "
-        "with the file name `benchmarks_reexported.json`."
+        "The output formats to use for the benchmark results. "
+        "Defaults to console, json, html, and csv where the file formats "
+        "will be saved at the specified output path."
     ),
 )
-def from_file(path, output_path):
+def from_file(path, output_path, output_formats):
     """
     Load and optionally re-export a previously saved benchmark report.

     Imports benchmark results from a saved file and provides optional conversion
     to different output formats. Supports JSON, YAML, and CSV export formats
     based on the output file extension.
     """
-    reimport_benchmarks_report(path, output_path)
+    asyncio.run(reimport_benchmarks_report(path, output_path, output_formats))


 @cli.command(
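The new `--output-path` help text above distinguishes directory and file targets. Below is a hedged illustration of that behavior using `GenerativeBenchmarkerOutput.resolve`, the resolver the entrypoints delegate to (see the `entrypoints.py` diff that follows). The paths and format selections are arbitrary examples, not part of this commit, and the parent-directory fallback is as described by the help text.

```python
# Sketch of the --output-path semantics described in the option help text.
from pathlib import Path

from guidellm.benchmark.output import GenerativeBenchmarkerOutput

# Directory path: every selected file format is saved under ./exports.
outputs = GenerativeBenchmarkerOutput.resolve(
    output_formats=("console", "json", "csv"),
    output_path=Path("./exports"),
)

# File path: the json output goes to ./exports/report.json, while selected
# formats that do not match the .json extension fall back to ./exports.
outputs = GenerativeBenchmarkerOutput.resolve(
    output_formats=("json", "csv"),
    output_path=Path("./exports/report.json"),
)
```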

src/guidellm/benchmark/entrypoints.py

Lines changed: 136 additions & 68 deletions
@@ -26,7 +26,6 @@
2626
from guidellm.benchmark.benchmarker import Benchmarker
2727
from guidellm.benchmark.objects import GenerativeBenchmark, GenerativeBenchmarksReport
2828
from guidellm.benchmark.output import (
29-
GenerativeBenchmarkerConsole,
3029
GenerativeBenchmarkerOutput,
3130
)
3231
from guidellm.benchmark.profile import Profile, ProfileType
@@ -53,6 +52,97 @@
5352
_CURRENT_WORKING_DIR = Path.cwd()
5453

5554

55+
# Data types
56+
57+
DataType = (
58+
Iterable[str]
59+
| Iterable[dict[str, Any]]
60+
| Dataset
61+
| DatasetDict
62+
| IterableDataset
63+
| IterableDatasetDict
64+
| str
65+
| Path
66+
)
67+
68+
OutputFormatType = (
69+
tuple[str, ...]
70+
| list[str]
71+
| dict[str, str | dict[str, Any] | GenerativeBenchmarkerOutput]
72+
| None
73+
)
74+
75+
76+
# Helper functions
77+
78+
async def initialize_backend(
79+
backend: BackendType | Backend,
80+
target: str,
81+
model: str | None,
82+
backend_kwargs: dict[str, Any] | None,
83+
) -> Backend:
84+
backend = (
85+
Backend.create(
86+
backend, target=target, model=model, **(backend_kwargs or {})
87+
)
88+
if not isinstance(backend, Backend)
89+
else backend
90+
)
91+
await backend.process_startup()
92+
await backend.validate()
93+
return backend
94+
95+
96+
async def resolve_profile(
97+
constraint_inputs: dict[str, int | float],
98+
profile: Profile | str | None,
99+
rate: list[float] | None,
100+
random_seed: int,
101+
constraints: dict[str, ConstraintInitializer | Any],
102+
):
103+
for key, val in constraint_inputs.items():
104+
if val is not None:
105+
constraints[key] = val
106+
if not isinstance(profile, Profile):
107+
if isinstance(profile, str):
108+
profile = Profile.create(
109+
rate_type=profile,
110+
rate=rate,
111+
random_seed=random_seed,
112+
constraints={**constraints},
113+
)
114+
else:
115+
raise ValueError(f"Expected string for profile; got {type(profile)}")
116+
117+
elif constraints:
118+
raise ValueError(
119+
"Constraints must be empty when providing a Profile instance. "
120+
f"Provided constraints: {constraints} ; provided profile: {profile}"
121+
)
122+
return profile
123+
124+
async def resolve_output_formats(
125+
output_formats: OutputFormatType,
126+
output_path: str | Path | None,
127+
) -> dict[str, GenerativeBenchmarkerOutput]:
128+
output_formats = GenerativeBenchmarkerOutput.resolve(
129+
output_formats=(output_formats or {}), output_path=output_path
130+
)
131+
return output_formats
132+
133+
async def finalize_outputs(
134+
report: GenerativeBenchmarksReport,
135+
resolved_output_formats: dict[str, GenerativeBenchmarkerOutput]
136+
):
137+
output_format_results = {}
138+
for key, output in resolved_output_formats.items():
139+
output_result = await output.finalize(report)
140+
output_format_results[key] = output_result
141+
return output_format_results
142+
143+
144+
# Complete entrypoints
145+
56146
async def benchmark_with_scenario(scenario: Scenario, **kwargs):
57147
"""
58148
Run a benchmark using a scenario and specify any extra arguments
@@ -67,16 +157,7 @@ async def benchmark_with_scenario(scenario: Scenario, **kwargs):
67157
# @validate_call(config={"arbitrary_types_allowed": True})
68158
async def benchmark_generative_text( # noqa: C901
69159
target: str,
70-
data: (
71-
Iterable[str]
72-
| Iterable[dict[str, Any]]
73-
| Dataset
74-
| DatasetDict
75-
| IterableDataset
76-
| IterableDatasetDict
77-
| str
78-
| Path
79-
),
160+
data: DataType,
80161
profile: StrategyType | ProfileType | Profile,
81162
rate: float | list[float] | None = None,
82163
random_seed: int = 42,
@@ -91,12 +172,7 @@ async def benchmark_generative_text( # noqa: C901
91172
data_sampler: Literal["random"] | None = None,
92173
# Output configuration
93174
output_path: str | Path | None = _CURRENT_WORKING_DIR,
94-
output_formats: (
95-
tuple[str, ...]
96-
| list[str]
97-
| dict[str, str | dict[str, Any] | GenerativeBenchmarkerOutput]
98-
| None
99-
) = ("console", "json", "html", "csv"),
175+
output_formats: OutputFormatType = ("console", "json", "html", "csv"),
100176
# Updates configuration
101177
progress: tuple[str, ...] | list[str] | list[BenchmarkerProgress] | None = None,
102178
print_updates: bool = False,
@@ -120,16 +196,7 @@ async def benchmark_generative_text( # noqa: C901
120196
with console.print_update_step(
121197
title=f"Initializing backend {backend}"
122198
) as console_step:
123-
backend = (
124-
Backend.create(
125-
backend, target=target, model=model, **(backend_kwargs or {})
126-
)
127-
if not isinstance(backend, Backend)
128-
else backend
129-
)
130-
console_step.update(f"{backend.__class__.__name__} backend initialized")
131-
await backend.process_startup()
132-
await backend.validate()
199+
backend = await initialize_backend(backend, target, model, backend_kwargs)
133200
console_step.finish(
134201
title=f"{backend.__class__.__name__} backend initialized",
135202
details=backend.info,
@@ -190,27 +257,19 @@ async def benchmark_generative_text( # noqa: C901
190257
with console.print_update_step(
191258
title=f"Resolving profile {profile}"
192259
) as console_step:
193-
for key, val in {
194-
"max_seconds": max_seconds,
195-
"max_requests": max_requests,
196-
"max_errors": max_errors,
197-
"max_error_rate": max_error_rate,
198-
"max_global_error_rate": max_global_error_rate,
199-
}.items():
200-
if val is not None:
201-
constraints[key] = val
202-
if not isinstance(profile, Profile):
203-
profile = Profile.create(
204-
rate_type=profile,
205-
rate=rate,
206-
random_seed=random_seed,
207-
constraints={**constraints},
208-
)
209-
elif constraints:
210-
raise ValueError(
211-
"Constraints must be empty when providing a Profile instance. "
212-
f"Provided constraints: {constraints} ; provided profile: {profile}"
213-
)
260+
profile = await resolve_profile(
261+
{
262+
"max_seconds": max_seconds,
263+
"max_requests": max_requests,
264+
"max_errors": max_errors,
265+
"max_error_rate": max_error_rate,
266+
"max_global_error_rate": max_global_error_rate,
267+
},
268+
profile,
269+
rate,
270+
random_seed,
271+
constraints,
272+
)
214273
console_step.finish(
215274
title=f"{profile.__class__.__name__} profile resolved",
216275
details=InfoMixin.extract_from_obj(profile),
@@ -237,12 +296,10 @@ async def benchmark_generative_text( # noqa: C901
237296
)
238297

239298
with console.print_update_step(title="Resolving output formats") as console_step:
240-
output_formats = GenerativeBenchmarkerOutput.resolve(
241-
output_formats=(output_formats or {}), output_path=output_path
242-
)
299+
resolved_output_formats = await resolve_output_formats(output_formats, output_path)
243300
console_step.finish(
244301
title="Output formats resolved",
245-
details={key: str(val) for key, val in output_formats.items()},
302+
details={key: str(val) for key, val in resolved_output_formats.items()},
246303
status_level="success",
247304
)
248305

@@ -278,14 +335,11 @@ async def benchmark_generative_text( # noqa: C901
278335
if benchmark:
279336
report.benchmarks.append(benchmark)
280337

281-
output_format_results = {}
282-
for key, output in output_formats.items():
283-
output_result = await output.finalize(report)
284-
output_format_results[key] = output_result
338+
output_format_results = await finalize_outputs(report, resolved_output_formats)
285339

286340
console.print("\n\n")
287341
console.print_update(
288-
title=f"Benchmarking complete, generated {len(report.benchmarks)} benchmark(s)",
342+
title=f"Benchmarking complete; generated {len(report.benchmarks)} benchmark(s)",
289343
status="success",
290344
)
291345
for key, value in output_format_results.items():
@@ -294,20 +348,34 @@ async def benchmark_generative_text( # noqa: C901
294348
return report, output_format_results
295349

296350

297-
def reimport_benchmarks_report(file: Path, output_path: Path | None) -> None:
351+
async def reimport_benchmarks_report(
352+
file: Path,
353+
output_path: Path | None,
354+
output_formats: OutputFormatType = ("console", "json", "html", "csv"),
355+
) -> tuple[GenerativeBenchmarksReport, dict[str, Any]]:
298356
"""
299357
The command-line entry point for re-importing and displaying an
300-
existing benchmarks report. Can also specify
358+
existing benchmarks report. Can also specify an output format.
301359
Assumes the file provided exists.
302360
"""
303-
report = GenerativeBenchmarksReport.load_file(file)
304-
console_output = GenerativeBenchmarkerConsole()
305-
console_output.finalize(report)
306361
console = Console()
362+
with console.print_update_step(
363+
title=f"Loading benchmarks from {file}"
364+
) as console_step:
365+
report = GenerativeBenchmarksReport.load_file(file)
366+
console_step.finish(f"Import of old benchmarks complete; loaded {len(report.benchmarks)} benchmark(s)")
367+
368+
with console.print_update_step(title="Resolving output formats") as console_step:
369+
resolved_output_formats = await resolve_output_formats(output_formats, output_path)
370+
console_step.finish(
371+
title="Output formats resolved",
372+
details={key: str(val) for key, val in resolved_output_formats.items()},
373+
status_level="success",
374+
)
307375

308-
if output_path:
309-
with console.print_update_step(
310-
title=f"Saving benchmarks report to {output_path}..."
311-
) as console_step:
312-
saved_path = report.save_file(output_path)
313-
console_step.finish(title=f"Benchmarks report saved to {saved_path}")
376+
output_format_results = await finalize_outputs(report, resolved_output_formats)
377+
378+
for key, value in output_format_results.items():
379+
console.print_update(title=f" {key:<8}: {value}", status="debug")
380+
381+
return report, output_format_results
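With these helpers extracted, both `benchmark run` and `benchmark from-file` share one output pipeline. A minimal sketch of how the pieces compose, assuming only the names introduced in this diff (console progress steps and error handling omitted):

```python
import asyncio
from pathlib import Path

from guidellm.benchmark.entrypoints import finalize_outputs, resolve_output_formats
from guidellm.benchmark.objects import GenerativeBenchmarksReport


async def export_report(path: Path) -> dict:
    # Load a previously saved report, resolve the requested output formats
    # once, then finalize every resolved output against the loaded report.
    report = GenerativeBenchmarksReport.load_file(path)
    outputs = await resolve_output_formats(("console", "json"), Path.cwd())
    return await finalize_outputs(report, outputs)


results = asyncio.run(export_report(Path("./result.json")))
```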
