diff --git a/src/guidellm/__main__.py b/src/guidellm/__main__.py
index 675003a9..9d85346b 100644
--- a/src/guidellm/__main__.py
+++ b/src/guidellm/__main__.py
@@ -473,23 +473,30 @@ def run(
 )
 @click.option(
     "--output-path",
-    type=click.Path(file_okay=True, dir_okay=True, exists=False),
-    default=None,
-    is_flag=False,
-    flag_value=Path.cwd() / "benchmarks_reexported.json",
+    type=click.Path(),
+    default=Path.cwd(),
+    help=(
+        "Allows re-exporting the benchmarks to other formats. "
+        "The path to save the selected output formats to. "
+        "If it is a directory, all file-based output formats are saved under it. "
+        "If it is a file, the output format matching the file extension is saved "
+        "to that file, and any other selected formats are saved in the parent "
+        "directory of that file. "
+        "Defaults to the current working directory."
+    ),
+)
+@click.option(
+    "--output-formats",
+    multiple=True,
+    type=str,
+    default=("console", "json"),  # ("console", "json", "html", "csv")
     help=(
-        "Allows re-exporting the benchmarks to another format. "
-        "The path to save the output to. If it is a directory, "
-        "it will save benchmarks.json under it. "
-        "Otherwise, json, yaml, or csv files are supported for output types "
-        "which will be read from the extension for the file path. "
-        "This input is optional. If the output path flag is not provided, "
-        "the benchmarks will not be reexported. If the flag is present but "
-        "no value is specified, it will default to the current directory "
-        "with the file name `benchmarks_reexported.json`."
+        "The output formats to use for the benchmark results. "
+        "Defaults to console and json; file-based formats are saved "
+        "at the specified output path."
     ),
 )
-def from_file(path, output_path):
+def from_file(path, output_path, output_formats):
     """
     Load and optionally re-export a previously saved benchmark report.

@@ -497,7 +504,7 @@ def from_file(path, output_path):
     to different output formats. Supports JSON, YAML, and CSV export formats
     based on the output file extension.
     """
-    reimport_benchmarks_report(path, output_path)
+    asyncio.run(reimport_benchmarks_report(path, output_path, output_formats))


 @cli.command(
diff --git a/src/guidellm/benchmark/entrypoints.py b/src/guidellm/benchmark/entrypoints.py
index 60077ee8..828402d8 100644
--- a/src/guidellm/benchmark/entrypoints.py
+++ b/src/guidellm/benchmark/entrypoints.py
@@ -26,7 +26,6 @@
 from guidellm.benchmark.benchmarker import Benchmarker
 from guidellm.benchmark.objects import GenerativeBenchmark, GenerativeBenchmarksReport
 from guidellm.benchmark.output import (
-    GenerativeBenchmarkerConsole,
     GenerativeBenchmarkerOutput,
 )
 from guidellm.benchmark.profile import Profile, ProfileType
@@ -53,6 +52,97 @@
 _CURRENT_WORKING_DIR = Path.cwd()


+# Data types
+
+DataType = (
+    Iterable[str]
+    | Iterable[dict[str, Any]]
+    | Dataset
+    | DatasetDict
+    | IterableDataset
+    | IterableDatasetDict
+    | str
+    | Path
+)
+
+OutputFormatType = (
+    tuple[str, ...]
+    | list[str]
+    | dict[str, str | dict[str, Any] | GenerativeBenchmarkerOutput]
+    | None
+)
+
+
+# Helper functions
+
+async def initialize_backend(
+    backend: BackendType | Backend,
+    target: str,
+    model: str | None,
+    backend_kwargs: dict[str, Any] | None,
+) -> Backend:
+    backend = (
+        Backend.create(
+            backend, target=target, model=model, **(backend_kwargs or {})
+        )
+        if not isinstance(backend, Backend)
+        else backend
+    )
+    await backend.process_startup()
+    await backend.validate()
+    return backend
+
+
+async def resolve_profile(
+    constraint_inputs: dict[str, int | float],
+    profile: Profile | str | None,
+    rate: list[float] | None,
+    random_seed: int,
+    constraints: dict[str, ConstraintInitializer | Any],
+):
+    for key, val in constraint_inputs.items():
+        if val is not None:
+            constraints[key] = val
+    if not isinstance(profile, Profile):
+        if isinstance(profile, str):
+            profile = Profile.create(
+                rate_type=profile,
+                rate=rate,
+                random_seed=random_seed,
+                constraints={**constraints},
+            )
+        else:
+            raise ValueError(f"Expected string for profile; got {type(profile)}")
+
+    elif constraints:
+        raise ValueError(
+            "Constraints must be empty when providing a Profile instance. "
+            f"Provided constraints: {constraints} ; provided profile: {profile}"
+        )
+    return profile
+
+async def resolve_output_formats(
+    output_formats: OutputFormatType,
+    output_path: str | Path | None,
+) -> dict[str, GenerativeBenchmarkerOutput]:
+    output_formats = GenerativeBenchmarkerOutput.resolve(
+        output_formats=(output_formats or {}), output_path=output_path
+    )
+    return output_formats
+
+async def finalize_outputs(
+    report: GenerativeBenchmarksReport,
+    resolved_output_formats: dict[str, GenerativeBenchmarkerOutput]
+):
+    output_format_results = {}
+    for key, output in resolved_output_formats.items():
+        output_result = await output.finalize(report)
+        output_format_results[key] = output_result
+    return output_format_results
+
+
+# Complete entrypoints
+
 async def benchmark_with_scenario(scenario: Scenario, **kwargs):
     """
     Run a benchmark using a scenario and specify any extra arguments
@@ -67,16 +157,7 @@ async def benchmark_with_scenario(scenario: Scenario, **kwargs):
 # @validate_call(config={"arbitrary_types_allowed": True})
 async def benchmark_generative_text(  # noqa: C901
     target: str,
-    data: (
-        Iterable[str]
-        | Iterable[dict[str, Any]]
-        | Dataset
-        | DatasetDict
-        | IterableDataset
-        | IterableDatasetDict
-        | str
-        | Path
-    ),
+    data: DataType,
     profile: StrategyType | ProfileType | Profile,
     rate: float | list[float] | None = None,
     random_seed: int = 42,
@@ -91,12 +172,7 @@ async def benchmark_generative_text(  # noqa: C901
     data_sampler: Literal["random"] | None = None,
     # Output configuration
     output_path: str | Path | None = _CURRENT_WORKING_DIR,
-    output_formats: (
-        tuple[str, ...]
-        | list[str]
-        | dict[str, str | dict[str, Any] | GenerativeBenchmarkerOutput]
-        | None
-    ) = ("console", "json", "html", "csv"),
+    output_formats: OutputFormatType = ("console", "json", "html", "csv"),
     # Updates configuration
     progress: tuple[str, ...] | list[str] | list[BenchmarkerProgress] | None = None,
     print_updates: bool = False,
@@ -120,16 +196,7 @@ async def benchmark_generative_text(  # noqa: C901
     with console.print_update_step(
         title=f"Initializing backend {backend}"
     ) as console_step:
-        backend = (
-            Backend.create(
-                backend, target=target, model=model, **(backend_kwargs or {})
-            )
-            if not isinstance(backend, Backend)
-            else backend
-        )
-        console_step.update(f"{backend.__class__.__name__} backend initialized")
-        await backend.process_startup()
-        await backend.validate()
+        backend = await initialize_backend(backend, target, model, backend_kwargs)
         console_step.finish(
             title=f"{backend.__class__.__name__} backend initialized",
             details=backend.info,
@@ -190,27 +257,19 @@ async def benchmark_generative_text(  # noqa: C901
     with console.print_update_step(
         title=f"Resolving profile {profile}"
     ) as console_step:
-        for key, val in {
-            "max_seconds": max_seconds,
-            "max_requests": max_requests,
-            "max_errors": max_errors,
-            "max_error_rate": max_error_rate,
-            "max_global_error_rate": max_global_error_rate,
-        }.items():
-            if val is not None:
-                constraints[key] = val
-        if not isinstance(profile, Profile):
-            profile = Profile.create(
-                rate_type=profile,
-                rate=rate,
-                random_seed=random_seed,
-                constraints={**constraints},
-            )
-        elif constraints:
-            raise ValueError(
-                "Constraints must be empty when providing a Profile instance. "
-                f"Provided constraints: {constraints} ; provided profile: {profile}"
-            )
+        profile = await resolve_profile(
+            {
+                "max_seconds": max_seconds,
+                "max_requests": max_requests,
+                "max_errors": max_errors,
+                "max_error_rate": max_error_rate,
+                "max_global_error_rate": max_global_error_rate,
+            },
+            profile,
+            rate,
+            random_seed,
+            constraints,
+        )
         console_step.finish(
             title=f"{profile.__class__.__name__} profile resolved",
             details=InfoMixin.extract_from_obj(profile),
@@ -237,12 +296,10 @@ async def benchmark_generative_text(  # noqa: C901
     )

     with console.print_update_step(title="Resolving output formats") as console_step:
-        output_formats = GenerativeBenchmarkerOutput.resolve(
-            output_formats=(output_formats or {}), output_path=output_path
-        )
+        resolved_output_formats = await resolve_output_formats(output_formats, output_path)
         console_step.finish(
             title="Output formats resolved",
-            details={key: str(val) for key, val in output_formats.items()},
+            details={key: str(val) for key, val in resolved_output_formats.items()},
             status_level="success",
         )

@@ -278,14 +335,11 @@ async def benchmark_generative_text(  # noqa: C901
         if benchmark:
             report.benchmarks.append(benchmark)

-    output_format_results = {}
-    for key, output in output_formats.items():
-        output_result = await output.finalize(report)
-        output_format_results[key] = output_result
+    output_format_results = await finalize_outputs(report, resolved_output_formats)

     console.print("\n\n")
     console.print_update(
-        title=f"Benchmarking complete, generated {len(report.benchmarks)} benchmark(s)",
+        title=f"Benchmarking complete; generated {len(report.benchmarks)} benchmark(s)",
         status="success",
     )
     for key, value in output_format_results.items():
@@ -294,20 +348,34 @@ async def benchmark_generative_text(  # noqa: C901
     return report, output_format_results


-def reimport_benchmarks_report(file: Path, output_path: Path | None) -> None:
+async def reimport_benchmarks_report(
+    file: Path,
+    output_path: Path | None,
+    output_formats: OutputFormatType = ("console", "json", "html", "csv"),
+) -> tuple[GenerativeBenchmarksReport, dict[str, Any]]:
     """
     The command-line entry point for re-importing and displaying an
-    existing benchmarks report. Can also specify
+    existing benchmarks report. Output formats for re-export may also be specified.
     Assumes the file provided exists.
     """
-    report = GenerativeBenchmarksReport.load_file(file)
-    console_output = GenerativeBenchmarkerConsole()
-    console_output.finalize(report)
     console = Console()
+    with console.print_update_step(
+        title=f"Loading benchmarks from {file}"
+    ) as console_step:
+        report = GenerativeBenchmarksReport.load_file(file)
+        console_step.finish(f"Benchmark import complete; loaded {len(report.benchmarks)} benchmark(s)")
+
+    with console.print_update_step(title="Resolving output formats") as console_step:
+        resolved_output_formats = await resolve_output_formats(output_formats, output_path)
+        console_step.finish(
+            title="Output formats resolved",
+            details={key: str(val) for key, val in resolved_output_formats.items()},
+            status_level="success",
+        )

-    if output_path:
-        with console.print_update_step(
-            title=f"Saving benchmarks report to {output_path}..."
-        ) as console_step:
-            saved_path = report.save_file(output_path)
-            console_step.finish(title=f"Benchmarks report saved to {saved_path}")
+    output_format_results = await finalize_outputs(report, resolved_output_formats)
+
+    for key, value in output_format_results.items():
+        console.print_update(title=f" {key:<8}: {value}", status="debug")
+
+    return report, output_format_results
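
Reviewer note: a minimal usage sketch (not part of this diff) of the new async re-export path, mirroring what the updated `from_file` command does via `asyncio.run`. It relies only on the signature introduced above; the file name `benchmarks.json`, the `./reexported` directory, and the chosen formats are illustrative placeholders.

import asyncio
from pathlib import Path

from guidellm.benchmark.entrypoints import reimport_benchmarks_report


async def main() -> None:
    # Load a previously saved report and re-export it. "benchmarks.json" and
    # "./reexported" are placeholder paths; the formats are a subset of the
    # supported values ("console", "json", "html", "csv").
    report, results = await reimport_benchmarks_report(
        file=Path("benchmarks.json"),
        output_path=Path("./reexported"),
        output_formats=("console", "csv"),
    )
    print(f"Re-exported {len(report.benchmarks)} benchmark(s): {list(results)}")


if __name__ == "__main__":
    asyncio.run(main())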