From 1f457e8e3b98847ccbcd03b1aa70442f8da48f7b Mon Sep 17 00:00:00 2001 From: Jared O'Connell Date: Sat, 20 Sep 2025 00:19:49 -0400 Subject: [PATCH 1/3] Progress towards fixing from-file Signed-off-by: Jared O'Connell --- src/guidellm/__main__.py | 2 +- src/guidellm/benchmark/entrypoints.py | 335 ++++++++++++++++---------- 2 files changed, 210 insertions(+), 127 deletions(-) diff --git a/src/guidellm/__main__.py b/src/guidellm/__main__.py index 675003a9..336f9777 100644 --- a/src/guidellm/__main__.py +++ b/src/guidellm/__main__.py @@ -497,7 +497,7 @@ def from_file(path, output_path): to different output formats. Supports JSON, YAML, and CSV export formats based on the output file extension. """ - reimport_benchmarks_report(path, output_path) + asyncio.run(reimport_benchmarks_report(path, output_path)) @cli.command( diff --git a/src/guidellm/benchmark/entrypoints.py b/src/guidellm/benchmark/entrypoints.py index 60077ee8..1e164fe3 100644 --- a/src/guidellm/benchmark/entrypoints.py +++ b/src/guidellm/benchmark/entrypoints.py @@ -41,7 +41,7 @@ NonDistributedEnvironment, StrategyType, ) -from guidellm.utils import Console, InfoMixin +from guidellm.utils import Console, InfoMixin, ConsoleUpdateStep __all__ = [ "benchmark_generative_text", @@ -52,6 +52,173 @@ _CURRENT_WORKING_DIR = Path.cwd() +DataType = ( + Iterable[str] + | Iterable[dict[str, Any]] + | Dataset + | DatasetDict + | IterableDataset + | IterableDatasetDict + | str + | Path +) + +ProcessorType = str | Path | PreTrainedTokenizerBase + +OutputFormatType = ( + tuple[str, ...] + | list[str] + | dict[str, str | dict[str, Any] | GenerativeBenchmarkerOutput] + | None +) + + +# TODO: Determine final location of the helper functions. + + +async def initialize_backend( + backend: BackendType | Backend, + target: str, + model: str | None, + backend_kwargs: dict[str, Any] | None, + console_step: ConsoleUpdateStep, +) -> Backend: + backend = ( + Backend.create( + backend, target=target, model=model, **(backend_kwargs or {}) + ) + if not isinstance(backend, Backend) + else backend + ) + console_step.update(f"{backend.__class__.__name__} backend initialized") + await backend.process_startup() + await backend.validate() + console_step.finish( + title=f"{backend.__class__.__name__} backend initialized", + details=backend.info, + status_level="success", + ) + return backend + + +async def resolve_processor( + processor: ProcessorType | None, + model: str | None, + backend: BackendType | Backend, + console_step: ConsoleUpdateStep, +) -> ProcessorType: + if processor is not None: + console_step.finish( + title="Processor resolved", + details=f"Using processor '{processor}'", + status_level="success", + ) + elif model is not None: + console_step.finish( + title="Processor resolved", + details=f"Using model '{model}' as processor", + status_level="success", + ) + processor = model + else: + console_step.update( + title="Resolving processor from backend.default_model", + status_level="info", + ) + processor = await backend.default_model() + console_step.finish( + title="Processor resolved", + details=( + f"Using model '{processor}' from backend " + f"{backend.__class__.__name__} as processor" + ), + status_level="success", + ) + await backend.process_shutdown() + return processor + + +async def init_request_loader( + data: DataType, + data_args: dict[str, Any] | None, + processor: ProcessorType, + console_step: ConsoleUpdateStep, +) -> GenerativeRequestLoader: + request_loader = GenerativeRequestLoader( + data=data, + data_args=data_args, + processor=processor, + processor_args=processor_args, + shuffle=data_sampler == "random", + random_seed=random_seed, + ) + unique_requests = request_loader.num_unique_items(raise_err=False) + console_step.finish( + title=( + f"Request loader initialized with {unique_requests} unique requests " + f"from {data}" + ), + details=InfoMixin.extract_from_obj(request_loader), + status_level="success", + ) + return request_loader + +async def resolve_profile( + constraint_inputs: dict[str, int | float], + profile: Profile | None, + rate: list[float] | None, + random_seed: int, + constraints: dict[str, ConstraintInitializer | Any], + console_step: ConsoleUpdateStep, +): + for key, val in constraint_inputs.items(): + if val is not None: + constraints[key] = val + if not isinstance(profile, Profile): + profile = Profile.create( + rate_type=profile, + rate=rate, + random_seed=random_seed, + constraints={**constraints}, + ) + elif constraints: + raise ValueError( + "Constraints must be empty when providing a Profile instance. " + f"Provided constraints: {constraints} ; provided profile: {profile}" + ) + console_step.finish( + title=f"{profile.__class__.__name__} profile resolved", + details=InfoMixin.extract_from_obj(profile), + status_level="success", + ) + +async def resolve_output_formats( + output_formats: OutputFormatType, + output_path: str | Path | None, + console_step: ConsoleUpdateStep, +) -> dict[str, GenerativeBenchmarkerOutput]: + output_formats = GenerativeBenchmarkerOutput.resolve( + output_formats=(output_formats or {}), output_path=output_path + ) + console_step.finish( + title="Output formats resolved", + details={key: str(val) for key, val in output_formats.items()}, + status_level="success", + ) + return output_formats + +async def finalize_outputs( + report: GenerativeBenchmarksReport, + resolved_output_formats: dict[str, GenerativeBenchmarkerOutput] +): + output_format_results = {} + for key, output in resolved_output_formats.items(): + output_result = await output.finalize(report) + output_format_results[key] = output_result + return output_format_results + +# End of helper functions. + async def benchmark_with_scenario(scenario: Scenario, **kwargs): """ @@ -67,16 +234,7 @@ async def benchmark_with_scenario(scenario: Scenario, **kwargs): # @validate_call(config={"arbitrary_types_allowed": True}) async def benchmark_generative_text( # noqa: C901 target: str, - data: ( - Iterable[str] - | Iterable[dict[str, Any]] - | Dataset - | DatasetDict - | IterableDataset - | IterableDatasetDict - | str - | Path - ), + data: DataType, profile: StrategyType | ProfileType | Profile, rate: float | list[float] | None = None, random_seed: int = 42, @@ -91,12 +249,7 @@ async def benchmark_generative_text( # noqa: C901 data_sampler: Literal["random"] | None = None, # Output configuration output_path: str | Path | None = _CURRENT_WORKING_DIR, - output_formats: ( - tuple[str, ...] - | list[str] - | dict[str, str | dict[str, Any] | GenerativeBenchmarkerOutput] - | None - ) = ("console", "json", "html", "csv"), + output_formats: OutputFormatType = ("console", "json", "html", "csv"), # Updates configuration progress: tuple[str, ...] | list[str] | list[BenchmarkerProgress] | None = None, print_updates: bool = False, @@ -120,101 +273,32 @@ async def benchmark_generative_text( # noqa: C901 with console.print_update_step( title=f"Initializing backend {backend}" ) as console_step: - backend = ( - Backend.create( - backend, target=target, model=model, **(backend_kwargs or {}) - ) - if not isinstance(backend, Backend) - else backend - ) - console_step.update(f"{backend.__class__.__name__} backend initialized") - await backend.process_startup() - await backend.validate() - console_step.finish( - title=f"{backend.__class__.__name__} backend initialized", - details=backend.info, - status_level="success", - ) + backend = await initialize_backend(backend) with console.print_update_step(title="Resolving processor") as console_step: - if processor is not None: - console_step.finish( - title="Processor resolved", - details=f"Using processor '{processor}'", - status_level="success", - ) - elif model is not None: - console_step.finish( - title="Processor resolved", - details=f"Using model '{model}' as processor", - status_level="success", - ) - processor = model - else: - console_step.update( - title="Resolving processor from backend.default_model", - status_level="info", - ) - processor = await backend.default_model() - console_step.finish( - title="Processor resolved", - details=( - f"Using model '{processor}' from backend " - f"{backend.__class__.__name__} as processor" - ), - status_level="success", - ) - await backend.process_shutdown() + await resolve_processor(processor, model, backend, console_step) with console.print_update_step( title=f"Initializing request loader from {data}" ) as console_step: - request_loader = GenerativeRequestLoader( - data=data, - data_args=data_args, - processor=processor, - processor_args=processor_args, - shuffle=data_sampler == "random", - random_seed=random_seed, - ) - unique_requests = request_loader.num_unique_items(raise_err=False) - console_step.finish( - title=( - f"Request loader initialized with {unique_requests} unique requests " - f"from {data}" - ), - details=InfoMixin.extract_from_obj(request_loader), - status_level="success", - ) + request_loader = init_request_loader(data, data_args, processor, console_step) with console.print_update_step( title=f"Resolving profile {profile}" ) as console_step: - for key, val in { - "max_seconds": max_seconds, - "max_requests": max_requests, - "max_errors": max_errors, - "max_error_rate": max_error_rate, - "max_global_error_rate": max_global_error_rate, - }.items(): - if val is not None: - constraints[key] = val - if not isinstance(profile, Profile): - profile = Profile.create( - rate_type=profile, - rate=rate, - random_seed=random_seed, - constraints={**constraints}, - ) - elif constraints: - raise ValueError( - "Constraints must be empty when providing a Profile instance. " - f"Provided constraints: {constraints} ; provided profile: {profile}" - ) - console_step.finish( - title=f"{profile.__class__.__name__} profile resolved", - details=InfoMixin.extract_from_obj(profile), - status_level="success", + resolve_profile( + { + "max_seconds": max_seconds, + "max_requests": max_requests, + "max_errors": max_errors, + "max_error_rate": max_error_rate, + "max_global_error_rate": max_global_error_rate, + }, + profile, + rate, + random_seed, + constraints, + console_step, ) with console.print_update_step( @@ -237,14 +321,7 @@ async def benchmark_generative_text( # noqa: C901 ) with console.print_update_step(title="Resolving output formats") as console_step: - output_formats = GenerativeBenchmarkerOutput.resolve( - output_formats=(output_formats or {}), output_path=output_path - ) - console_step.finish( - title="Output formats resolved", - details={key: str(val) for key, val in output_formats.items()}, - status_level="success", - ) + resolved_output_formats = resolve_output_formats(output_formats, output_path, console_step) progress_group = BenchmarkerProgressGroup( instances=progress or [], enabled=bool(progress) @@ -278,14 +355,11 @@ async def benchmark_generative_text( # noqa: C901 if benchmark: report.benchmarks.append(benchmark) - output_format_results = {} - for key, output in output_formats.items(): - output_result = await output.finalize(report) - output_format_results[key] = output_result + output_format_results = finalize_outputs(report, resolved_output_formats) console.print("\n\n") console.print_update( - title=f"Benchmarking complete, generated {len(report.benchmarks)} benchmark(s)", + title=f"Benchmarking complete; generated {len(report.benchmarks)} benchmark(s)", status="success", ) for key, value in output_format_results.items(): @@ -294,20 +368,29 @@ async def benchmark_generative_text( # noqa: C901 return report, output_format_results -def reimport_benchmarks_report(file: Path, output_path: Path | None) -> None: +async def reimport_benchmarks_report( + file: Path, + output_path: Path | None, + output_formats: OutputFormatType = ("console", "json", "html", "csv"), +) -> tuple[GenerativeBenchmarksReport, dict[str, Any]]: """ The command-line entry point for re-importing and displaying an - existing benchmarks report. Can also specify + existing benchmarks report. Can also specify an output format. Assumes the file provided exists. """ - report = GenerativeBenchmarksReport.load_file(file) - console_output = GenerativeBenchmarkerConsole() - console_output.finalize(report) console = Console() + with console.print_update_step( + title=f"Loading benchmarks from {file}" + ) as console_step: + report = GenerativeBenchmarksReport.load_file(file) + console_step.finish(f"Import of old benchmarks complete; loaded {len(report.benchmarks)} benchmark(s)") + + with console.print_update_step(title="Resolving output formats") as console_step: + resolved_output_formats = await resolve_output_formats(output_formats, output_path, console_step) + + output_format_results = await finalize_outputs(report, resolved_output_formats) - if output_path: - with console.print_update_step( - title=f"Saving benchmarks report to {output_path}..." - ) as console_step: - saved_path = report.save_file(output_path) - console_step.finish(title=f"Benchmarks report saved to {saved_path}") + for key, value in output_format_results.items(): + console.print_update(title=f" {key:<8}: {value}", status="debug") + + return report, output_format_results From a2100dd16b147741e4bf94a267e4947be9e08345 Mon Sep 17 00:00:00 2001 From: Jared O'Connell Date: Wed, 24 Sep 2025 12:07:32 -0400 Subject: [PATCH 2/3] Fix from-file Signed-off-by: Jared O'Connell --- src/guidellm/__main__.py | 37 ++--- src/guidellm/benchmark/entrypoints.py | 187 ++++++++++++-------------- 2 files changed, 105 insertions(+), 119 deletions(-) diff --git a/src/guidellm/__main__.py b/src/guidellm/__main__.py index 336f9777..9d85346b 100644 --- a/src/guidellm/__main__.py +++ b/src/guidellm/__main__.py @@ -473,23 +473,30 @@ def run( ) @click.option( "--output-path", - type=click.Path(file_okay=True, dir_okay=True, exists=False), - default=None, - is_flag=False, - flag_value=Path.cwd() / "benchmarks_reexported.json", + type=click.Path(), + default=Path.cwd(), + help=( + "Allows re-exporting the benchmarks to other formats. " + "The path to save the output formats to, if the format is a file type. " + "If it is a directory, it will save all output formats selected under it. " + "If it is a file, it will save the corresponding output format to that file. " + "Any output formats that were given that do not match the file extension will " + "be saved in the parent directory of the file path. " + "Defaults to the current working directory. " + ), +) +@click.option( + "--output-formats", + multiple=True, + type=str, + default=("console", "json"), # ("console", "json", "html", "csv") help=( - "Allows re-exporting the benchmarks to another format. " - "The path to save the output to. If it is a directory, " - "it will save benchmarks.json under it. " - "Otherwise, json, yaml, or csv files are supported for output types " - "which will be read from the extension for the file path. " - "This input is optional. If the output path flag is not provided, " - "the benchmarks will not be reexported. If the flag is present but " - "no value is specified, it will default to the current directory " - "with the file name `benchmarks_reexported.json`." + "The output formats to use for the benchmark results. " + "Defaults to console, json, html, and csv where the file formats " + "will be saved at the specified output path." ), ) -def from_file(path, output_path): +def from_file(path, output_path, output_formats): """ Load and optionally re-export a previously saved benchmark report. @@ -497,7 +504,7 @@ def from_file(path, output_path): to different output formats. Supports JSON, YAML, and CSV export formats based on the output file extension. """ - asyncio.run(reimport_benchmarks_report(path, output_path)) + asyncio.run(reimport_benchmarks_report(path, output_path, output_formats)) @cli.command( diff --git a/src/guidellm/benchmark/entrypoints.py b/src/guidellm/benchmark/entrypoints.py index 1e164fe3..0c280a91 100644 --- a/src/guidellm/benchmark/entrypoints.py +++ b/src/guidellm/benchmark/entrypoints.py @@ -41,7 +41,7 @@ NonDistributedEnvironment, StrategyType, ) -from guidellm.utils import Console, InfoMixin, ConsoleUpdateStep +from guidellm.utils import Console, InfoMixin __all__ = [ "benchmark_generative_text", @@ -63,8 +63,6 @@ | Path ) -ProcessorType = str | Path | PreTrainedTokenizerBase - OutputFormatType = ( tuple[str, ...] | list[str] @@ -73,15 +71,11 @@ ) -# TODO: Determine final location of the helper functions. - - async def initialize_backend( backend: BackendType | Backend, target: str, model: str | None, backend_kwargs: dict[str, Any] | None, - console_step: ConsoleUpdateStep, ) -> Backend: backend = ( Backend.create( @@ -90,121 +84,46 @@ async def initialize_backend( if not isinstance(backend, Backend) else backend ) - console_step.update(f"{backend.__class__.__name__} backend initialized") await backend.process_startup() await backend.validate() - console_step.finish( - title=f"{backend.__class__.__name__} backend initialized", - details=backend.info, - status_level="success", - ) return backend -async def resolve_processor( - processor: ProcessorType | None, - model: str | None, - backend: BackendType | Backend, - console_step: ConsoleUpdateStep, -) -> ProcessorType: - if processor is not None: - console_step.finish( - title="Processor resolved", - details=f"Using processor '{processor}'", - status_level="success", - ) - elif model is not None: - console_step.finish( - title="Processor resolved", - details=f"Using model '{model}' as processor", - status_level="success", - ) - processor = model - else: - console_step.update( - title="Resolving processor from backend.default_model", - status_level="info", - ) - processor = await backend.default_model() - console_step.finish( - title="Processor resolved", - details=( - f"Using model '{processor}' from backend " - f"{backend.__class__.__name__} as processor" - ), - status_level="success", - ) - await backend.process_shutdown() - return processor - - -async def init_request_loader( - data: DataType, - data_args: dict[str, Any] | None, - processor: ProcessorType, - console_step: ConsoleUpdateStep, -) -> GenerativeRequestLoader: - request_loader = GenerativeRequestLoader( - data=data, - data_args=data_args, - processor=processor, - processor_args=processor_args, - shuffle=data_sampler == "random", - random_seed=random_seed, - ) - unique_requests = request_loader.num_unique_items(raise_err=False) - console_step.finish( - title=( - f"Request loader initialized with {unique_requests} unique requests " - f"from {data}" - ), - details=InfoMixin.extract_from_obj(request_loader), - status_level="success", - ) - return request_loader - async def resolve_profile( constraint_inputs: dict[str, int | float], - profile: Profile | None, + profile: Profile | str | None, rate: list[float] | None, random_seed: int, constraints: dict[str, ConstraintInitializer | Any], - console_step: ConsoleUpdateStep, ): for key, val in constraint_inputs.items(): if val is not None: constraints[key] = val if not isinstance(profile, Profile): - profile = Profile.create( - rate_type=profile, - rate=rate, - random_seed=random_seed, - constraints={**constraints}, - ) + if isinstance(profile, str): + profile = Profile.create( + rate_type=profile, + rate=rate, + random_seed=random_seed, + constraints={**constraints}, + ) + else: + raise ValueError(f"Expected string for profile; got {type(profile)}") + elif constraints: raise ValueError( "Constraints must be empty when providing a Profile instance. " f"Provided constraints: {constraints} ; provided profile: {profile}" ) - console_step.finish( - title=f"{profile.__class__.__name__} profile resolved", - details=InfoMixin.extract_from_obj(profile), - status_level="success", - ) + return profile async def resolve_output_formats( output_formats: OutputFormatType, output_path: str | Path | None, - console_step: ConsoleUpdateStep, ) -> dict[str, GenerativeBenchmarkerOutput]: output_formats = GenerativeBenchmarkerOutput.resolve( output_formats=(output_formats or {}), output_path=output_path ) - console_step.finish( - title="Output formats resolved", - details={key: str(val) for key, val in output_formats.items()}, - status_level="success", - ) return output_formats async def finalize_outputs( @@ -217,8 +136,6 @@ async def finalize_outputs( output_format_results[key] = output_result return output_format_results -# End of helper functions. - async def benchmark_with_scenario(scenario: Scenario, **kwargs): """ @@ -273,20 +190,68 @@ async def benchmark_generative_text( # noqa: C901 with console.print_update_step( title=f"Initializing backend {backend}" ) as console_step: - backend = await initialize_backend(backend) + backend = await initialize_backend(backend, target, model, backend_kwargs) + console_step.finish( + title=f"{backend.__class__.__name__} backend initialized", + details=backend.info, + status_level="success", + ) with console.print_update_step(title="Resolving processor") as console_step: - await resolve_processor(processor, model, backend, console_step) + if processor is not None: + console_step.finish( + title="Processor resolved", + details=f"Using processor '{processor}'", + status_level="success", + ) + elif model is not None: + console_step.finish( + title="Processor resolved", + details=f"Using model '{model}' as processor", + status_level="success", + ) + processor = model + else: + console_step.update( + title="Resolving processor from backend.default_model", + status_level="info", + ) + processor = await backend.default_model() + console_step.finish( + title="Processor resolved", + details=( + f"Using model '{processor}' from backend " + f"{backend.__class__.__name__} as processor" + ), + status_level="success", + ) + await backend.process_shutdown() with console.print_update_step( title=f"Initializing request loader from {data}" ) as console_step: - request_loader = init_request_loader(data, data_args, processor, console_step) + request_loader = GenerativeRequestLoader( + data=data, + data_args=data_args, + processor=processor, + processor_args=processor_args, + shuffle=data_sampler == "random", + random_seed=random_seed, + ) + unique_requests = request_loader.num_unique_items(raise_err=False) + console_step.finish( + title=( + f"Request loader initialized with {unique_requests} unique requests " + f"from {data}" + ), + details=InfoMixin.extract_from_obj(request_loader), + status_level="success", + ) with console.print_update_step( title=f"Resolving profile {profile}" ) as console_step: - resolve_profile( + profile = await resolve_profile( { "max_seconds": max_seconds, "max_requests": max_requests, @@ -298,7 +263,11 @@ async def benchmark_generative_text( # noqa: C901 rate, random_seed, constraints, - console_step, + ) + console_step.finish( + title=f"{profile.__class__.__name__} profile resolved", + details=InfoMixin.extract_from_obj(profile), + status_level="success", ) with console.print_update_step( @@ -321,7 +290,12 @@ async def benchmark_generative_text( # noqa: C901 ) with console.print_update_step(title="Resolving output formats") as console_step: - resolved_output_formats = resolve_output_formats(output_formats, output_path, console_step) + resolved_output_formats = await resolve_output_formats(output_formats, output_path) + console_step.finish( + title="Output formats resolved", + details={key: str(val) for key, val in resolved_output_formats.items()}, + status_level="success", + ) progress_group = BenchmarkerProgressGroup( instances=progress or [], enabled=bool(progress) @@ -355,7 +329,7 @@ async def benchmark_generative_text( # noqa: C901 if benchmark: report.benchmarks.append(benchmark) - output_format_results = finalize_outputs(report, resolved_output_formats) + output_format_results = await finalize_outputs(report, resolved_output_formats) console.print("\n\n") console.print_update( @@ -386,7 +360,12 @@ async def reimport_benchmarks_report( console_step.finish(f"Import of old benchmarks complete; loaded {len(report.benchmarks)} benchmark(s)") with console.print_update_step(title="Resolving output formats") as console_step: - resolved_output_formats = await resolve_output_formats(output_formats, output_path, console_step) + resolved_output_formats = await resolve_output_formats(output_formats, output_path) + console_step.finish( + title="Output formats resolved", + details={key: str(val) for key, val in resolved_output_formats.items()}, + status_level="success", + ) output_format_results = await finalize_outputs(report, resolved_output_formats) From f926e5bc7e28acc184dc3a06b99a6fafa0b86c1c Mon Sep 17 00:00:00 2001 From: Jared O'Connell Date: Wed, 24 Sep 2025 17:01:35 -0400 Subject: [PATCH 3/3] Add section comments to entrypoints file Also remove unused import Signed-off-by: Jared O'Connell --- src/guidellm/benchmark/entrypoints.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/guidellm/benchmark/entrypoints.py b/src/guidellm/benchmark/entrypoints.py index 0c280a91..828402d8 100644 --- a/src/guidellm/benchmark/entrypoints.py +++ b/src/guidellm/benchmark/entrypoints.py @@ -26,7 +26,6 @@ from guidellm.benchmark.benchmarker import Benchmarker from guidellm.benchmark.objects import GenerativeBenchmark, GenerativeBenchmarksReport from guidellm.benchmark.output import ( - GenerativeBenchmarkerConsole, GenerativeBenchmarkerOutput, ) from guidellm.benchmark.profile import Profile, ProfileType @@ -52,6 +51,9 @@ _CURRENT_WORKING_DIR = Path.cwd() + +# Data types + DataType = ( Iterable[str] | Iterable[dict[str, Any]] @@ -71,6 +73,8 @@ ) +# Helper functions + async def initialize_backend( backend: BackendType | Backend, target: str, @@ -137,6 +141,8 @@ async def finalize_outputs( return output_format_results +# Complete entrypoints + async def benchmark_with_scenario(scenario: Scenario, **kwargs): """ Run a benchmark using a scenario and specify any extra arguments