diff --git a/.gitignore b/.gitignore index ebbf9b096..6a6ee9bdb 100644 --- a/.gitignore +++ b/.gitignore @@ -168,7 +168,7 @@ cython_debug/ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. -#.idea/ +.idea/ # MacOS files diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 61b765a2b..8d6bbf2e3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,7 +3,9 @@ repos: rev: v4.6.0 hooks: - id: trailing-whitespace + exclude: ^tests/?.*/assets/.+ - id: end-of-file-fixer + exclude: ^tests/?.*/assets/.+ - repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.11.7 hooks: diff --git a/src/guidellm/__main__.py b/src/guidellm/__main__.py index 9e8a12fb9..3babc24ac 100644 --- a/src/guidellm/__main__.py +++ b/src/guidellm/__main__.py @@ -7,12 +7,16 @@ from pydantic import ValidationError from guidellm.backend import BackendType -from guidellm.benchmark import ProfileType +from guidellm.benchmark import ( + ProfileType, + reimport_benchmarks_report, +) from guidellm.benchmark.entrypoints import benchmark_with_scenario from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios from guidellm.config import print_config from guidellm.preprocess.dataset import ShortPromptStrategy, process_dataset from guidellm.scheduler import StrategyType +from guidellm.utils import DefaultGroupHandler from guidellm.utils import cli as cli_tools STRATEGY_PROFILE_CHOICES = set( @@ -25,7 +29,17 @@ def cli(): pass -@cli.command( +@cli.group( + help="Commands to run a new benchmark or load a prior one.", + cls=DefaultGroupHandler, + default="run", +) +def benchmark(): + pass + + +@benchmark.command( + "run", help="Run a benchmark against a generative model using the specified arguments.", context_settings={"auto_envvar_prefix": 
"GUIDELLM"}, ) @@ -230,7 +244,7 @@ def cli(): type=int, help="The random seed to use for benchmarking to ensure reproducibility.", ) -def benchmark( +def run( scenario, target, backend_type, @@ -306,6 +320,37 @@ def benchmark( ) +@benchmark.command( + "from-file", + help="Load a saved benchmark report." +) +@click.argument( + "path", + type=click.Path(file_okay=True, dir_okay=False, exists=True), + default=Path.cwd() / "benchmarks.json", +) +@click.option( + "--output-path", + type=click.Path(file_okay=True, dir_okay=True, exists=False), + default=None, + is_flag=False, + flag_value=Path.cwd() / "benchmarks_reexported.json", + help=( + "Allows re-exporting the benchmarks to another format. " + "The path to save the output to. If it is a directory, " + "it will save benchmarks.json under it. " + "Otherwise, json, yaml, or csv files are supported for output types " + "which will be read from the extension for the file path. " + "This input is optional. If the output path flag is not provided, " + "the benchmarks will not be reexported. If the flag is present but " + "no value is specified, it will default to the current directory " + "with the file name `benchmarks_reexported.json`." + ), +) +def from_file(path, output_path): + reimport_benchmarks_report(path, output_path) + + def decode_escaped_str(_ctx, _param, value): """ Click auto adds characters. For example, when using --pad-char "\n", @@ -321,10 +366,11 @@ def decode_escaped_str(_ctx, _param, value): @cli.command( + short_help="Prints environment variable settings.", help=( "Print out the available configuration settings that can be set " "through environment variables." 
- ) + ), ) def config(): print_config() diff --git a/src/guidellm/benchmark/__init__.py b/src/guidellm/benchmark/__init__.py index f5130711d..a4676c7e9 100644 --- a/src/guidellm/benchmark/__init__.py +++ b/src/guidellm/benchmark/__init__.py @@ -12,7 +12,7 @@ StatusBreakdown, ) from .benchmarker import Benchmarker, BenchmarkerResult, GenerativeBenchmarker -from .entrypoints import benchmark_generative_text +from .entrypoints import benchmark_generative_text, reimport_benchmarks_report from .output import GenerativeBenchmarksConsole, GenerativeBenchmarksReport from .profile import ( AsyncProfile, @@ -63,4 +63,5 @@ "ThroughputProfile", "benchmark_generative_text", "create_profile", + "reimport_benchmarks_report", ] diff --git a/src/guidellm/benchmark/entrypoints.py b/src/guidellm/benchmark/entrypoints.py index ce43fca3e..2ef85c3ec 100644 --- a/src/guidellm/benchmark/entrypoints.py +++ b/src/guidellm/benchmark/entrypoints.py @@ -133,13 +133,8 @@ async def benchmark_generative_text( ) if output_console: - orig_enabled = console.enabled - console.enabled = True console.benchmarks = report.benchmarks - console.print_benchmarks_metadata() - console.print_benchmarks_info() - console.print_benchmarks_stats() - console.enabled = orig_enabled + console.print_full_report() if output_path: console.print_line("\nSaving benchmarks report...") @@ -151,3 +146,20 @@ async def benchmark_generative_text( console.print_line("\nBenchmarking complete.") return report, saved_path + + +def reimport_benchmarks_report(file: Path, output_path: Optional[Path]) -> None: + """ + The command-line entry point for re-importing and displaying an + existing benchmarks report. Can also specify an output path to re-export it. + Assumes the file provided exists. 
+ """ + console = GenerativeBenchmarksConsole(enabled=True) + report = GenerativeBenchmarksReport.load_file(file) + console.benchmarks = report.benchmarks + console.print_full_report() + + if output_path: + console.print_line("\nSaving benchmarks report...") + saved_path = report.save_file(output_path) + console.print_line(f"Benchmarks report saved to {saved_path}") diff --git a/src/guidellm/benchmark/output.py b/src/guidellm/benchmark/output.py index 4847160d5..5e4c4c670 100644 --- a/src/guidellm/benchmark/output.py +++ b/src/guidellm/benchmark/output.py @@ -242,7 +242,10 @@ def _file_setup( if path_suffix in [".csv"]: return path, "csv" - raise ValueError(f"Unsupported file extension: {path_suffix} for {path}.") + raise ValueError( + f"Unsupported file extension: {path_suffix} for {path}; " + "expected json, yaml, or csv." + ) @staticmethod def _benchmark_desc_headers_and_values( @@ -944,3 +947,20 @@ def print_benchmarks_stats(self): title="Benchmarks Stats", sections=sections, ) + + def print_full_report(self): + """ + Print out the benchmark statistics to the console. + Temporarily enables the console if it's disabled. 
+ + Format: + - Metadata + - Info + - Stats + """ + orig_enabled = self.enabled + self.enabled = True + self.print_benchmarks_metadata() + self.print_benchmarks_info() + self.print_benchmarks_stats() + self.enabled = orig_enabled diff --git a/src/guidellm/utils/__init__.py b/src/guidellm/utils/__init__.py index 399c021d5..fb9262c31 100644 --- a/src/guidellm/utils/__init__.py +++ b/src/guidellm/utils/__init__.py @@ -1,4 +1,5 @@ from .colors import Colors +from .default_group import DefaultGroupHandler from .hf_datasets import ( SUPPORTED_TYPES, save_dataset_to_file, @@ -20,6 +21,7 @@ __all__ = [ "SUPPORTED_TYPES", "Colors", + "DefaultGroupHandler", "EndlessTextCreator", "IntegerRangeSampler", "check_load_processor", diff --git a/src/guidellm/utils/default_group.py b/src/guidellm/utils/default_group.py new file mode 100644 index 000000000..b3f0f03b0 --- /dev/null +++ b/src/guidellm/utils/default_group.py @@ -0,0 +1,104 @@ +""" +File uses code adapted from code with the following license: + +Copyright (c) 2015-2023, Heungsub Lee +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + + Redistributions in binary form must reproduce the above copyright notice, this + list of conditions and the following disclaimer in the documentation and/or + other materials provided with the distribution. + + Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +""" + +__all__ = ["DefaultGroupHandler"] + +import collections.abc as cabc + +import click + + +class DefaultGroupHandler(click.Group): + """ + Allows the migration to a new sub-command by allowing the group to run + one of its sub-commands as the default when no known sub-command is named. + """ + + def __init__(self, *args, **kwargs): + # To resolve as the default command. + if not kwargs.get("ignore_unknown_options", True): + raise ValueError("Default group accepts unknown options") + self.ignore_unknown_options = True + self.default_cmd_name = kwargs.pop("default", None) + self.default_if_no_args = kwargs.pop("default_if_no_args", False) + super().__init__(*args, **kwargs) + + def parse_args(self, ctx, args): + if not args and self.default_if_no_args: + args.insert(0, self.default_cmd_name) + return super().parse_args(ctx, args) + + def get_command(self, ctx, cmd_name): + if cmd_name not in self.commands: + # If it doesn't match an existing command, use the default command name. + ctx.arg0 = cmd_name + cmd_name = self.default_cmd_name + return super().get_command(ctx, cmd_name) + + def resolve_command(self, ctx, args): + cmd_name, cmd, args = super().resolve_command(ctx, args) + if hasattr(ctx, "arg0"): + args.insert(0, ctx.arg0) + cmd_name = cmd.name + return cmd_name, cmd, args + + def format_commands(self, ctx, formatter): + """ + Used to wrap the default formatter to clarify which command is the default. 
+ """ + formatter = DefaultCommandFormatter(self, formatter, mark=" (default)") + return super().format_commands(ctx, formatter) + + +class DefaultCommandFormatter: + """ + Wraps a formatter to edit the line for the default command to mark it + with the specified mark string. + """ + + def __init__(self, group, formatter, mark="*"): + self.group = group + self.formatter = formatter + self.mark = mark + super().__init__() + + def __getattr__(self, attr): + return getattr(self.formatter, attr) + + def write_dl(self, rows: cabc.Sequence[tuple[str, str]], *args, **kwargs): + rows_: list[tuple[str, str]] = [] + for cmd_name, help_msg in rows: + if cmd_name == self.group.default_cmd_name: + rows_.insert(0, (cmd_name + self.mark, help_msg)) + else: + rows_.append((cmd_name, help_msg)) + return self.formatter.write_dl(rows_, *args, **kwargs) diff --git a/tests/unit/entrypoints/__init__.py b/tests/unit/entrypoints/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/unit/entrypoints/assets/benchmarks_stripped.json b/tests/unit/entrypoints/assets/benchmarks_stripped.json new file mode 100644 index 000000000..a95d2880a --- /dev/null +++ b/tests/unit/entrypoints/assets/benchmarks_stripped.json @@ -0,0 +1 @@ +{"benchmarks": [{"type_": "generative_benchmark", "id_": "97ece514-8717-412f-9dba-2b42bcd9866f", "run_id": "93e36b31-b454-471d-ba62-6b2671585485", "args": {"profile": {"type_": "sweep", "completed_strategies": 10, "measured_rates": [1.5481806532737452], "measured_concurrencies": [0.9977627456483604], "max_concurrency": null, "strategy_type": "constant", "rate": -1.0, "initial_burst": true, "random_seed": 42, "sweep_size": 10, "rate_type": "constant", "strategy_types": ["synchronous", "throughput", "constant", "constant", "constant", "constant", "constant", "constant", "constant", "constant"]}, "strategy_index": 0, "strategy": {"type_": "synchronous"}, "max_number": null, "max_duration": 30.0, "warmup_number": null, "warmup_duration": null, 
"cooldown_number": null, "cooldown_duration": null}, "run_stats": {"start_time": 1749157168.054225, "end_time": 1749157198.213826, "requests_made": {"successful": 1, "errored": 0, "incomplete": 0, "total": 1}, "queued_time_avg": 0.631589580089488, "scheduled_time_delay_avg": 3.784260851271609e-06, "scheduled_time_sleep_avg": 0.0, "worker_start_delay_avg": 2.8021792148021943e-05, "worker_time_avg": 0.6373953819274902, "worker_start_time_targeted_delay_avg": 0.6319031715393066, "request_start_time_delay_avg": 0.316034068452551, "request_start_time_targeted_delay_avg": 0.6319856542222043, "request_time_delay_avg": 0.00029866238857837433, "request_time_avg": 0.6370967195389119}, "worker": {"type_": "generative_requests_worker", "backend_type": "openai_http", "backend_target": "example_target", "backend_model": "example_model", "backend_info": {"max_output_tokens": 16384, "timeout": 300, "http2": true, "authorization": false, "organization": null, "project": null, "text_completions_path": "/v1/completions", "chat_completions_path": "/v1/chat/completions"}}, "request_loader": {"type_": "generative_request_loader", "data": "prompt_tokens=256,output_tokens=128", "data_args": null, "processor": "example_processor", "processor_args": null}, "extras": {}, "metrics": {"requests_per_second": {"successful": {"mean": 1.5481806532737452, "median": 1.5530116578512305, "mode": 1.555484186315253, "variance": 0.0003352629331303757, "std_dev": 0.01831018659463567, "min": 1.4509899157628907, "max": 1.5597664461806156, "count": 45, "total_sum": 69.6707872953874, "percentiles": {"p001": 1.4509899157628907, "p01": 1.4509899157628907, "p05": 1.5190957942495127, "p10": 1.5377883923356668, "p25": 1.5483918601985445, "p75": 1.5567531615313124, "p90": 1.5583715343236735, "p95": 1.5590938878953722, "p99": 1.5597664461806156, "p999": 1.5597664461806156}, "cumulative_distribution_function": null}, "errored": {"mean": 0.0, "median": 0.0, "mode": 0.0, "variance": 0.0, "std_dev": 0.0, "min": 0.0, 
"max": 0.0, "count": 0, "total_sum": 0.0, "percentiles": {"p001": 0.0, "p01": 0.0, "p05": 0.0, "p10": 0.0, "p25": 0.0, "p75": 0.0, "p90": 0.0, "p95": 0.0, "p99": 0.0, "p999": 0.0}, "cumulative_distribution_function": null}, "incomplete": {"mean": 0.0, "median": 0.0, "mode": 0.0, "variance": 0.0, "std_dev": 0.0, "min": 0.0, "max": 0.0, "count": 0, "total_sum": 0.0, "percentiles": {"p001": 0.0, "p01": 0.0, "p05": 0.0, "p10": 0.0, "p25": 0.0, "p75": 0.0, "p90": 0.0, "p95": 0.0, "p99": 0.0, "p999": 0.0}, "cumulative_distribution_function": null}, "total": {"mean": 1.5668128271815418, "median": 1.5530312090734288, "mode": 1.555484186315253, "variance": 0.036536424510388923, "std_dev": 0.19114503527528232, "min": 1.4509899157628907, "max": 3.509921881864626, "count": 46, "total_sum": 73.18070917725203, "percentiles": {"p001": 1.4509899157628907, "p01": 1.4509899157628907, "p05": 1.5190957942495127, "p10": 1.5377883923356668, "p25": 1.5483918601985445, "p75": 1.5567531615313124, "p90": 1.5583715343236735, "p95": 1.5591048992639953, "p99": 1.5597664461806156, "p999": 3.509921881864626}, "cumulative_distribution_function": null}}, "request_concurrency": {"successful": {"mean": 0.9977627456483604, "median": 1.0, "mode": 1.0, "variance": 0.002232249044605607, "std_dev": 0.047246682895263736, "min": 0.0, "max": 1.0, "count": 2, "total_sum": 1.0, "percentiles": {"p001": 0.0, "p01": 1.0, "p05": 1.0, "p10": 1.0, "p25": 1.0, "p75": 1.0, "p90": 1.0, "p95": 1.0, "p99": 1.0, "p999": 1.0}, "cumulative_distribution_function": null}, "errored": {"mean": 0.0, "median": 0.0, "mode": 0.0, "variance": 0.0, "std_dev": 0.0, "min": 0.0, "max": 0.0, "count": 0, "total_sum": 0.0, "percentiles": {"p001": 0.0, "p01": 0.0, "p05": 0.0, "p10": 0.0, "p25": 0.0, "p75": 0.0, "p90": 0.0, "p95": 0.0, "p99": 0.0, "p999": 0.0}, "cumulative_distribution_function": null}, "incomplete": {"mean": 1.0, "median": 1.0, "mode": 1.0, "variance": 0.0, "std_dev": 0.0, "min": 1.0, "max": 1.0, "count": 1, "total_sum": 
1.0, "percentiles": {"p001": 1.0, "p01": 1.0, "p05": 1.0, "p10": 1.0, "p25": 1.0, "p75": 1.0, "p90": 1.0, "p95": 1.0, "p99": 1.0, "p999": 1.0}, "cumulative_distribution_function": null}, "total": {"mean": 0.9977433642674269, "median": 1.0, "mode": 1.0, "variance": 0.002251543327743578, "std_dev": 0.047450430216633206, "min": 0.0, "max": 1.0, "count": 2, "total_sum": 1.0, "percentiles": {"p001": 0.0, "p01": 1.0, "p05": 1.0, "p10": 1.0, "p25": 1.0, "p75": 1.0, "p90": 1.0, "p95": 1.0, "p99": 1.0, "p999": 1.0}, "cumulative_distribution_function": null}}, "request_latency": {"successful": {"mean": 0.6444743664368339, "median": 0.6424565315246582, "mode": 0.6395885944366455, "variance": 6.414585873782315e-05, "std_dev": 0.008009110982988258, "min": 0.6395885944366455, "max": 0.6891846656799316, "count": 46, "total_sum": 29.64582085609436, "percentiles": {"p001": 0.6395885944366455, "p01": 0.6395885944366455, "p05": 0.6399857997894287, "p10": 0.6403069496154785, "p25": 0.6409540176391602, "p75": 0.644390344619751, "p90": 0.6488735675811768, "p95": 0.656728982925415, "p99": 0.6891846656799316, "p999": 0.6891846656799316}, "cumulative_distribution_function": null}, "errored": {"mean": 0.0, "median": 0.0, "mode": 0.0, "variance": 0.0, "std_dev": 0.0, "min": 0.0, "max": 0.0, "count": 0, "total_sum": 0.0, "percentiles": {"p001": 0.0, "p01": 0.0, "p05": 0.0, "p10": 0.0, "p25": 0.0, "p75": 0.0, "p90": 0.0, "p95": 0.0, "p99": 0.0, "p999": 0.0}, "cumulative_distribution_function": null}, "incomplete": {"mean": 0.2836878299713135, "median": 0.2836878299713135, "mode": 0.2836878299713135, "variance": 0.0, "std_dev": 0.0, "min": 0.2836878299713135, "max": 0.2836878299713135, "count": 1, "total_sum": 0.2836878299713135, "percentiles": {"p001": 0.2836878299713135, "p01": 0.2836878299713135, "p05": 0.2836878299713135, "p10": 0.2836878299713135, "p25": 0.2836878299713135, "p75": 0.2836878299713135, "p90": 0.2836878299713135, "p95": 0.2836878299713135, "p99": 0.2836878299713135, "p999": 
0.2836878299713135}, "cumulative_distribution_function": null}, "total": {"mean": 0.6367980571503334, "median": 0.642310380935669, "mode": 0.2836878299713135, "variance": 0.0027733643692853522, "std_dev": 0.05266274175624881, "min": 0.2836878299713135, "max": 0.6891846656799316, "count": 47, "total_sum": 29.929508686065674, "percentiles": {"p001": 0.2836878299713135, "p01": 0.2836878299713135, "p05": 0.6398613452911377, "p10": 0.6402454376220703, "p25": 0.640899658203125, "p75": 0.644390344619751, "p90": 0.6488735675811768, "p95": 0.656728982925415, "p99": 0.6891846656799316, "p999": 0.6891846656799316}, "cumulative_distribution_function": null}}, "prompt_token_count": {"successful": {"mean": 257.1086956521739, "median": 257.0, "mode": 257.0, "variance": 0.14035916824196598, "std_dev": 0.37464538999161057, "min": 257.0, "max": 259.0, "count": 46, "total_sum": 11827.0, "percentiles": {"p001": 257.0, "p01": 257.0, "p05": 257.0, "p10": 257.0, "p25": 257.0, "p75": 257.0, "p90": 257.0, "p95": 258.0, "p99": 259.0, "p999": 259.0}, "cumulative_distribution_function": null}, "errored": {"mean": 0.0, "median": 0.0, "mode": 0.0, "variance": 0.0, "std_dev": 0.0, "min": 0.0, "max": 0.0, "count": 0, "total_sum": 0.0, "percentiles": {"p001": 0.0, "p01": 0.0, "p05": 0.0, "p10": 0.0, "p25": 0.0, "p75": 0.0, "p90": 0.0, "p95": 0.0, "p99": 0.0, "p999": 0.0}, "cumulative_distribution_function": null}, "incomplete": {"mean": 256.0, "median": 256.0, "mode": 256.0, "variance": 0.0, "std_dev": 0.0, "min": 256.0, "max": 256.0, "count": 1, "total_sum": 256.0, "percentiles": {"p001": 256.0, "p01": 256.0, "p05": 256.0, "p10": 256.0, "p25": 256.0, "p75": 256.0, "p90": 256.0, "p95": 256.0, "p99": 256.0, "p999": 256.0}, "cumulative_distribution_function": null}, "total": {"mean": 257.0851063829787, "median": 257.0, "mode": 256.0, "variance": 0.16296966953372566, "std_dev": 0.40369502044702715, "min": 256.0, "max": 259.0, "count": 47, "total_sum": 12083.0, "percentiles": {"p001": 256.0, "p01": 
256.0, "p05": 257.0, "p10": 257.0, "p25": 257.0, "p75": 257.0, "p90": 257.0, "p95": 258.0, "p99": 259.0, "p999": 259.0}, "cumulative_distribution_function": null}}, "output_token_count": {"successful": {"mean": 127.99999999999999, "median": 128.0, "mode": 128.0, "variance": 2.01948391736579e-28, "std_dev": 1.4210854715202002e-14, "min": 128.0, "max": 128.0, "count": 46, "total_sum": 5888.0, "percentiles": {"p001": 128.0, "p01": 128.0, "p05": 128.0, "p10": 128.0, "p25": 128.0, "p75": 128.0, "p90": 128.0, "p95": 128.0, "p99": 128.0, "p999": 128.0}, "cumulative_distribution_function": null}, "errored": {"mean": 0.0, "median": 0.0, "mode": 0.0, "variance": 0.0, "std_dev": 0.0, "min": 0.0, "max": 0.0, "count": 0, "total_sum": 0.0, "percentiles": {"p001": 0.0, "p01": 0.0, "p05": 0.0, "p10": 0.0, "p25": 0.0, "p75": 0.0, "p90": 0.0, "p95": 0.0, "p99": 0.0, "p999": 0.0}, "cumulative_distribution_function": null}, "incomplete": {"mean": 0.0, "median": 0.0, "mode": 0.0, "variance": 0.0, "std_dev": 0.0, "min": 0.0, "max": 0.0, "count": 0, "total_sum": 0.0, "percentiles": {"p001": 0.0, "p01": 0.0, "p05": 0.0, "p10": 0.0, "p25": 0.0, "p75": 0.0, "p90": 0.0, "p95": 0.0, "p99": 0.0, "p999": 0.0}, "cumulative_distribution_function": null}, "total": {"mean": 126.44680851063832, "median": 128.0, "mode": 55.0, "variance": 110.97057492077867, "std_dev": 10.534257207832866, "min": 55.0, "max": 128.0, "count": 47, "total_sum": 5943.0, "percentiles": {"p001": 55.0, "p01": 55.0, "p05": 128.0, "p10": 128.0, "p25": 128.0, "p75": 128.0, "p90": 128.0, "p95": 128.0, "p99": 128.0, "p999": 128.0}, "cumulative_distribution_function": null}}, "time_to_first_token_ms": {"successful": {"mean": 16.792535781860348, "median": 16.38054847717285, "mode": 15.790939331054688, "variance": 1.2776652847210441, "std_dev": 1.1303385708366516, "min": 15.790939331054688, "max": 21.281957626342773, "count": 46, "total_sum": 772.4566459655762, "percentiles": {"p001": 15.790939331054688, "p01": 15.790939331054688, 
"p05": 15.971660614013672, "p10": 16.034841537475586, "p25": 16.111373901367188, "p75": 16.840696334838867, "p90": 18.505334854125977, "p95": 19.00935173034668, "p99": 21.281957626342773, "p999": 21.281957626342773}, "cumulative_distribution_function": null}, "errored": {"mean": 0.0, "median": 0.0, "mode": 0.0, "variance": 0.0, "std_dev": 0.0, "min": 0.0, "max": 0.0, "count": 0, "total_sum": 0.0, "percentiles": {"p001": 0.0, "p01": 0.0, "p05": 0.0, "p10": 0.0, "p25": 0.0, "p75": 0.0, "p90": 0.0, "p95": 0.0, "p99": 0.0, "p999": 0.0}, "cumulative_distribution_function": null}, "incomplete": {"mean": 0.0, "median": 0.0, "mode": 0.0, "variance": 0.0, "std_dev": 0.0, "min": 0.0, "max": 0.0, "count": 0, "total_sum": 0.0, "percentiles": {"p001": 0.0, "p01": 0.0, "p05": 0.0, "p10": 0.0, "p25": 0.0, "p75": 0.0, "p90": 0.0, "p95": 0.0, "p99": 0.0, "p999": 0.0}, "cumulative_distribution_function": null}, "total": {"mean": 16.777170465347616, "median": 16.371726989746094, "mode": 15.790939331054688, "variance": 1.2613411927317046, "std_dev": 1.1230944718641014, "min": 15.790939331054688, "max": 21.281957626342773, "count": 47, "total_sum": 788.5270118713379, "percentiles": {"p001": 15.790939331054688, "p01": 15.790939331054688, "p05": 15.971660614013672, "p10": 16.034841537475586, "p25": 16.100645065307617, "p75": 16.840696334838867, "p90": 18.505334854125977, "p95": 19.00935173034668, "p99": 21.281957626342773, "p999": 21.281957626342773}, "cumulative_distribution_function": null}}, "time_per_output_token_ms": {"successful": {"mean": 4.90300272307966, "median": 4.885653033852577, "mode": 4.870360717177391, "variance": 0.003163643010108571, "std_dev": 0.05624627107736628, "min": 4.870360717177391, "max": 5.217265337705612, "count": 46, "total_sum": 225.5381252616644, "percentiles": {"p001": 4.870360717177391, "p01": 4.870360717177391, "p05": 4.8728808760643005, "p10": 4.873953759670258, "p25": 4.876237362623215, "p75": 4.904214292764664, "p90": 4.934689030051231, "p95": 
4.993332549929619, "p99": 5.217265337705612, "p999": 5.217265337705612}, "cumulative_distribution_function": null}, "errored": {"mean": 0.0, "median": 0.0, "mode": 0.0, "variance": 0.0, "std_dev": 0.0, "min": 0.0, "max": 0.0, "count": 0, "total_sum": 0.0, "percentiles": {"p001": 0.0, "p01": 0.0, "p05": 0.0, "p10": 0.0, "p25": 0.0, "p75": 0.0, "p90": 0.0, "p95": 0.0, "p99": 0.0, "p999": 0.0}, "cumulative_distribution_function": null}, "incomplete": {"mean": 0.0, "median": 0.0, "mode": 0.0, "variance": 0.0, "std_dev": 0.0, "min": 0.0, "max": 0.0, "count": 0, "total_sum": 0.0, "percentiles": {"p001": 0.0, "p01": 0.0, "p05": 0.0, "p10": 0.0, "p25": 0.0, "p75": 0.0, "p90": 0.0, "p95": 0.0, "p99": 0.0, "p999": 0.0}, "cumulative_distribution_function": null}, "total": {"mean": 4.9022222114856975, "median": 4.882922396063805, "mode": 4.870360717177391, "variance": 0.003199582258516055, "std_dev": 0.05656485002646127, "min": 4.81866489757191, "max": 5.217265337705612, "count": 47, "total_sum": 230.3567901592363, "percentiles": {"p001": 4.81866489757191, "p01": 4.870360717177391, "p05": 4.872731864452362, "p10": 4.873953759670258, "p25": 4.876237362623215, "p75": 4.904214292764664, "p90": 4.934689030051231, "p95": 4.993332549929619, "p99": 5.217265337705612, "p999": 5.217265337705612}, "cumulative_distribution_function": null}}, "inter_token_latency_ms": {"successful": {"mean": 4.941609043733832, "median": 4.9241227427805505, "mode": 4.90871001416304, "variance": 0.003213660306132974, "std_dev": 0.056689155101597465, "min": 4.90871001416304, "max": 5.258346167136365, "count": 46, "total_sum": 227.31401601175622, "percentiles": {"p001": 4.90871001416304, "p01": 4.90871001416304, "p05": 4.911250016820713, "p10": 4.9123313483290785, "p25": 4.91463293240765, "p75": 4.9428301533376136, "p90": 4.973544849185493, "p95": 5.032650129062923, "p99": 5.258346167136365, "p999": 5.258346167136365}, "cumulative_distribution_function": null}, "errored": {"mean": 0.0, "median": 0.0, "mode": 
0.0, "variance": 0.0, "std_dev": 0.0, "min": 0.0, "max": 0.0, "count": 0, "total_sum": 0.0, "percentiles": {"p001": 0.0, "p01": 0.0, "p05": 0.0, "p10": 0.0, "p25": 0.0, "p75": 0.0, "p90": 0.0, "p95": 0.0, "p99": 0.0, "p999": 0.0}, "cumulative_distribution_function": null}, "incomplete": {"mean": 0.0, "median": 0.0, "mode": 0.0, "variance": 0.0, "std_dev": 0.0, "min": 0.0, "max": 0.0, "count": 0, "total_sum": 0.0, "percentiles": {"p001": 0.0, "p01": 0.0, "p05": 0.0, "p10": 0.0, "p25": 0.0, "p75": 0.0, "p90": 0.0, "p95": 0.0, "p99": 0.0, "p999": 0.0}, "cumulative_distribution_function": null}, "total": {"mean": 4.9413003057767115, "median": 4.921370603906826, "mode": 4.90871001416304, "variance": 0.003194539306669541, "std_dev": 0.056520255720135776, "min": 4.9078994327121315, "max": 5.258346167136365, "count": 47, "total_sum": 232.22191544446835, "percentiles": {"p001": 4.9078994327121315, "p01": 4.90871001416304, "p05": 4.911099831888995, "p10": 4.9123313483290785, "p25": 4.91463293240765, "p75": 4.9428301533376136, "p90": 4.973544849185493, "p95": 5.032650129062923, "p99": 5.258346167136365, "p999": 5.258346167136365}, "cumulative_distribution_function": null}}, "output_tokens_per_second": {"successful": {"mean": 198.13346751788123, "median": 203.04516628745705, "mode": 203.5378269520066, "variance": 613.9948900522365, "std_dev": 24.778920276158857, "min": 0.0, "max": 203.69598368219124, "count": 122, "total_sum": 17849.590625912137, "percentiles": {"p001": 46.71289356157213, "p01": 55.502236337170835, "p05": 190.14888022486173, "p10": 200.69400449782287, "p25": 202.23259402121505, "p75": 203.42923658938793, "p90": 203.5378269520066, "p95": 203.58722454130668, "p99": 203.6860916860917, "p999": 203.69598368219124}, "cumulative_distribution_function": null}, "errored": {"mean": 0.0, "median": 0.0, "mode": 0.0, "variance": 0.0, "std_dev": 0.0, "min": 0.0, "max": 0.0, "count": 0, "total_sum": 0.0, "percentiles": {"p001": 0.0, "p01": 0.0, "p05": 0.0, "p10": 0.0, "p25": 
0.0, "p75": 0.0, "p90": 0.0, "p95": 0.0, "p99": 0.0, "p999": 0.0}, "cumulative_distribution_function": null}, "incomplete": {"mean": 0.0, "median": 0.0, "mode": 0.0, "variance": 0.0, "std_dev": 0.0, "min": 0.0, "max": 0.0, "count": 0, "total_sum": 0.0, "percentiles": {"p001": 0.0, "p01": 0.0, "p05": 0.0, "p10": 0.0, "p25": 0.0, "p75": 0.0, "p90": 0.0, "p95": 0.0, "p99": 0.0, "p999": 0.0}, "cumulative_distribution_function": null}, "total": {"mean": 198.08514508750469, "median": 203.04516628745705, "mode": 203.5378269520066, "variance": 619.6237334717947, "std_dev": 24.89224243558211, "min": 0.0, "max": 203.69598368219124, "count": 125, "total_sum": 18310.99071823841, "percentiles": {"p001": 46.71289356157213, "p01": 55.502236337170835, "p05": 190.14888022486173, "p10": 200.69400449782287, "p25": 202.23259402121505, "p75": 203.4193704835346, "p90": 203.5378269520066, "p95": 203.58722454130668, "p99": 203.6860916860917, "p999": 203.69598368219124}, "cumulative_distribution_function": null}}, "tokens_per_second": {"successful": {"mean": 992.6867036588937, "median": 614.3700014647723, "mode": 615.2712336805046, "variance": 62014350.40386989, "std_dev": 7874.919072845758, "min": 0.0, "max": 159300.81436773148, "count": 139, "total_sum": 5852579.912913391, "percentiles": {"p001": 46.71289356157213, "p01": 55.502236337170835, "p05": 574.9559972583961, "p10": 606.8148148148148, "p25": 611.5928842228055, "p75": 615.0907757735738, "p90": 615.4517975055026, "p95": 615.542119166422, "p99": 617.5359246171967, "p999": 157985.65557672578}, "cumulative_distribution_function": null}, "errored": {"mean": 0.0, "median": 0.0, "mode": 0.0, "variance": 0.0, "std_dev": 0.0, "min": 0.0, "max": 0.0, "count": 0, "total_sum": 0.0, "percentiles": {"p001": 0.0, "p01": 0.0, "p05": 0.0, "p10": 0.0, "p25": 0.0, "p75": 0.0, "p90": 0.0, "p95": 0.0, "p99": 0.0, "p999": 0.0}, "cumulative_distribution_function": null}, "incomplete": {"mean": 0.0, "median": 0.0, "mode": 0.0, "variance": 0.0, "std_dev": 
0.0, "min": 0.0, "max": 0.0, "count": 0, "total_sum": 0.0, "percentiles": {"p001": 0.0, "p01": 0.0, "p05": 0.0, "p10": 0.0, "p25": 0.0, "p75": 0.0, "p90": 0.0, "p95": 0.0, "p99": 0.0, "p999": 0.0}, "cumulative_distribution_function": null}, "total": {"mean": 1002.1268169766876, "median": 614.3700014647723, "mode": 615.2712336805046, "variance": 63939736.95341249, "std_dev": 7996.232672541019, "min": 0.0, "max": 296531.848660591, "count": 143, "total_sum": 6151486.576325966, "percentiles": {"p001": 46.71289356157213, "p01": 55.502236337170835, "p05": 574.9559972583961, "p10": 606.8148148148148, "p25": 611.5928842228055, "p75": 615.0907757735738, "p90": 615.4517975055026, "p95": 615.542119166422, "p99": 1158.3275338304336, "p999": 158008.81383758428}, "cumulative_distribution_function": null}}}, "start_time": 1749157168.1827004, "end_time": 1749157198.1799018, "request_totals": {"successful": 46, "errored": 0, "incomplete": 1, "total": 47}, "request_samples": null, "requests": {"successful": [{"type_": "generative_text_response", "request_id": "73054dd1-486f-4894-a861-075750b82453", "request_type": "text_completions", "scheduler_info": {"requested": true, "completed": true, "errored": false, "canceled": false, "targeted_start_time": 1749157168.179883, "queued_time": 1749157168.1811602, "dequeued_time": 1749157168.1818697, "scheduled_time": 1749157168.181895, "worker_start": 1749157168.1820004, "request_start": 1749157168.1827004, "request_end": 1749157168.871885, "worker_end": 1749157168.8723884, "process_id": 0}, "prompt": "such a sacrifice to her advantage as years of gratitude cannot enough acknowledge. By this time she is actually with them! If such goodness does not make her miserable now, she will never deserve to be happy! What a meeting for her, when she first sees my aunt! We must endeavour to forget all that has passed on either side, said Jane I hope and trust they will yet be happy. 
His consenting to marry her is a proof, I will believe, that he is come to a right way of thinking. Their mutual affection will steady them; and I flatter myself they will settle so quietly, and live in so rational a manner, as may in time make their past imprudence forgotten. Their conduct has been such, replied Elizabeth, as neither you, nor I, nor anybody, can ever forget. It is useless to talk of it. It now occurred to the girls that their mother was in all likelihood perfectly ignorant of what had happened. They went to the library, therefore, and asked their father whether he would not wish them to make it known to her. He was writing, and, without raising his head, coolly replied, Just as you please. May we take my uncle s letter to read to her? Take whatever you like, and get away", "output": ", said Jane. The letter was read, and the girls retired to their own apartments. Elizabeth was the first to return. She found her mother seated in the drawing-room, and looking very pale. She was dressed in a loose white gown, and her hair was disordered. She rose as they entered, and clasped them both in her arms, and then, without saying a word, took her seat on the sofa, and began to weep. Elizabeth and Jane stood by her side, and listened to the sobs which issued from her heart. 
She had no words to express her gratitude, and, in a few minutes,", "prompt_tokens": 257, "output_tokens": 128, "start_time": 1749157168.1827004, "end_time": 1749157168.871885, "first_token_time": 1749157168.2039824, "last_token_time": 1749157168.8717923, "request_latency": 0.6891846656799316, "time_to_first_token_ms": 21.281957626342773, "time_per_output_token_ms": 5.217265337705612, "inter_token_latency_ms": 5.258346167136365, "tokens_per_second": 558.631117568713, "output_tokens_per_second": 185.72670921765}], "errored": [], "incomplete": [], "total": null}, "duration": 29.997201442718506}]} \ No newline at end of file diff --git a/tests/unit/entrypoints/assets/benchmarks_stripped.yaml b/tests/unit/entrypoints/assets/benchmarks_stripped.yaml new file mode 100644 index 000000000..1d39e62d3 --- /dev/null +++ b/tests/unit/entrypoints/assets/benchmarks_stripped.yaml @@ -0,0 +1,1026 @@ +--- +benchmarks: +- type_: generative_benchmark + id_: 97ece514-8717-412f-9dba-2b42bcd9866f + run_id: 93e36b31-b454-471d-ba62-6b2671585485 + args: + profile: + type_: sweep + completed_strategies: 10 + measured_rates: + - 1.5481806532737452 + measured_concurrencies: + - 0.9977627456483604 + max_concurrency: + strategy_type: constant + rate: -1 + initial_burst: true + random_seed: 42 + sweep_size: 10 + rate_type: constant + strategy_types: + - synchronous + strategy_index: 0 + strategy: + type_: synchronous + max_number: + max_duration: 30 + warmup_number: + warmup_duration: + cooldown_number: + cooldown_duration: + run_stats: + start_time: 1749157168.054225 + end_time: 1749157198.213826 + requests_made: + successful: 1 + errored: 0 + incomplete: 0 + total: 1 + queued_time_avg: 0.631589580089488 + scheduled_time_delay_avg: 3.784260851271609e-06 + scheduled_time_sleep_avg: 0 + worker_start_delay_avg: 2.8021792148021943e-05 + worker_time_avg: 0.6373953819274902 + worker_start_time_targeted_delay_avg: 0.6319031715393066 + request_start_time_delay_avg: 0.316034068452551 + 
request_start_time_targeted_delay_avg: 0.6319856542222043 + request_time_delay_avg: 0.00029866238857837433 + request_time_avg: 0.6370967195389119 + worker: + type_: generative_requests_worker + backend_type: openai_http + backend_target: example_target + backend_model: example_model + backend_info: + max_output_tokens: 16384 + timeout: 300 + http2: true + authorization: false + organization: + project: + text_completions_path: "/v1/completions" + chat_completions_path: "/v1/chat/completions" + request_loader: + type_: generative_request_loader + data: prompt_tokens=256,output_tokens=128 + data_args: + processor: example_processor + processor_args: + extras: {} + metrics: + requests_per_second: + successful: + mean: 1.5481806532737452 + median: 1.5530116578512305 + mode: 1.555484186315253 + variance: 0.0003352629331303757 + std_dev: 0.01831018659463567 + min: 1.4509899157628907 + max: 1.5597664461806156 + count: 45 + total_sum: 69.6707872953874 + percentiles: + p001: 1.4509899157628907 + p01: 1.4509899157628907 + p05: 1.5190957942495127 + p10: 1.5377883923356668 + p25: 1.5483918601985445 + p75: 1.5567531615313124 + p90: 1.5583715343236735 + p95: 1.5590938878953722 + p99: 1.5597664461806156 + p999: 1.5597664461806156 + cumulative_distribution_function: + errored: + mean: 0 + median: 0 + mode: 0 + variance: 0 + std_dev: 0 + min: 0 + max: 0 + count: 0 + total_sum: 0 + percentiles: + p001: 0 + p01: 0 + p05: 0 + p10: 0 + p25: 0 + p75: 0 + p90: 0 + p95: 0 + p99: 0 + p999: 0 + cumulative_distribution_function: + incomplete: + mean: 0 + median: 0 + mode: 0 + variance: 0 + std_dev: 0 + min: 0 + max: 0 + count: 0 + total_sum: 0 + percentiles: + p001: 0 + p01: 0 + p05: 0 + p10: 0 + p25: 0 + p75: 0 + p90: 0 + p95: 0 + p99: 0 + p999: 0 + cumulative_distribution_function: + total: + mean: 1.5668128271815418 + median: 1.5530312090734288 + mode: 1.555484186315253 + variance: 0.036536424510388923 + std_dev: 0.19114503527528232 + min: 1.4509899157628907 + max: 3.509921881864626 + 
count: 46 + total_sum: 73.18070917725203 + percentiles: + p001: 1.4509899157628907 + p01: 1.4509899157628907 + p05: 1.5190957942495127 + p10: 1.5377883923356668 + p25: 1.5483918601985445 + p75: 1.5567531615313124 + p90: 1.5583715343236735 + p95: 1.5591048992639953 + p99: 1.5597664461806156 + p999: 3.509921881864626 + cumulative_distribution_function: + request_concurrency: + successful: + mean: 0.9977627456483604 + median: 1 + mode: 1 + variance: 0.002232249044605607 + std_dev: 0.047246682895263736 + min: 0 + max: 1 + count: 2 + total_sum: 1 + percentiles: + p001: 0 + p01: 1 + p05: 1 + p10: 1 + p25: 1 + p75: 1 + p90: 1 + p95: 1 + p99: 1 + p999: 1 + cumulative_distribution_function: + errored: + mean: 0 + median: 0 + mode: 0 + variance: 0 + std_dev: 0 + min: 0 + max: 0 + count: 0 + total_sum: 0 + percentiles: + p001: 0 + p01: 0 + p05: 0 + p10: 0 + p25: 0 + p75: 0 + p90: 0 + p95: 0 + p99: 0 + p999: 0 + cumulative_distribution_function: + incomplete: + mean: 1 + median: 1 + mode: 1 + variance: 0 + std_dev: 0 + min: 1 + max: 1 + count: 1 + total_sum: 1 + percentiles: + p001: 1 + p01: 1 + p05: 1 + p10: 1 + p25: 1 + p75: 1 + p90: 1 + p95: 1 + p99: 1 + p999: 1 + cumulative_distribution_function: + total: + mean: 0.9977433642674269 + median: 1 + mode: 1 + variance: 0.002251543327743578 + std_dev: 0.047450430216633206 + min: 0 + max: 1 + count: 2 + total_sum: 1 + percentiles: + p001: 0 + p01: 1 + p05: 1 + p10: 1 + p25: 1 + p75: 1 + p90: 1 + p95: 1 + p99: 1 + p999: 1 + cumulative_distribution_function: + request_latency: + successful: + mean: 0.6444743664368339 + median: 0.6424565315246582 + mode: 0.6395885944366455 + variance: 6.414585873782315e-05 + std_dev: 0.008009110982988258 + min: 0.6395885944366455 + max: 0.6891846656799316 + count: 46 + total_sum: 29.64582085609436 + percentiles: + p001: 0.6395885944366455 + p01: 0.6395885944366455 + p05: 0.6399857997894287 + p10: 0.6403069496154785 + p25: 0.6409540176391602 + p75: 0.644390344619751 + p90: 0.6488735675811768 + p95: 
0.656728982925415 + p99: 0.6891846656799316 + p999: 0.6891846656799316 + cumulative_distribution_function: + errored: + mean: 0 + median: 0 + mode: 0 + variance: 0 + std_dev: 0 + min: 0 + max: 0 + count: 0 + total_sum: 0 + percentiles: + p001: 0 + p01: 0 + p05: 0 + p10: 0 + p25: 0 + p75: 0 + p90: 0 + p95: 0 + p99: 0 + p999: 0 + cumulative_distribution_function: + incomplete: + mean: 0.2836878299713135 + median: 0.2836878299713135 + mode: 0.2836878299713135 + variance: 0 + std_dev: 0 + min: 0.2836878299713135 + max: 0.2836878299713135 + count: 1 + total_sum: 0.2836878299713135 + percentiles: + p001: 0.2836878299713135 + p01: 0.2836878299713135 + p05: 0.2836878299713135 + p10: 0.2836878299713135 + p25: 0.2836878299713135 + p75: 0.2836878299713135 + p90: 0.2836878299713135 + p95: 0.2836878299713135 + p99: 0.2836878299713135 + p999: 0.2836878299713135 + cumulative_distribution_function: + total: + mean: 0.6367980571503334 + median: 0.642310380935669 + mode: 0.2836878299713135 + variance: 0.0027733643692853522 + std_dev: 0.05266274175624881 + min: 0.2836878299713135 + max: 0.6891846656799316 + count: 47 + total_sum: 29.929508686065674 + percentiles: + p001: 0.2836878299713135 + p01: 0.2836878299713135 + p05: 0.6398613452911377 + p10: 0.6402454376220703 + p25: 0.640899658203125 + p75: 0.644390344619751 + p90: 0.6488735675811768 + p95: 0.656728982925415 + p99: 0.6891846656799316 + p999: 0.6891846656799316 + cumulative_distribution_function: + prompt_token_count: + successful: + mean: 257.1086956521739 + median: 257 + mode: 257 + variance: 0.14035916824196598 + std_dev: 0.37464538999161057 + min: 257 + max: 259 + count: 46 + total_sum: 11827 + percentiles: + p001: 257 + p01: 257 + p05: 257 + p10: 257 + p25: 257 + p75: 257 + p90: 257 + p95: 258 + p99: 259 + p999: 259 + cumulative_distribution_function: + errored: + mean: 0 + median: 0 + mode: 0 + variance: 0 + std_dev: 0 + min: 0 + max: 0 + count: 0 + total_sum: 0 + percentiles: + p001: 0 + p01: 0 + p05: 0 + p10: 0 + p25: 0 
+ p75: 0 + p90: 0 + p95: 0 + p99: 0 + p999: 0 + cumulative_distribution_function: + incomplete: + mean: 256 + median: 256 + mode: 256 + variance: 0 + std_dev: 0 + min: 256 + max: 256 + count: 1 + total_sum: 256 + percentiles: + p001: 256 + p01: 256 + p05: 256 + p10: 256 + p25: 256 + p75: 256 + p90: 256 + p95: 256 + p99: 256 + p999: 256 + cumulative_distribution_function: + total: + mean: 257.0851063829787 + median: 257 + mode: 256 + variance: 0.16296966953372566 + std_dev: 0.40369502044702715 + min: 256 + max: 259 + count: 47 + total_sum: 12083 + percentiles: + p001: 256 + p01: 256 + p05: 257 + p10: 257 + p25: 257 + p75: 257 + p90: 257 + p95: 258 + p99: 259 + p999: 259 + cumulative_distribution_function: + output_token_count: + successful: + mean: 127.99999999999999 + median: 128 + mode: 128 + variance: 2.01948391736579e-28 + std_dev: 1.4210854715202002e-14 + min: 128 + max: 128 + count: 46 + total_sum: 5888 + percentiles: + p001: 128 + p01: 128 + p05: 128 + p10: 128 + p25: 128 + p75: 128 + p90: 128 + p95: 128 + p99: 128 + p999: 128 + cumulative_distribution_function: + errored: + mean: 0 + median: 0 + mode: 0 + variance: 0 + std_dev: 0 + min: 0 + max: 0 + count: 0 + total_sum: 0 + percentiles: + p001: 0 + p01: 0 + p05: 0 + p10: 0 + p25: 0 + p75: 0 + p90: 0 + p95: 0 + p99: 0 + p999: 0 + cumulative_distribution_function: + incomplete: + mean: 0 + median: 0 + mode: 0 + variance: 0 + std_dev: 0 + min: 0 + max: 0 + count: 0 + total_sum: 0 + percentiles: + p001: 0 + p01: 0 + p05: 0 + p10: 0 + p25: 0 + p75: 0 + p90: 0 + p95: 0 + p99: 0 + p999: 0 + cumulative_distribution_function: + total: + mean: 126.44680851063832 + median: 128 + mode: 55 + variance: 110.97057492077867 + std_dev: 10.534257207832866 + min: 55 + max: 128 + count: 47 + total_sum: 5943 + percentiles: + p001: 55 + p01: 55 + p05: 128 + p10: 128 + p25: 128 + p75: 128 + p90: 128 + p95: 128 + p99: 128 + p999: 128 + cumulative_distribution_function: + time_to_first_token_ms: + successful: + mean: 
16.792535781860348 + median: 16.38054847717285 + mode: 15.790939331054688 + variance: 1.2776652847210441 + std_dev: 1.1303385708366516 + min: 15.790939331054688 + max: 21.281957626342773 + count: 46 + total_sum: 772.4566459655762 + percentiles: + p001: 15.790939331054688 + p01: 15.790939331054688 + p05: 15.971660614013672 + p10: 16.034841537475586 + p25: 16.111373901367188 + p75: 16.840696334838867 + p90: 18.505334854125977 + p95: 19.00935173034668 + p99: 21.281957626342773 + p999: 21.281957626342773 + cumulative_distribution_function: + errored: + mean: 0 + median: 0 + mode: 0 + variance: 0 + std_dev: 0 + min: 0 + max: 0 + count: 0 + total_sum: 0 + percentiles: + p001: 0 + p01: 0 + p05: 0 + p10: 0 + p25: 0 + p75: 0 + p90: 0 + p95: 0 + p99: 0 + p999: 0 + cumulative_distribution_function: + incomplete: + mean: 0 + median: 0 + mode: 0 + variance: 0 + std_dev: 0 + min: 0 + max: 0 + count: 0 + total_sum: 0 + percentiles: + p001: 0 + p01: 0 + p05: 0 + p10: 0 + p25: 0 + p75: 0 + p90: 0 + p95: 0 + p99: 0 + p999: 0 + cumulative_distribution_function: + total: + mean: 16.777170465347616 + median: 16.371726989746094 + mode: 15.790939331054688 + variance: 1.2613411927317046 + std_dev: 1.1230944718641014 + min: 15.790939331054688 + max: 21.281957626342773 + count: 47 + total_sum: 788.5270118713379 + percentiles: + p001: 15.790939331054688 + p01: 15.790939331054688 + p05: 15.971660614013672 + p10: 16.034841537475586 + p25: 16.100645065307617 + p75: 16.840696334838867 + p90: 18.505334854125977 + p95: 19.00935173034668 + p99: 21.281957626342773 + p999: 21.281957626342773 + cumulative_distribution_function: + time_per_output_token_ms: + successful: + mean: 4.90300272307966 + median: 4.885653033852577 + mode: 4.870360717177391 + variance: 0.003163643010108571 + std_dev: 0.05624627107736628 + min: 4.870360717177391 + max: 5.217265337705612 + count: 46 + total_sum: 225.5381252616644 + percentiles: + p001: 4.870360717177391 + p01: 4.870360717177391 + p05: 4.8728808760643005 + p10: 
4.873953759670258 + p25: 4.876237362623215 + p75: 4.904214292764664 + p90: 4.934689030051231 + p95: 4.993332549929619 + p99: 5.217265337705612 + p999: 5.217265337705612 + cumulative_distribution_function: + errored: + mean: 0 + median: 0 + mode: 0 + variance: 0 + std_dev: 0 + min: 0 + max: 0 + count: 0 + total_sum: 0 + percentiles: + p001: 0 + p01: 0 + p05: 0 + p10: 0 + p25: 0 + p75: 0 + p90: 0 + p95: 0 + p99: 0 + p999: 0 + cumulative_distribution_function: + incomplete: + mean: 0 + median: 0 + mode: 0 + variance: 0 + std_dev: 0 + min: 0 + max: 0 + count: 0 + total_sum: 0 + percentiles: + p001: 0 + p01: 0 + p05: 0 + p10: 0 + p25: 0 + p75: 0 + p90: 0 + p95: 0 + p99: 0 + p999: 0 + cumulative_distribution_function: + total: + mean: 4.9022222114856975 + median: 4.882922396063805 + mode: 4.870360717177391 + variance: 0.003199582258516055 + std_dev: 0.05656485002646127 + min: 4.81866489757191 + max: 5.217265337705612 + count: 47 + total_sum: 230.3567901592363 + percentiles: + p001: 4.81866489757191 + p01: 4.870360717177391 + p05: 4.872731864452362 + p10: 4.873953759670258 + p25: 4.876237362623215 + p75: 4.904214292764664 + p90: 4.934689030051231 + p95: 4.993332549929619 + p99: 5.217265337705612 + p999: 5.217265337705612 + cumulative_distribution_function: + inter_token_latency_ms: + successful: + mean: 4.941609043733832 + median: 4.9241227427805505 + mode: 4.90871001416304 + variance: 0.003213660306132974 + std_dev: 0.056689155101597465 + min: 4.90871001416304 + max: 5.258346167136365 + count: 46 + total_sum: 227.31401601175622 + percentiles: + p001: 4.90871001416304 + p01: 4.90871001416304 + p05: 4.911250016820713 + p10: 4.9123313483290785 + p25: 4.91463293240765 + p75: 4.9428301533376136 + p90: 4.973544849185493 + p95: 5.032650129062923 + p99: 5.258346167136365 + p999: 5.258346167136365 + cumulative_distribution_function: + errored: + mean: 0 + median: 0 + mode: 0 + variance: 0 + std_dev: 0 + min: 0 + max: 0 + count: 0 + total_sum: 0 + percentiles: + p001: 0 + p01: 0 + 
p05: 0 + p10: 0 + p25: 0 + p75: 0 + p90: 0 + p95: 0 + p99: 0 + p999: 0 + cumulative_distribution_function: + incomplete: + mean: 0 + median: 0 + mode: 0 + variance: 0 + std_dev: 0 + min: 0 + max: 0 + count: 0 + total_sum: 0 + percentiles: + p001: 0 + p01: 0 + p05: 0 + p10: 0 + p25: 0 + p75: 0 + p90: 0 + p95: 0 + p99: 0 + p999: 0 + cumulative_distribution_function: + total: + mean: 4.9413003057767115 + median: 4.921370603906826 + mode: 4.90871001416304 + variance: 0.003194539306669541 + std_dev: 0.056520255720135776 + min: 4.9078994327121315 + max: 5.258346167136365 + count: 47 + total_sum: 232.22191544446835 + percentiles: + p001: 4.9078994327121315 + p01: 4.90871001416304 + p05: 4.911099831888995 + p10: 4.9123313483290785 + p25: 4.91463293240765 + p75: 4.9428301533376136 + p90: 4.973544849185493 + p95: 5.032650129062923 + p99: 5.258346167136365 + p999: 5.258346167136365 + cumulative_distribution_function: + output_tokens_per_second: + successful: + mean: 198.13346751788123 + median: 203.04516628745705 + mode: 203.5378269520066 + variance: 613.9948900522365 + std_dev: 24.778920276158857 + min: 0 + max: 203.69598368219124 + count: 122 + total_sum: 17849.590625912137 + percentiles: + p001: 46.71289356157213 + p01: 55.502236337170835 + p05: 190.14888022486173 + p10: 200.69400449782287 + p25: 202.23259402121505 + p75: 203.42923658938793 + p90: 203.5378269520066 + p95: 203.58722454130668 + p99: 203.6860916860917 + p999: 203.69598368219124 + cumulative_distribution_function: + errored: + mean: 0 + median: 0 + mode: 0 + variance: 0 + std_dev: 0 + min: 0 + max: 0 + count: 0 + total_sum: 0 + percentiles: + p001: 0 + p01: 0 + p05: 0 + p10: 0 + p25: 0 + p75: 0 + p90: 0 + p95: 0 + p99: 0 + p999: 0 + cumulative_distribution_function: + incomplete: + mean: 0 + median: 0 + mode: 0 + variance: 0 + std_dev: 0 + min: 0 + max: 0 + count: 0 + total_sum: 0 + percentiles: + p001: 0 + p01: 0 + p05: 0 + p10: 0 + p25: 0 + p75: 0 + p90: 0 + p95: 0 + p99: 0 + p999: 0 + 
cumulative_distribution_function: + total: + mean: 198.08514508750469 + median: 203.04516628745705 + mode: 203.5378269520066 + variance: 619.6237334717947 + std_dev: 24.89224243558211 + min: 0 + max: 203.69598368219124 + count: 125 + total_sum: 18310.99071823841 + percentiles: + p001: 46.71289356157213 + p01: 55.502236337170835 + p05: 190.14888022486173 + p10: 200.69400449782287 + p25: 202.23259402121505 + p75: 203.4193704835346 + p90: 203.5378269520066 + p95: 203.58722454130668 + p99: 203.6860916860917 + p999: 203.69598368219124 + cumulative_distribution_function: + tokens_per_second: + successful: + mean: 992.6867036588937 + median: 614.3700014647723 + mode: 615.2712336805046 + variance: 62014350.40386989 + std_dev: 7874.919072845758 + min: 0 + max: 159300.81436773148 + count: 139 + total_sum: 5852579.912913391 + percentiles: + p001: 46.71289356157213 + p01: 55.502236337170835 + p05: 574.9559972583961 + p10: 606.8148148148148 + p25: 611.5928842228055 + p75: 615.0907757735738 + p90: 615.4517975055026 + p95: 615.542119166422 + p99: 617.5359246171967 + p999: 157985.65557672578 + cumulative_distribution_function: + errored: + mean: 0 + median: 0 + mode: 0 + variance: 0 + std_dev: 0 + min: 0 + max: 0 + count: 0 + total_sum: 0 + percentiles: + p001: 0 + p01: 0 + p05: 0 + p10: 0 + p25: 0 + p75: 0 + p90: 0 + p95: 0 + p99: 0 + p999: 0 + cumulative_distribution_function: + incomplete: + mean: 0 + median: 0 + mode: 0 + variance: 0 + std_dev: 0 + min: 0 + max: 0 + count: 0 + total_sum: 0 + percentiles: + p001: 0 + p01: 0 + p05: 0 + p10: 0 + p25: 0 + p75: 0 + p90: 0 + p95: 0 + p99: 0 + p999: 0 + cumulative_distribution_function: + total: + mean: 1002.1268169766876 + median: 614.3700014647723 + mode: 615.2712336805046 + variance: 63939736.95341249 + std_dev: 7996.232672541019 + min: 0 + max: 296531.848660591 + count: 143 + total_sum: 6151486.576325966 + percentiles: + p001: 46.71289356157213 + p01: 55.502236337170835 + p05: 574.9559972583961 + p10: 606.8148148148148 + p25: 
611.5928842228055 + p75: 615.0907757735738 + p90: 615.4517975055026 + p95: 615.542119166422 + p99: 1158.3275338304336 + p999: 158008.81383758428 + cumulative_distribution_function: + start_time: 1749157168.1827004 + end_time: 1749157198.1799018 + request_totals: + successful: 46 + errored: 0 + incomplete: 1 + total: 47 + request_samples: + requests: + successful: + - type_: generative_text_response + request_id: 73054dd1-486f-4894-a861-075750b82453 + request_type: text_completions + scheduler_info: + requested: true + completed: true + errored: false + canceled: false + targeted_start_time: 1749157168.179883 + queued_time: 1749157168.1811602 + dequeued_time: 1749157168.1818697 + scheduled_time: 1749157168.181895 + worker_start: 1749157168.1820004 + request_start: 1749157168.1827004 + request_end: 1749157168.871885 + worker_end: 1749157168.8723884 + process_id: 0 + prompt: such a sacrifice to her advantage as years of gratitude cannot enough + acknowledge. By this time she is actually with them! If such goodness does + not make her miserable now, she will never deserve to be happy! What a meeting + for her, when she first sees my aunt! We must endeavour to forget all that + has passed on either side, said Jane I hope and trust they will yet be happy. + His consenting to marry her is a proof, I will believe, that he is come to + a right way of thinking. Their mutual affection will steady them; and I flatter + myself they will settle so quietly, and live in so rational a manner, as may + in time make their past imprudence forgotten. Their conduct has been such, + replied Elizabeth, as neither you, nor I, nor anybody, can ever forget. It + is useless to talk of it. It now occurred to the girls that their mother was + in all likelihood perfectly ignorant of what had happened. They went to the + library, therefore, and asked their father whether he would not wish them + to make it known to her. 
He was writing, and, without raising his head, coolly + replied, Just as you please. May we take my uncle s letter to read to her? + Take whatever you like, and get away + output: ", said Jane. The letter was read, and the girls retired to their own + apartments. Elizabeth was the first to return. She found her mother seated + in the drawing-room, and looking very pale. She was dressed in a loose white + gown, and her hair was disordered. She rose as they entered, and clasped them + both in her arms, and then, without saying a word, took her seat on the sofa, + and began to weep. Elizabeth and Jane stood by her side, and listened to the + sobs which issued from her heart. She had no words to express her gratitude, + and, in a few minutes," + prompt_tokens: 257 + output_tokens: 128 + start_time: 1749157168.1827004 + end_time: 1749157168.871885 + first_token_time: 1749157168.2039824 + last_token_time: 1749157168.8717923 + request_latency: 0.6891846656799316 + time_to_first_token_ms: 21.281957626342773 + time_per_output_token_ms: 5.217265337705612 + inter_token_latency_ms: 5.258346167136365 + tokens_per_second: 558.631117568713 + output_tokens_per_second: 185.72670921765 + total: + duration: 29.997201442718506 diff --git a/tests/unit/entrypoints/assets/benchmarks_stripped_output.txt b/tests/unit/entrypoints/assets/benchmarks_stripped_output.txt new file mode 100644 index 000000000..170d1e6a4 --- /dev/null +++ b/tests/unit/entrypoints/assets/benchmarks_stripped_output.txt @@ -0,0 +1,31 @@ + + +Benchmarks Metadata: + Run id:93e36b31-b454-471d-ba62-6b2671585485 + Duration:30.2 seconds + Profile:type=sweep, strategies=['synchronous', 'throughput', 'constant', 'constant', 'constant', 'constant', 'constant', 'constant', 'constant', 'constant'], + max_concurrency=None + Args:max_number=None, max_duration=30.0, warmup_number=None, warmup_duration=None, cooldown_number=None, cooldown_duration=None + Worker:type_='generative_requests_worker' backend_type='openai_http' 
backend_target='example_target' backend_model='example_model' backend_info={'max_output_tokens': 16384, + 'timeout': 300, 'http2': True, 'authorization': False, 'organization': None, 'project': None, 'text_completions_path': '/v1/completions', 'chat_completions_path': + '/v1/chat/completions'} + Request Loader:type_='generative_request_loader' data='prompt_tokens=256,output_tokens=128' data_args=None processor='example_processor' processor_args=None + Extras:None + + +Benchmarks Info: +=================================================================================================================================================== +Metadata |||| Requests Made ||| Prompt Tok/Req ||| Output Tok/Req ||| Prompt Tok Total||| Output Tok Total || + Benchmark| Start Time| End Time| Duration (s)| Comp| Inc| Err| Comp| Inc| Err| Comp| Inc| Err| Comp| Inc| Err| Comp| Inc| Err +-----------|-----------|---------|-------------|------|-----|-----|------|------|----|-------|-----|-----|-------|-----|-----|-------|------|------ +synchronous| 16:59:28| 16:59:58| 30.0| 46| 1| 0| 257.1| 256.0| 0.0| 128.0| 0.0| 0.0| 11827| 256| 0| 5888| 0| 0 +=================================================================================================================================================== + + +Benchmarks Stats: +=============================================================================================================================================== +Metadata | Request Stats || Out Tok/sec| Tot Tok/sec| Req Latency (sec) ||| TTFT (ms) ||| ITL (ms) ||| TPOT (ms) || + Benchmark| Per Second| Concurrency| mean| mean| mean| median| p99| mean| median| p99| mean| median| p99| mean| median| p99 +-----------|-----------|------------|------------|------------|------|--------|------|-----|-------|-----|-----|-------|----|-----|-------|---- +synchronous| 1.55| 1.00| 198.1| 992.7| 0.64| 0.64| 0.69| 16.8| 16.4| 21.3| 4.9| 4.9| 5.3| 4.9| 4.9| 5.2 
"""Tests for re-importing a saved benchmarks report.

The tests feed stored report files from the ``assets`` directory to
``reimport_benchmarks_report`` and check both the console output it prints
and the file it re-exports.
"""

import filecmp
import os
from pathlib import Path
from unittest import mock

import pytest

from guidellm.benchmark import reimport_benchmarks_report

# Set to True to re-write the expected output.
REGENERATE_ARTIFACTS = False

# Terminal width forced while the report is rendered (see
# generic_test_display_entrypoint).
TERMINAL_COLUMNS = "180"

# File holding the console output both display tests are compared against.
EXPECTED_OUTPUT_FILENAME = "benchmarks_stripped_output.txt"


@pytest.fixture
def get_test_asset_dir():
    """Provide a callable that returns the ``assets`` directory beside this file."""

    def _() -> Path:
        return Path(__file__).parent / "assets"

    return _


@pytest.fixture
def cleanup():
    """Yield a list of paths; each path appended to it is deleted at teardown."""
    to_delete: list[Path] = []
    yield to_delete
    for item in to_delete:
        # missing_ok tolerates a test that failed before creating the file and
        # avoids the check-then-delete race of exists() followed by unlink().
        item.unlink(missing_ok=True)


def test_display_entrypoint_json(capfd, get_test_asset_dir):
    """Displaying the JSON report prints the expected console output."""
    generic_test_display_entrypoint(
        "benchmarks_stripped.json",
        capfd,
        get_test_asset_dir,
    )


def test_display_entrypoint_yaml(capfd, get_test_asset_dir):
    """Displaying the YAML report prints the expected console output."""
    generic_test_display_entrypoint(
        "benchmarks_stripped.yaml",
        capfd,
        get_test_asset_dir,
    )


def generic_test_display_entrypoint(filename, capfd, get_test_asset_dir):
    """Re-import ``filename`` without exporting and compare the captured stdout.

    Args:
        filename: Name of the report file inside the assets directory.
        capfd: pytest's ``capfd`` fixture, used to capture what was printed.
        get_test_asset_dir: Callable returning the assets directory.

    When ``REGENERATE_ARTIFACTS`` is true the captured output overwrites the
    expected-output file and the test fails on purpose.
    """
    asset_dir = get_test_asset_dir()
    # CLI output depends on terminal width. The override is scoped to this call
    # so it does not leak into the environment seen by other tests.
    with mock.patch.dict(os.environ, {"COLUMNS": TERMINAL_COLUMNS}):
        reimport_benchmarks_report(asset_dir / filename, None)
    out, _ = capfd.readouterr()
    expected_output_path = asset_dir / EXPECTED_OUTPUT_FILENAME
    if REGENERATE_ARTIFACTS:
        # Write with the same encoding the comparison below reads with.
        expected_output_path.write_text(out, encoding="utf_8")
        # Fail to prevent accidentally leaving regeneration mode on
        pytest.fail("Test bypassed to regenerate output")
    expected_output = expected_output_path.read_text(encoding="utf_8")
    assert out == expected_output


def test_reexporting_benchmark(get_test_asset_dir, cleanup):
    """Re-exporting the JSON report yields a file identical to the source."""
    asset_dir = get_test_asset_dir()
    source_file = asset_dir / "benchmarks_stripped.json"
    exported_file = asset_dir / "benchmarks_reexported.json"
    # If you need to inspect the output to see why it failed, comment out
    # the cleanup statement.
    cleanup.append(exported_file)
    # Remove any stale export left behind by an earlier, interrupted run.
    exported_file.unlink(missing_ok=True)
    reimport_benchmarks_report(source_file, exported_file)
    # The reexported file should exist and be identical to the source.
    assert exported_file.exists()
    assert filecmp.cmp(source_file, exported_file, shallow=False)


if __name__ == "__main__":
    # The tests here are plain pytest functions, which unittest.main() would
    # not collect; run them through pytest and propagate its exit status.
    raise SystemExit(pytest.main([__file__]))