From 58bca2cbfc68fc1cd7136e1a849092c8e706411f Mon Sep 17 00:00:00 2001 From: Jared O'Connell Date: Thu, 12 Jun 2025 10:42:15 -0400 Subject: [PATCH 01/18] Allow result file to be re-displayed --- .gitignore | 2 +- src/guidellm/__main__.py | 12 +++++++++++- src/guidellm/benchmark/__init__.py | 2 +- src/guidellm/benchmark/entrypoints.py | 17 +++++++++++------ src/guidellm/benchmark/output.py | 17 +++++++++++++++++ 5 files changed, 41 insertions(+), 9 deletions(-) diff --git a/.gitignore b/.gitignore index 1bafbfdc4..d8674ea74 100644 --- a/.gitignore +++ b/.gitignore @@ -168,7 +168,7 @@ cython_debug/ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. -#.idea/ +.idea/ # MacOS files diff --git a/src/guidellm/__main__.py b/src/guidellm/__main__.py index 7dc068359..954a66996 100644 --- a/src/guidellm/__main__.py +++ b/src/guidellm/__main__.py @@ -7,7 +7,7 @@ import click from guidellm.backend import BackendType -from guidellm.benchmark import ProfileType, benchmark_generative_text +from guidellm.benchmark import ProfileType, benchmark_generative_text, display_benchmarks_report from guidellm.config import print_config from guidellm.preprocess.dataset import ShortPromptStrategy, process_dataset from guidellm.scheduler import StrategyType @@ -282,6 +282,16 @@ def benchmark( ) +@cli.command(help="Redisplay a saved benchmark report.") +@click.argument( + "path", + type=click.Path(), + default=Path.cwd() / "benchmarks.json", +) +def display(path): + asyncio.run(display_benchmarks_report(path)) + + def decode_escaped_str(_ctx, _param, value): """ Click auto adds characters. 
For example, when using --pad-char "\n", diff --git a/src/guidellm/benchmark/__init__.py b/src/guidellm/benchmark/__init__.py index f5130711d..7297e3d59 100644 --- a/src/guidellm/benchmark/__init__.py +++ b/src/guidellm/benchmark/__init__.py @@ -12,7 +12,7 @@ StatusBreakdown, ) from .benchmarker import Benchmarker, BenchmarkerResult, GenerativeBenchmarker -from .entrypoints import benchmark_generative_text +from .entrypoints import benchmark_generative_text, display_benchmarks_report from .output import GenerativeBenchmarksConsole, GenerativeBenchmarksReport from .profile import ( AsyncProfile, diff --git a/src/guidellm/benchmark/entrypoints.py b/src/guidellm/benchmark/entrypoints.py index 2f6c7182f..e7c01f4d1 100644 --- a/src/guidellm/benchmark/entrypoints.py +++ b/src/guidellm/benchmark/entrypoints.py @@ -1,3 +1,4 @@ +import os from collections.abc import Iterable from pathlib import Path from typing import Any, Literal, Optional, Union @@ -121,13 +122,8 @@ async def benchmark_generative_text( ) if output_console: - orig_enabled = console.enabled - console.enabled = True console.benchmarks = report.benchmarks - console.print_benchmarks_metadata() - console.print_benchmarks_info() - console.print_benchmarks_stats() - console.enabled = orig_enabled + console.print_full_report() if output_path: console.print_line("\nSaving benchmarks report...") @@ -139,3 +135,12 @@ async def benchmark_generative_text( console.print_line("\nBenchmarking complete.") return report, saved_path + +async def display_benchmarks_report(file: str): + console = GenerativeBenchmarksConsole(enabled=True) + if not os.path.exists(file): + console.print_line(f"File {file} not found.") + return + report = GenerativeBenchmarksReport.load_file(file) + console.benchmarks = report.benchmarks + console.print_full_report() \ No newline at end of file diff --git a/src/guidellm/benchmark/output.py b/src/guidellm/benchmark/output.py index 4847160d5..cbe4c4bf4 100644 --- a/src/guidellm/benchmark/output.py 
+++ b/src/guidellm/benchmark/output.py @@ -944,3 +944,20 @@ def print_benchmarks_stats(self): title="Benchmarks Stats", sections=sections, ) + + def print_full_report(self): + """ + Print out the benchmark statistics to the console. + Temporarily enables the console if it's disabled. + + Format: + - Metadata + - Info + - Stats + """ + orig_enabled = self.enabled + self.enabled = True + self.print_benchmarks_metadata() + self.print_benchmarks_info() + self.print_benchmarks_stats() + self.enabled = orig_enabled \ No newline at end of file From 90328e68d926f7aca48ae2bc3d43d13a51f9abb3 Mon Sep 17 00:00:00 2001 From: Jared O'Connell Date: Thu, 12 Jun 2025 19:46:18 -0400 Subject: [PATCH 02/18] Added test for JSON --- src/guidellm/__main__.py | 2 +- src/guidellm/benchmark/entrypoints.py | 2 +- tests/unit/entrypoints/__init__.py | 0 .../assets/benchmarks_stripped_output.txt | 50 +++++++++++++++++++ .../entrypoints/test_display_entrypoint.py | 26 ++++++++++ 5 files changed, 78 insertions(+), 2 deletions(-) create mode 100644 tests/unit/entrypoints/__init__.py create mode 100644 tests/unit/entrypoints/assets/benchmarks_stripped_output.txt create mode 100644 tests/unit/entrypoints/test_display_entrypoint.py diff --git a/src/guidellm/__main__.py b/src/guidellm/__main__.py index 954a66996..1fb17adbb 100644 --- a/src/guidellm/__main__.py +++ b/src/guidellm/__main__.py @@ -289,7 +289,7 @@ def benchmark( default=Path.cwd() / "benchmarks.json", ) def display(path): - asyncio.run(display_benchmarks_report(path)) + display_benchmarks_report(path) def decode_escaped_str(_ctx, _param, value): diff --git a/src/guidellm/benchmark/entrypoints.py b/src/guidellm/benchmark/entrypoints.py index e7c01f4d1..426ecd7c0 100644 --- a/src/guidellm/benchmark/entrypoints.py +++ b/src/guidellm/benchmark/entrypoints.py @@ -136,7 +136,7 @@ async def benchmark_generative_text( return report, saved_path -async def display_benchmarks_report(file: str): +def display_benchmarks_report(file: str): console = 
GenerativeBenchmarksConsole(enabled=True) if not os.path.exists(file): console.print_line(f"File {file} not found.") diff --git a/tests/unit/entrypoints/__init__.py b/tests/unit/entrypoints/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/unit/entrypoints/assets/benchmarks_stripped_output.txt b/tests/unit/entrypoints/assets/benchmarks_stripped_output.txt new file mode 100644 index 000000000..03202c088 --- /dev/null +++ b/tests/unit/entrypoints/assets/benchmarks_stripped_output.txt @@ -0,0 +1,50 @@ + + +Benchmarks Metadata: + Run id:93e36b31-b454-471d-ba62-6b2671585485 + Duration:30.2 seconds + Profile:type=sweep, strategies=['synchronous', 'throughput', 'constant', + 'constant', 'constant', 'constant', 'constant', 'constant', 'constant', + 'constant'], max_concurrency=None + Args:max_number=None, max_duration=30.0, warmup_number=None, + warmup_duration=None, cooldown_number=None, cooldown_duration=None + Worker:type_='generative_requests_worker' backend_type='openai_http' + backend_target='example_target' backend_model='example_model' + backend_info={'max_output_tokens': 16384, 'timeout': 300, 'http2': True, + 'authorization': False, 'organization': None, 'project': None, + 'text_completions_path': '/v1/completions', 'chat_completions_path': + '/v1/chat/completions'} + Request Loader:type_='generative_request_loader' + data='prompt_tokens=256,output_tokens=128' data_args=None + processor='example_processor' processor_args=None + Extras:None + + +Benchmarks Info: +================================================================================ +=================================================================== +Metadata |||| Requests Made ||| Prompt +Tok/Req ||| Output Tok/Req ||| Prompt Tok Total||| Output Tok Total || + Benchmark| Start Time| End Time| Duration (s)| Comp| Inc| Err| Comp| +Inc| Err| Comp| Inc| Err| Comp| Inc| Err| Comp| Inc| Err +-----------|-----------|---------|-------------|------|-----|-----|------|------ 
+|----|-------|-----|-----|-------|-----|-----|-------|------|------ +synchronous| 16:59:28| 16:59:58| 30.0| 46| 1| 0| 257.1| +256.0| 0.0| 128.0| 0.0| 0.0| 11827| 256| 0| 5888| 0| 0 +================================================================================ +=================================================================== + + +Benchmarks Stats: +================================================================================ +=============================================================== +Metadata | Request Stats || Out Tok/sec| Tot Tok/sec| Req Latency +(sec) ||| TTFT (ms) ||| ITL (ms) ||| TPOT (ms) || + Benchmark| Per Second| Concurrency| mean| mean| mean| median| +p99| mean| median| p99| mean| median| p99| mean| median| p99 +-----------|-----------|------------|------------|------------|------|--------|- +-----|-----|-------|-----|-----|-------|----|-----|-------|---- +synchronous| 1.55| 1.00| 198.1| 992.7| 0.64| 0.64| +0.69| 16.8| 16.4| 21.3| 4.9| 4.9| 5.3| 4.9| 4.9| 5.2 +================================================================================ +=============================================================== diff --git a/tests/unit/entrypoints/test_display_entrypoint.py b/tests/unit/entrypoints/test_display_entrypoint.py new file mode 100644 index 000000000..124b93143 --- /dev/null +++ b/tests/unit/entrypoints/test_display_entrypoint.py @@ -0,0 +1,26 @@ +import os +import unittest +import pytest + +from guidellm.benchmark import display_benchmarks_report + +@pytest.fixture() +def get_test_asset_dir(): + def _() -> str: + return os.path.dirname(os.path.abspath(__file__)) + "/assets" + + return _ + + +def test_display_entrypoint_json(capfd, get_test_asset_dir): + asset_dir = get_test_asset_dir() + display_benchmarks_report(asset_dir + "/benchmarks_stripped.json") + out, err = capfd.readouterr() + expected_output_path = asset_dir + "/benchmarks_stripped_output.txt" + with open(expected_output_path, 'r', encoding='utf_8') as file: + expected_output 
= file.read() + assert out == expected_output + + +if __name__ == '__main__': + unittest.main() From 07d8150f4cafd169255b75c0b74135cb48fa6c0c Mon Sep 17 00:00:00 2001 From: Jared O'Connell Date: Thu, 12 Jun 2025 23:47:15 -0400 Subject: [PATCH 03/18] Added yaml test --- .../assets/benchmarks_stripped.yaml | 1026 +++++++++++++++++ .../entrypoints/test_display_entrypoint.py | 10 +- 2 files changed, 1035 insertions(+), 1 deletion(-) create mode 100644 tests/unit/entrypoints/assets/benchmarks_stripped.yaml diff --git a/tests/unit/entrypoints/assets/benchmarks_stripped.yaml b/tests/unit/entrypoints/assets/benchmarks_stripped.yaml new file mode 100644 index 000000000..1d39e62d3 --- /dev/null +++ b/tests/unit/entrypoints/assets/benchmarks_stripped.yaml @@ -0,0 +1,1026 @@ +--- +benchmarks: +- type_: generative_benchmark + id_: 97ece514-8717-412f-9dba-2b42bcd9866f + run_id: 93e36b31-b454-471d-ba62-6b2671585485 + args: + profile: + type_: sweep + completed_strategies: 10 + measured_rates: + - 1.5481806532737452 + measured_concurrencies: + - 0.9977627456483604 + max_concurrency: + strategy_type: constant + rate: -1 + initial_burst: true + random_seed: 42 + sweep_size: 10 + rate_type: constant + strategy_types: + - synchronous + strategy_index: 0 + strategy: + type_: synchronous + max_number: + max_duration: 30 + warmup_number: + warmup_duration: + cooldown_number: + cooldown_duration: + run_stats: + start_time: 1749157168.054225 + end_time: 1749157198.213826 + requests_made: + successful: 1 + errored: 0 + incomplete: 0 + total: 1 + queued_time_avg: 0.631589580089488 + scheduled_time_delay_avg: 3.784260851271609e-06 + scheduled_time_sleep_avg: 0 + worker_start_delay_avg: 2.8021792148021943e-05 + worker_time_avg: 0.6373953819274902 + worker_start_time_targeted_delay_avg: 0.6319031715393066 + request_start_time_delay_avg: 0.316034068452551 + request_start_time_targeted_delay_avg: 0.6319856542222043 + request_time_delay_avg: 0.00029866238857837433 + request_time_avg: 
0.6370967195389119 + worker: + type_: generative_requests_worker + backend_type: openai_http + backend_target: example_target + backend_model: example_model + backend_info: + max_output_tokens: 16384 + timeout: 300 + http2: true + authorization: false + organization: + project: + text_completions_path: "/v1/completions" + chat_completions_path: "/v1/chat/completions" + request_loader: + type_: generative_request_loader + data: prompt_tokens=256,output_tokens=128 + data_args: + processor: example_processor + processor_args: + extras: {} + metrics: + requests_per_second: + successful: + mean: 1.5481806532737452 + median: 1.5530116578512305 + mode: 1.555484186315253 + variance: 0.0003352629331303757 + std_dev: 0.01831018659463567 + min: 1.4509899157628907 + max: 1.5597664461806156 + count: 45 + total_sum: 69.6707872953874 + percentiles: + p001: 1.4509899157628907 + p01: 1.4509899157628907 + p05: 1.5190957942495127 + p10: 1.5377883923356668 + p25: 1.5483918601985445 + p75: 1.5567531615313124 + p90: 1.5583715343236735 + p95: 1.5590938878953722 + p99: 1.5597664461806156 + p999: 1.5597664461806156 + cumulative_distribution_function: + errored: + mean: 0 + median: 0 + mode: 0 + variance: 0 + std_dev: 0 + min: 0 + max: 0 + count: 0 + total_sum: 0 + percentiles: + p001: 0 + p01: 0 + p05: 0 + p10: 0 + p25: 0 + p75: 0 + p90: 0 + p95: 0 + p99: 0 + p999: 0 + cumulative_distribution_function: + incomplete: + mean: 0 + median: 0 + mode: 0 + variance: 0 + std_dev: 0 + min: 0 + max: 0 + count: 0 + total_sum: 0 + percentiles: + p001: 0 + p01: 0 + p05: 0 + p10: 0 + p25: 0 + p75: 0 + p90: 0 + p95: 0 + p99: 0 + p999: 0 + cumulative_distribution_function: + total: + mean: 1.5668128271815418 + median: 1.5530312090734288 + mode: 1.555484186315253 + variance: 0.036536424510388923 + std_dev: 0.19114503527528232 + min: 1.4509899157628907 + max: 3.509921881864626 + count: 46 + total_sum: 73.18070917725203 + percentiles: + p001: 1.4509899157628907 + p01: 1.4509899157628907 + p05: 
1.5190957942495127 + p10: 1.5377883923356668 + p25: 1.5483918601985445 + p75: 1.5567531615313124 + p90: 1.5583715343236735 + p95: 1.5591048992639953 + p99: 1.5597664461806156 + p999: 3.509921881864626 + cumulative_distribution_function: + request_concurrency: + successful: + mean: 0.9977627456483604 + median: 1 + mode: 1 + variance: 0.002232249044605607 + std_dev: 0.047246682895263736 + min: 0 + max: 1 + count: 2 + total_sum: 1 + percentiles: + p001: 0 + p01: 1 + p05: 1 + p10: 1 + p25: 1 + p75: 1 + p90: 1 + p95: 1 + p99: 1 + p999: 1 + cumulative_distribution_function: + errored: + mean: 0 + median: 0 + mode: 0 + variance: 0 + std_dev: 0 + min: 0 + max: 0 + count: 0 + total_sum: 0 + percentiles: + p001: 0 + p01: 0 + p05: 0 + p10: 0 + p25: 0 + p75: 0 + p90: 0 + p95: 0 + p99: 0 + p999: 0 + cumulative_distribution_function: + incomplete: + mean: 1 + median: 1 + mode: 1 + variance: 0 + std_dev: 0 + min: 1 + max: 1 + count: 1 + total_sum: 1 + percentiles: + p001: 1 + p01: 1 + p05: 1 + p10: 1 + p25: 1 + p75: 1 + p90: 1 + p95: 1 + p99: 1 + p999: 1 + cumulative_distribution_function: + total: + mean: 0.9977433642674269 + median: 1 + mode: 1 + variance: 0.002251543327743578 + std_dev: 0.047450430216633206 + min: 0 + max: 1 + count: 2 + total_sum: 1 + percentiles: + p001: 0 + p01: 1 + p05: 1 + p10: 1 + p25: 1 + p75: 1 + p90: 1 + p95: 1 + p99: 1 + p999: 1 + cumulative_distribution_function: + request_latency: + successful: + mean: 0.6444743664368339 + median: 0.6424565315246582 + mode: 0.6395885944366455 + variance: 6.414585873782315e-05 + std_dev: 0.008009110982988258 + min: 0.6395885944366455 + max: 0.6891846656799316 + count: 46 + total_sum: 29.64582085609436 + percentiles: + p001: 0.6395885944366455 + p01: 0.6395885944366455 + p05: 0.6399857997894287 + p10: 0.6403069496154785 + p25: 0.6409540176391602 + p75: 0.644390344619751 + p90: 0.6488735675811768 + p95: 0.656728982925415 + p99: 0.6891846656799316 + p999: 0.6891846656799316 + cumulative_distribution_function: + 
errored: + mean: 0 + median: 0 + mode: 0 + variance: 0 + std_dev: 0 + min: 0 + max: 0 + count: 0 + total_sum: 0 + percentiles: + p001: 0 + p01: 0 + p05: 0 + p10: 0 + p25: 0 + p75: 0 + p90: 0 + p95: 0 + p99: 0 + p999: 0 + cumulative_distribution_function: + incomplete: + mean: 0.2836878299713135 + median: 0.2836878299713135 + mode: 0.2836878299713135 + variance: 0 + std_dev: 0 + min: 0.2836878299713135 + max: 0.2836878299713135 + count: 1 + total_sum: 0.2836878299713135 + percentiles: + p001: 0.2836878299713135 + p01: 0.2836878299713135 + p05: 0.2836878299713135 + p10: 0.2836878299713135 + p25: 0.2836878299713135 + p75: 0.2836878299713135 + p90: 0.2836878299713135 + p95: 0.2836878299713135 + p99: 0.2836878299713135 + p999: 0.2836878299713135 + cumulative_distribution_function: + total: + mean: 0.6367980571503334 + median: 0.642310380935669 + mode: 0.2836878299713135 + variance: 0.0027733643692853522 + std_dev: 0.05266274175624881 + min: 0.2836878299713135 + max: 0.6891846656799316 + count: 47 + total_sum: 29.929508686065674 + percentiles: + p001: 0.2836878299713135 + p01: 0.2836878299713135 + p05: 0.6398613452911377 + p10: 0.6402454376220703 + p25: 0.640899658203125 + p75: 0.644390344619751 + p90: 0.6488735675811768 + p95: 0.656728982925415 + p99: 0.6891846656799316 + p999: 0.6891846656799316 + cumulative_distribution_function: + prompt_token_count: + successful: + mean: 257.1086956521739 + median: 257 + mode: 257 + variance: 0.14035916824196598 + std_dev: 0.37464538999161057 + min: 257 + max: 259 + count: 46 + total_sum: 11827 + percentiles: + p001: 257 + p01: 257 + p05: 257 + p10: 257 + p25: 257 + p75: 257 + p90: 257 + p95: 258 + p99: 259 + p999: 259 + cumulative_distribution_function: + errored: + mean: 0 + median: 0 + mode: 0 + variance: 0 + std_dev: 0 + min: 0 + max: 0 + count: 0 + total_sum: 0 + percentiles: + p001: 0 + p01: 0 + p05: 0 + p10: 0 + p25: 0 + p75: 0 + p90: 0 + p95: 0 + p99: 0 + p999: 0 + cumulative_distribution_function: + incomplete: + mean: 256 
+ median: 256 + mode: 256 + variance: 0 + std_dev: 0 + min: 256 + max: 256 + count: 1 + total_sum: 256 + percentiles: + p001: 256 + p01: 256 + p05: 256 + p10: 256 + p25: 256 + p75: 256 + p90: 256 + p95: 256 + p99: 256 + p999: 256 + cumulative_distribution_function: + total: + mean: 257.0851063829787 + median: 257 + mode: 256 + variance: 0.16296966953372566 + std_dev: 0.40369502044702715 + min: 256 + max: 259 + count: 47 + total_sum: 12083 + percentiles: + p001: 256 + p01: 256 + p05: 257 + p10: 257 + p25: 257 + p75: 257 + p90: 257 + p95: 258 + p99: 259 + p999: 259 + cumulative_distribution_function: + output_token_count: + successful: + mean: 127.99999999999999 + median: 128 + mode: 128 + variance: 2.01948391736579e-28 + std_dev: 1.4210854715202002e-14 + min: 128 + max: 128 + count: 46 + total_sum: 5888 + percentiles: + p001: 128 + p01: 128 + p05: 128 + p10: 128 + p25: 128 + p75: 128 + p90: 128 + p95: 128 + p99: 128 + p999: 128 + cumulative_distribution_function: + errored: + mean: 0 + median: 0 + mode: 0 + variance: 0 + std_dev: 0 + min: 0 + max: 0 + count: 0 + total_sum: 0 + percentiles: + p001: 0 + p01: 0 + p05: 0 + p10: 0 + p25: 0 + p75: 0 + p90: 0 + p95: 0 + p99: 0 + p999: 0 + cumulative_distribution_function: + incomplete: + mean: 0 + median: 0 + mode: 0 + variance: 0 + std_dev: 0 + min: 0 + max: 0 + count: 0 + total_sum: 0 + percentiles: + p001: 0 + p01: 0 + p05: 0 + p10: 0 + p25: 0 + p75: 0 + p90: 0 + p95: 0 + p99: 0 + p999: 0 + cumulative_distribution_function: + total: + mean: 126.44680851063832 + median: 128 + mode: 55 + variance: 110.97057492077867 + std_dev: 10.534257207832866 + min: 55 + max: 128 + count: 47 + total_sum: 5943 + percentiles: + p001: 55 + p01: 55 + p05: 128 + p10: 128 + p25: 128 + p75: 128 + p90: 128 + p95: 128 + p99: 128 + p999: 128 + cumulative_distribution_function: + time_to_first_token_ms: + successful: + mean: 16.792535781860348 + median: 16.38054847717285 + mode: 15.790939331054688 + variance: 1.2776652847210441 + std_dev: 
1.1303385708366516 + min: 15.790939331054688 + max: 21.281957626342773 + count: 46 + total_sum: 772.4566459655762 + percentiles: + p001: 15.790939331054688 + p01: 15.790939331054688 + p05: 15.971660614013672 + p10: 16.034841537475586 + p25: 16.111373901367188 + p75: 16.840696334838867 + p90: 18.505334854125977 + p95: 19.00935173034668 + p99: 21.281957626342773 + p999: 21.281957626342773 + cumulative_distribution_function: + errored: + mean: 0 + median: 0 + mode: 0 + variance: 0 + std_dev: 0 + min: 0 + max: 0 + count: 0 + total_sum: 0 + percentiles: + p001: 0 + p01: 0 + p05: 0 + p10: 0 + p25: 0 + p75: 0 + p90: 0 + p95: 0 + p99: 0 + p999: 0 + cumulative_distribution_function: + incomplete: + mean: 0 + median: 0 + mode: 0 + variance: 0 + std_dev: 0 + min: 0 + max: 0 + count: 0 + total_sum: 0 + percentiles: + p001: 0 + p01: 0 + p05: 0 + p10: 0 + p25: 0 + p75: 0 + p90: 0 + p95: 0 + p99: 0 + p999: 0 + cumulative_distribution_function: + total: + mean: 16.777170465347616 + median: 16.371726989746094 + mode: 15.790939331054688 + variance: 1.2613411927317046 + std_dev: 1.1230944718641014 + min: 15.790939331054688 + max: 21.281957626342773 + count: 47 + total_sum: 788.5270118713379 + percentiles: + p001: 15.790939331054688 + p01: 15.790939331054688 + p05: 15.971660614013672 + p10: 16.034841537475586 + p25: 16.100645065307617 + p75: 16.840696334838867 + p90: 18.505334854125977 + p95: 19.00935173034668 + p99: 21.281957626342773 + p999: 21.281957626342773 + cumulative_distribution_function: + time_per_output_token_ms: + successful: + mean: 4.90300272307966 + median: 4.885653033852577 + mode: 4.870360717177391 + variance: 0.003163643010108571 + std_dev: 0.05624627107736628 + min: 4.870360717177391 + max: 5.217265337705612 + count: 46 + total_sum: 225.5381252616644 + percentiles: + p001: 4.870360717177391 + p01: 4.870360717177391 + p05: 4.8728808760643005 + p10: 4.873953759670258 + p25: 4.876237362623215 + p75: 4.904214292764664 + p90: 4.934689030051231 + p95: 4.993332549929619 + 
p99: 5.217265337705612 + p999: 5.217265337705612 + cumulative_distribution_function: + errored: + mean: 0 + median: 0 + mode: 0 + variance: 0 + std_dev: 0 + min: 0 + max: 0 + count: 0 + total_sum: 0 + percentiles: + p001: 0 + p01: 0 + p05: 0 + p10: 0 + p25: 0 + p75: 0 + p90: 0 + p95: 0 + p99: 0 + p999: 0 + cumulative_distribution_function: + incomplete: + mean: 0 + median: 0 + mode: 0 + variance: 0 + std_dev: 0 + min: 0 + max: 0 + count: 0 + total_sum: 0 + percentiles: + p001: 0 + p01: 0 + p05: 0 + p10: 0 + p25: 0 + p75: 0 + p90: 0 + p95: 0 + p99: 0 + p999: 0 + cumulative_distribution_function: + total: + mean: 4.9022222114856975 + median: 4.882922396063805 + mode: 4.870360717177391 + variance: 0.003199582258516055 + std_dev: 0.05656485002646127 + min: 4.81866489757191 + max: 5.217265337705612 + count: 47 + total_sum: 230.3567901592363 + percentiles: + p001: 4.81866489757191 + p01: 4.870360717177391 + p05: 4.872731864452362 + p10: 4.873953759670258 + p25: 4.876237362623215 + p75: 4.904214292764664 + p90: 4.934689030051231 + p95: 4.993332549929619 + p99: 5.217265337705612 + p999: 5.217265337705612 + cumulative_distribution_function: + inter_token_latency_ms: + successful: + mean: 4.941609043733832 + median: 4.9241227427805505 + mode: 4.90871001416304 + variance: 0.003213660306132974 + std_dev: 0.056689155101597465 + min: 4.90871001416304 + max: 5.258346167136365 + count: 46 + total_sum: 227.31401601175622 + percentiles: + p001: 4.90871001416304 + p01: 4.90871001416304 + p05: 4.911250016820713 + p10: 4.9123313483290785 + p25: 4.91463293240765 + p75: 4.9428301533376136 + p90: 4.973544849185493 + p95: 5.032650129062923 + p99: 5.258346167136365 + p999: 5.258346167136365 + cumulative_distribution_function: + errored: + mean: 0 + median: 0 + mode: 0 + variance: 0 + std_dev: 0 + min: 0 + max: 0 + count: 0 + total_sum: 0 + percentiles: + p001: 0 + p01: 0 + p05: 0 + p10: 0 + p25: 0 + p75: 0 + p90: 0 + p95: 0 + p99: 0 + p999: 0 + cumulative_distribution_function: + 
incomplete: + mean: 0 + median: 0 + mode: 0 + variance: 0 + std_dev: 0 + min: 0 + max: 0 + count: 0 + total_sum: 0 + percentiles: + p001: 0 + p01: 0 + p05: 0 + p10: 0 + p25: 0 + p75: 0 + p90: 0 + p95: 0 + p99: 0 + p999: 0 + cumulative_distribution_function: + total: + mean: 4.9413003057767115 + median: 4.921370603906826 + mode: 4.90871001416304 + variance: 0.003194539306669541 + std_dev: 0.056520255720135776 + min: 4.9078994327121315 + max: 5.258346167136365 + count: 47 + total_sum: 232.22191544446835 + percentiles: + p001: 4.9078994327121315 + p01: 4.90871001416304 + p05: 4.911099831888995 + p10: 4.9123313483290785 + p25: 4.91463293240765 + p75: 4.9428301533376136 + p90: 4.973544849185493 + p95: 5.032650129062923 + p99: 5.258346167136365 + p999: 5.258346167136365 + cumulative_distribution_function: + output_tokens_per_second: + successful: + mean: 198.13346751788123 + median: 203.04516628745705 + mode: 203.5378269520066 + variance: 613.9948900522365 + std_dev: 24.778920276158857 + min: 0 + max: 203.69598368219124 + count: 122 + total_sum: 17849.590625912137 + percentiles: + p001: 46.71289356157213 + p01: 55.502236337170835 + p05: 190.14888022486173 + p10: 200.69400449782287 + p25: 202.23259402121505 + p75: 203.42923658938793 + p90: 203.5378269520066 + p95: 203.58722454130668 + p99: 203.6860916860917 + p999: 203.69598368219124 + cumulative_distribution_function: + errored: + mean: 0 + median: 0 + mode: 0 + variance: 0 + std_dev: 0 + min: 0 + max: 0 + count: 0 + total_sum: 0 + percentiles: + p001: 0 + p01: 0 + p05: 0 + p10: 0 + p25: 0 + p75: 0 + p90: 0 + p95: 0 + p99: 0 + p999: 0 + cumulative_distribution_function: + incomplete: + mean: 0 + median: 0 + mode: 0 + variance: 0 + std_dev: 0 + min: 0 + max: 0 + count: 0 + total_sum: 0 + percentiles: + p001: 0 + p01: 0 + p05: 0 + p10: 0 + p25: 0 + p75: 0 + p90: 0 + p95: 0 + p99: 0 + p999: 0 + cumulative_distribution_function: + total: + mean: 198.08514508750469 + median: 203.04516628745705 + mode: 203.5378269520066 + 
variance: 619.6237334717947 + std_dev: 24.89224243558211 + min: 0 + max: 203.69598368219124 + count: 125 + total_sum: 18310.99071823841 + percentiles: + p001: 46.71289356157213 + p01: 55.502236337170835 + p05: 190.14888022486173 + p10: 200.69400449782287 + p25: 202.23259402121505 + p75: 203.4193704835346 + p90: 203.5378269520066 + p95: 203.58722454130668 + p99: 203.6860916860917 + p999: 203.69598368219124 + cumulative_distribution_function: + tokens_per_second: + successful: + mean: 992.6867036588937 + median: 614.3700014647723 + mode: 615.2712336805046 + variance: 62014350.40386989 + std_dev: 7874.919072845758 + min: 0 + max: 159300.81436773148 + count: 139 + total_sum: 5852579.912913391 + percentiles: + p001: 46.71289356157213 + p01: 55.502236337170835 + p05: 574.9559972583961 + p10: 606.8148148148148 + p25: 611.5928842228055 + p75: 615.0907757735738 + p90: 615.4517975055026 + p95: 615.542119166422 + p99: 617.5359246171967 + p999: 157985.65557672578 + cumulative_distribution_function: + errored: + mean: 0 + median: 0 + mode: 0 + variance: 0 + std_dev: 0 + min: 0 + max: 0 + count: 0 + total_sum: 0 + percentiles: + p001: 0 + p01: 0 + p05: 0 + p10: 0 + p25: 0 + p75: 0 + p90: 0 + p95: 0 + p99: 0 + p999: 0 + cumulative_distribution_function: + incomplete: + mean: 0 + median: 0 + mode: 0 + variance: 0 + std_dev: 0 + min: 0 + max: 0 + count: 0 + total_sum: 0 + percentiles: + p001: 0 + p01: 0 + p05: 0 + p10: 0 + p25: 0 + p75: 0 + p90: 0 + p95: 0 + p99: 0 + p999: 0 + cumulative_distribution_function: + total: + mean: 1002.1268169766876 + median: 614.3700014647723 + mode: 615.2712336805046 + variance: 63939736.95341249 + std_dev: 7996.232672541019 + min: 0 + max: 296531.848660591 + count: 143 + total_sum: 6151486.576325966 + percentiles: + p001: 46.71289356157213 + p01: 55.502236337170835 + p05: 574.9559972583961 + p10: 606.8148148148148 + p25: 611.5928842228055 + p75: 615.0907757735738 + p90: 615.4517975055026 + p95: 615.542119166422 + p99: 1158.3275338304336 + p999: 
158008.81383758428 + cumulative_distribution_function: + start_time: 1749157168.1827004 + end_time: 1749157198.1799018 + request_totals: + successful: 46 + errored: 0 + incomplete: 1 + total: 47 + request_samples: + requests: + successful: + - type_: generative_text_response + request_id: 73054dd1-486f-4894-a861-075750b82453 + request_type: text_completions + scheduler_info: + requested: true + completed: true + errored: false + canceled: false + targeted_start_time: 1749157168.179883 + queued_time: 1749157168.1811602 + dequeued_time: 1749157168.1818697 + scheduled_time: 1749157168.181895 + worker_start: 1749157168.1820004 + request_start: 1749157168.1827004 + request_end: 1749157168.871885 + worker_end: 1749157168.8723884 + process_id: 0 + prompt: such a sacrifice to her advantage as years of gratitude cannot enough + acknowledge. By this time she is actually with them! If such goodness does + not make her miserable now, she will never deserve to be happy! What a meeting + for her, when she first sees my aunt! We must endeavour to forget all that + has passed on either side, said Jane I hope and trust they will yet be happy. + His consenting to marry her is a proof, I will believe, that he is come to + a right way of thinking. Their mutual affection will steady them; and I flatter + myself they will settle so quietly, and live in so rational a manner, as may + in time make their past imprudence forgotten. Their conduct has been such, + replied Elizabeth, as neither you, nor I, nor anybody, can ever forget. It + is useless to talk of it. It now occurred to the girls that their mother was + in all likelihood perfectly ignorant of what had happened. They went to the + library, therefore, and asked their father whether he would not wish them + to make it known to her. He was writing, and, without raising his head, coolly + replied, Just as you please. May we take my uncle s letter to read to her? + Take whatever you like, and get away + output: ", said Jane. 
The letter was read, and the girls retired to their own + apartments. Elizabeth was the first to return. She found her mother seated + in the drawing-room, and looking very pale. She was dressed in a loose white + gown, and her hair was disordered. She rose as they entered, and clasped them + both in her arms, and then, without saying a word, took her seat on the sofa, + and began to weep. Elizabeth and Jane stood by her side, and listened to the + sobs which issued from her heart. She had no words to express her gratitude, + and, in a few minutes," + prompt_tokens: 257 + output_tokens: 128 + start_time: 1749157168.1827004 + end_time: 1749157168.871885 + first_token_time: 1749157168.2039824 + last_token_time: 1749157168.8717923 + request_latency: 0.6891846656799316 + time_to_first_token_ms: 21.281957626342773 + time_per_output_token_ms: 5.217265337705612 + inter_token_latency_ms: 5.258346167136365 + tokens_per_second: 558.631117568713 + output_tokens_per_second: 185.72670921765 + total: + duration: 29.997201442718506 diff --git a/tests/unit/entrypoints/test_display_entrypoint.py b/tests/unit/entrypoints/test_display_entrypoint.py index 124b93143..8b26ff124 100644 --- a/tests/unit/entrypoints/test_display_entrypoint.py +++ b/tests/unit/entrypoints/test_display_entrypoint.py @@ -13,8 +13,16 @@ def _() -> str: def test_display_entrypoint_json(capfd, get_test_asset_dir): + generic_test_display_entrypoint("benchmarks_stripped.json", capfd, get_test_asset_dir) + + +def test_display_entrypoint_yaml(capfd, get_test_asset_dir): + generic_test_display_entrypoint("benchmarks_stripped.yaml", capfd, get_test_asset_dir) + + +def generic_test_display_entrypoint(filename, capfd, get_test_asset_dir): asset_dir = get_test_asset_dir() - display_benchmarks_report(asset_dir + "/benchmarks_stripped.json") + display_benchmarks_report(asset_dir + "/" + filename) out, err = capfd.readouterr() expected_output_path = asset_dir + "/benchmarks_stripped_output.txt" with 
open(expected_output_path, 'r', encoding='utf_8') as file: From 1ed37cdc9456474c33ce7f2f54e86a4958660f9a Mon Sep 17 00:00:00 2001 From: Jared O'Connell Date: Thu, 12 Jun 2025 23:49:37 -0400 Subject: [PATCH 04/18] Fix warning --- src/guidellm/benchmark/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/guidellm/benchmark/__init__.py b/src/guidellm/benchmark/__init__.py index 7297e3d59..87f8fac56 100644 --- a/src/guidellm/benchmark/__init__.py +++ b/src/guidellm/benchmark/__init__.py @@ -62,5 +62,6 @@ "SynchronousProfile", "ThroughputProfile", "benchmark_generative_text", + "display_benchmarks_report", "create_profile", ] From 379c64fd0f372108331524053c6aa7792ec4002a Mon Sep 17 00:00:00 2001 From: Jared O'Connell Date: Thu, 12 Jun 2025 23:53:45 -0400 Subject: [PATCH 05/18] Add uncommitted file The gitignore is setup to ignore these files by default --- .../assets/benchmarks_stripped.json | 1116 +++++++++++++++++ 1 file changed, 1116 insertions(+) create mode 100644 tests/unit/entrypoints/assets/benchmarks_stripped.json diff --git a/tests/unit/entrypoints/assets/benchmarks_stripped.json b/tests/unit/entrypoints/assets/benchmarks_stripped.json new file mode 100644 index 000000000..975b0e659 --- /dev/null +++ b/tests/unit/entrypoints/assets/benchmarks_stripped.json @@ -0,0 +1,1116 @@ +{ + "benchmarks": [ + { + "type_": "generative_benchmark", + "id_": "97ece514-8717-412f-9dba-2b42bcd9866f", + "run_id": "93e36b31-b454-471d-ba62-6b2671585485", + "args": { + "profile": { + "type_": "sweep", + "completed_strategies": 10, + "measured_rates": [ + 1.5481806532737452 + ], + "measured_concurrencies": [ + 0.9977627456483604 + ], + "max_concurrency": null, + "strategy_type": "constant", + "rate": -1, + "initial_burst": true, + "random_seed": 42, + "sweep_size": 10, + "rate_type": "constant", + "strategy_types": [ + "synchronous" + ] + }, + "strategy_index": 0, + "strategy": { + "type_": "synchronous" + }, + "max_number": null, + "max_duration": 30.0, + 
"warmup_number": null, + "warmup_duration": null, + "cooldown_number": null, + "cooldown_duration": null + }, + "run_stats": { + "start_time": 1749157168.054225, + "end_time": 1749157198.213826, + "requests_made": { + "successful": 1, + "errored": 0, + "incomplete": 0, + "total": 1 + }, + "queued_time_avg": 0.631589580089488, + "scheduled_time_delay_avg": 3.784260851271609e-06, + "scheduled_time_sleep_avg": 0.0, + "worker_start_delay_avg": 2.8021792148021943e-05, + "worker_time_avg": 0.6373953819274902, + "worker_start_time_targeted_delay_avg": 0.6319031715393066, + "request_start_time_delay_avg": 0.316034068452551, + "request_start_time_targeted_delay_avg": 0.6319856542222043, + "request_time_delay_avg": 0.00029866238857837433, + "request_time_avg": 0.6370967195389119 + }, + "worker": { + "type_": "generative_requests_worker", + "backend_type": "openai_http", + "backend_target": "example_target", + "backend_model": "example_model", + "backend_info": { + "max_output_tokens": 16384, + "timeout": 300, + "http2": true, + "authorization": false, + "organization": null, + "project": null, + "text_completions_path": "/v1/completions", + "chat_completions_path": "/v1/chat/completions" + } + }, + "request_loader": { + "type_": "generative_request_loader", + "data": "prompt_tokens=256,output_tokens=128", + "data_args": null, + "processor": "example_processor", + "processor_args": null + }, + "extras": {}, + "metrics": { + "requests_per_second": { + "successful": { + "mean": 1.5481806532737452, + "median": 1.5530116578512305, + "mode": 1.555484186315253, + "variance": 0.0003352629331303757, + "std_dev": 0.01831018659463567, + "min": 1.4509899157628907, + "max": 1.5597664461806156, + "count": 45, + "total_sum": 69.6707872953874, + "percentiles": { + "p001": 1.4509899157628907, + "p01": 1.4509899157628907, + "p05": 1.5190957942495127, + "p10": 1.5377883923356668, + "p25": 1.5483918601985445, + "p75": 1.5567531615313124, + "p90": 1.5583715343236735, + "p95": 1.5590938878953722, 
+ "p99": 1.5597664461806156, + "p999": 1.5597664461806156 + }, + "cumulative_distribution_function": null + }, + "errored": { + "mean": 0.0, + "median": 0.0, + "mode": 0.0, + "variance": 0.0, + "std_dev": 0.0, + "min": 0.0, + "max": 0.0, + "count": 0, + "total_sum": 0.0, + "percentiles": { + "p001": 0.0, + "p01": 0.0, + "p05": 0.0, + "p10": 0.0, + "p25": 0.0, + "p75": 0.0, + "p90": 0.0, + "p95": 0.0, + "p99": 0.0, + "p999": 0.0 + }, + "cumulative_distribution_function": null + }, + "incomplete": { + "mean": 0.0, + "median": 0.0, + "mode": 0.0, + "variance": 0.0, + "std_dev": 0.0, + "min": 0.0, + "max": 0.0, + "count": 0, + "total_sum": 0.0, + "percentiles": { + "p001": 0.0, + "p01": 0.0, + "p05": 0.0, + "p10": 0.0, + "p25": 0.0, + "p75": 0.0, + "p90": 0.0, + "p95": 0.0, + "p99": 0.0, + "p999": 0.0 + }, + "cumulative_distribution_function": null + }, + "total": { + "mean": 1.5668128271815418, + "median": 1.5530312090734288, + "mode": 1.555484186315253, + "variance": 0.036536424510388923, + "std_dev": 0.19114503527528232, + "min": 1.4509899157628907, + "max": 3.509921881864626, + "count": 46, + "total_sum": 73.18070917725203, + "percentiles": { + "p001": 1.4509899157628907, + "p01": 1.4509899157628907, + "p05": 1.5190957942495127, + "p10": 1.5377883923356668, + "p25": 1.5483918601985445, + "p75": 1.5567531615313124, + "p90": 1.5583715343236735, + "p95": 1.5591048992639953, + "p99": 1.5597664461806156, + "p999": 3.509921881864626 + }, + "cumulative_distribution_function": null + } + }, + "request_concurrency": { + "successful": { + "mean": 0.9977627456483604, + "median": 1.0, + "mode": 1.0, + "variance": 0.002232249044605607, + "std_dev": 0.047246682895263736, + "min": 0.0, + "max": 1.0, + "count": 2, + "total_sum": 1.0, + "percentiles": { + "p001": 0.0, + "p01": 1.0, + "p05": 1.0, + "p10": 1.0, + "p25": 1.0, + "p75": 1.0, + "p90": 1.0, + "p95": 1.0, + "p99": 1.0, + "p999": 1.0 + }, + "cumulative_distribution_function": null + }, + "errored": { + "mean": 0.0, + 
"median": 0.0, + "mode": 0.0, + "variance": 0.0, + "std_dev": 0.0, + "min": 0.0, + "max": 0.0, + "count": 0, + "total_sum": 0.0, + "percentiles": { + "p001": 0.0, + "p01": 0.0, + "p05": 0.0, + "p10": 0.0, + "p25": 0.0, + "p75": 0.0, + "p90": 0.0, + "p95": 0.0, + "p99": 0.0, + "p999": 0.0 + }, + "cumulative_distribution_function": null + }, + "incomplete": { + "mean": 1.0, + "median": 1.0, + "mode": 1.0, + "variance": 0.0, + "std_dev": 0.0, + "min": 1.0, + "max": 1.0, + "count": 1, + "total_sum": 1.0, + "percentiles": { + "p001": 1.0, + "p01": 1.0, + "p05": 1.0, + "p10": 1.0, + "p25": 1.0, + "p75": 1.0, + "p90": 1.0, + "p95": 1.0, + "p99": 1.0, + "p999": 1.0 + }, + "cumulative_distribution_function": null + }, + "total": { + "mean": 0.9977433642674269, + "median": 1.0, + "mode": 1.0, + "variance": 0.002251543327743578, + "std_dev": 0.047450430216633206, + "min": 0.0, + "max": 1.0, + "count": 2, + "total_sum": 1.0, + "percentiles": { + "p001": 0.0, + "p01": 1.0, + "p05": 1.0, + "p10": 1.0, + "p25": 1.0, + "p75": 1.0, + "p90": 1.0, + "p95": 1.0, + "p99": 1.0, + "p999": 1.0 + }, + "cumulative_distribution_function": null + } + }, + "request_latency": { + "successful": { + "mean": 0.6444743664368339, + "median": 0.6424565315246582, + "mode": 0.6395885944366455, + "variance": 6.414585873782315e-05, + "std_dev": 0.008009110982988258, + "min": 0.6395885944366455, + "max": 0.6891846656799316, + "count": 46, + "total_sum": 29.64582085609436, + "percentiles": { + "p001": 0.6395885944366455, + "p01": 0.6395885944366455, + "p05": 0.6399857997894287, + "p10": 0.6403069496154785, + "p25": 0.6409540176391602, + "p75": 0.644390344619751, + "p90": 0.6488735675811768, + "p95": 0.656728982925415, + "p99": 0.6891846656799316, + "p999": 0.6891846656799316 + }, + "cumulative_distribution_function": null + }, + "errored": { + "mean": 0.0, + "median": 0.0, + "mode": 0.0, + "variance": 0.0, + "std_dev": 0.0, + "min": 0.0, + "max": 0.0, + "count": 0, + "total_sum": 0.0, + "percentiles": { + 
"p001": 0.0, + "p01": 0.0, + "p05": 0.0, + "p10": 0.0, + "p25": 0.0, + "p75": 0.0, + "p90": 0.0, + "p95": 0.0, + "p99": 0.0, + "p999": 0.0 + }, + "cumulative_distribution_function": null + }, + "incomplete": { + "mean": 0.2836878299713135, + "median": 0.2836878299713135, + "mode": 0.2836878299713135, + "variance": 0.0, + "std_dev": 0.0, + "min": 0.2836878299713135, + "max": 0.2836878299713135, + "count": 1, + "total_sum": 0.2836878299713135, + "percentiles": { + "p001": 0.2836878299713135, + "p01": 0.2836878299713135, + "p05": 0.2836878299713135, + "p10": 0.2836878299713135, + "p25": 0.2836878299713135, + "p75": 0.2836878299713135, + "p90": 0.2836878299713135, + "p95": 0.2836878299713135, + "p99": 0.2836878299713135, + "p999": 0.2836878299713135 + }, + "cumulative_distribution_function": null + }, + "total": { + "mean": 0.6367980571503334, + "median": 0.642310380935669, + "mode": 0.2836878299713135, + "variance": 0.0027733643692853522, + "std_dev": 0.05266274175624881, + "min": 0.2836878299713135, + "max": 0.6891846656799316, + "count": 47, + "total_sum": 29.929508686065674, + "percentiles": { + "p001": 0.2836878299713135, + "p01": 0.2836878299713135, + "p05": 0.6398613452911377, + "p10": 0.6402454376220703, + "p25": 0.640899658203125, + "p75": 0.644390344619751, + "p90": 0.6488735675811768, + "p95": 0.656728982925415, + "p99": 0.6891846656799316, + "p999": 0.6891846656799316 + }, + "cumulative_distribution_function": null + } + }, + "prompt_token_count": { + "successful": { + "mean": 257.1086956521739, + "median": 257.0, + "mode": 257.0, + "variance": 0.14035916824196598, + "std_dev": 0.37464538999161057, + "min": 257.0, + "max": 259.0, + "count": 46, + "total_sum": 11827.0, + "percentiles": { + "p001": 257.0, + "p01": 257.0, + "p05": 257.0, + "p10": 257.0, + "p25": 257.0, + "p75": 257.0, + "p90": 257.0, + "p95": 258.0, + "p99": 259.0, + "p999": 259.0 + }, + "cumulative_distribution_function": null + }, + "errored": { + "mean": 0.0, + "median": 0.0, + "mode": 0.0, 
+ "variance": 0.0, + "std_dev": 0.0, + "min": 0.0, + "max": 0.0, + "count": 0, + "total_sum": 0.0, + "percentiles": { + "p001": 0.0, + "p01": 0.0, + "p05": 0.0, + "p10": 0.0, + "p25": 0.0, + "p75": 0.0, + "p90": 0.0, + "p95": 0.0, + "p99": 0.0, + "p999": 0.0 + }, + "cumulative_distribution_function": null + }, + "incomplete": { + "mean": 256.0, + "median": 256.0, + "mode": 256.0, + "variance": 0.0, + "std_dev": 0.0, + "min": 256.0, + "max": 256.0, + "count": 1, + "total_sum": 256.0, + "percentiles": { + "p001": 256.0, + "p01": 256.0, + "p05": 256.0, + "p10": 256.0, + "p25": 256.0, + "p75": 256.0, + "p90": 256.0, + "p95": 256.0, + "p99": 256.0, + "p999": 256.0 + }, + "cumulative_distribution_function": null + }, + "total": { + "mean": 257.0851063829787, + "median": 257.0, + "mode": 256.0, + "variance": 0.16296966953372566, + "std_dev": 0.40369502044702715, + "min": 256.0, + "max": 259.0, + "count": 47, + "total_sum": 12083.0, + "percentiles": { + "p001": 256.0, + "p01": 256.0, + "p05": 257.0, + "p10": 257.0, + "p25": 257.0, + "p75": 257.0, + "p90": 257.0, + "p95": 258.0, + "p99": 259.0, + "p999": 259.0 + }, + "cumulative_distribution_function": null + } + }, + "output_token_count": { + "successful": { + "mean": 127.99999999999999, + "median": 128.0, + "mode": 128.0, + "variance": 2.01948391736579e-28, + "std_dev": 1.4210854715202002e-14, + "min": 128.0, + "max": 128.0, + "count": 46, + "total_sum": 5888.0, + "percentiles": { + "p001": 128.0, + "p01": 128.0, + "p05": 128.0, + "p10": 128.0, + "p25": 128.0, + "p75": 128.0, + "p90": 128.0, + "p95": 128.0, + "p99": 128.0, + "p999": 128.0 + }, + "cumulative_distribution_function": null + }, + "errored": { + "mean": 0.0, + "median": 0.0, + "mode": 0.0, + "variance": 0.0, + "std_dev": 0.0, + "min": 0.0, + "max": 0.0, + "count": 0, + "total_sum": 0.0, + "percentiles": { + "p001": 0.0, + "p01": 0.0, + "p05": 0.0, + "p10": 0.0, + "p25": 0.0, + "p75": 0.0, + "p90": 0.0, + "p95": 0.0, + "p99": 0.0, + "p999": 0.0 + }, + 
"cumulative_distribution_function": null + }, + "incomplete": { + "mean": 0.0, + "median": 0.0, + "mode": 0.0, + "variance": 0.0, + "std_dev": 0.0, + "min": 0.0, + "max": 0.0, + "count": 0, + "total_sum": 0.0, + "percentiles": { + "p001": 0.0, + "p01": 0.0, + "p05": 0.0, + "p10": 0.0, + "p25": 0.0, + "p75": 0.0, + "p90": 0.0, + "p95": 0.0, + "p99": 0.0, + "p999": 0.0 + }, + "cumulative_distribution_function": null + }, + "total": { + "mean": 126.44680851063832, + "median": 128.0, + "mode": 55.0, + "variance": 110.97057492077867, + "std_dev": 10.534257207832866, + "min": 55.0, + "max": 128.0, + "count": 47, + "total_sum": 5943.0, + "percentiles": { + "p001": 55.0, + "p01": 55.0, + "p05": 128.0, + "p10": 128.0, + "p25": 128.0, + "p75": 128.0, + "p90": 128.0, + "p95": 128.0, + "p99": 128.0, + "p999": 128.0 + }, + "cumulative_distribution_function": null + } + }, + "time_to_first_token_ms": { + "successful": { + "mean": 16.792535781860348, + "median": 16.38054847717285, + "mode": 15.790939331054688, + "variance": 1.2776652847210441, + "std_dev": 1.1303385708366516, + "min": 15.790939331054688, + "max": 21.281957626342773, + "count": 46, + "total_sum": 772.4566459655762, + "percentiles": { + "p001": 15.790939331054688, + "p01": 15.790939331054688, + "p05": 15.971660614013672, + "p10": 16.034841537475586, + "p25": 16.111373901367188, + "p75": 16.840696334838867, + "p90": 18.505334854125977, + "p95": 19.00935173034668, + "p99": 21.281957626342773, + "p999": 21.281957626342773 + }, + "cumulative_distribution_function": null + }, + "errored": { + "mean": 0.0, + "median": 0.0, + "mode": 0.0, + "variance": 0.0, + "std_dev": 0.0, + "min": 0.0, + "max": 0.0, + "count": 0, + "total_sum": 0.0, + "percentiles": { + "p001": 0.0, + "p01": 0.0, + "p05": 0.0, + "p10": 0.0, + "p25": 0.0, + "p75": 0.0, + "p90": 0.0, + "p95": 0.0, + "p99": 0.0, + "p999": 0.0 + }, + "cumulative_distribution_function": null + }, + "incomplete": { + "mean": 0.0, + "median": 0.0, + "mode": 0.0, + "variance": 
0.0, + "std_dev": 0.0, + "min": 0.0, + "max": 0.0, + "count": 0, + "total_sum": 0.0, + "percentiles": { + "p001": 0.0, + "p01": 0.0, + "p05": 0.0, + "p10": 0.0, + "p25": 0.0, + "p75": 0.0, + "p90": 0.0, + "p95": 0.0, + "p99": 0.0, + "p999": 0.0 + }, + "cumulative_distribution_function": null + }, + "total": { + "mean": 16.777170465347616, + "median": 16.371726989746094, + "mode": 15.790939331054688, + "variance": 1.2613411927317046, + "std_dev": 1.1230944718641014, + "min": 15.790939331054688, + "max": 21.281957626342773, + "count": 47, + "total_sum": 788.5270118713379, + "percentiles": { + "p001": 15.790939331054688, + "p01": 15.790939331054688, + "p05": 15.971660614013672, + "p10": 16.034841537475586, + "p25": 16.100645065307617, + "p75": 16.840696334838867, + "p90": 18.505334854125977, + "p95": 19.00935173034668, + "p99": 21.281957626342773, + "p999": 21.281957626342773 + }, + "cumulative_distribution_function": null + } + }, + "time_per_output_token_ms": { + "successful": { + "mean": 4.90300272307966, + "median": 4.885653033852577, + "mode": 4.870360717177391, + "variance": 0.003163643010108571, + "std_dev": 0.05624627107736628, + "min": 4.870360717177391, + "max": 5.217265337705612, + "count": 46, + "total_sum": 225.5381252616644, + "percentiles": { + "p001": 4.870360717177391, + "p01": 4.870360717177391, + "p05": 4.8728808760643005, + "p10": 4.873953759670258, + "p25": 4.876237362623215, + "p75": 4.904214292764664, + "p90": 4.934689030051231, + "p95": 4.993332549929619, + "p99": 5.217265337705612, + "p999": 5.217265337705612 + }, + "cumulative_distribution_function": null + }, + "errored": { + "mean": 0.0, + "median": 0.0, + "mode": 0.0, + "variance": 0.0, + "std_dev": 0.0, + "min": 0.0, + "max": 0.0, + "count": 0, + "total_sum": 0.0, + "percentiles": { + "p001": 0.0, + "p01": 0.0, + "p05": 0.0, + "p10": 0.0, + "p25": 0.0, + "p75": 0.0, + "p90": 0.0, + "p95": 0.0, + "p99": 0.0, + "p999": 0.0 + }, + "cumulative_distribution_function": null + }, + "incomplete": 
{ + "mean": 0.0, + "median": 0.0, + "mode": 0.0, + "variance": 0.0, + "std_dev": 0.0, + "min": 0.0, + "max": 0.0, + "count": 0, + "total_sum": 0.0, + "percentiles": { + "p001": 0.0, + "p01": 0.0, + "p05": 0.0, + "p10": 0.0, + "p25": 0.0, + "p75": 0.0, + "p90": 0.0, + "p95": 0.0, + "p99": 0.0, + "p999": 0.0 + }, + "cumulative_distribution_function": null + }, + "total": { + "mean": 4.9022222114856975, + "median": 4.882922396063805, + "mode": 4.870360717177391, + "variance": 0.003199582258516055, + "std_dev": 0.05656485002646127, + "min": 4.81866489757191, + "max": 5.217265337705612, + "count": 47, + "total_sum": 230.3567901592363, + "percentiles": { + "p001": 4.81866489757191, + "p01": 4.870360717177391, + "p05": 4.872731864452362, + "p10": 4.873953759670258, + "p25": 4.876237362623215, + "p75": 4.904214292764664, + "p90": 4.934689030051231, + "p95": 4.993332549929619, + "p99": 5.217265337705612, + "p999": 5.217265337705612 + }, + "cumulative_distribution_function": null + } + }, + "inter_token_latency_ms": { + "successful": { + "mean": 4.941609043733832, + "median": 4.9241227427805505, + "mode": 4.90871001416304, + "variance": 0.003213660306132974, + "std_dev": 0.056689155101597465, + "min": 4.90871001416304, + "max": 5.258346167136365, + "count": 46, + "total_sum": 227.31401601175622, + "percentiles": { + "p001": 4.90871001416304, + "p01": 4.90871001416304, + "p05": 4.911250016820713, + "p10": 4.9123313483290785, + "p25": 4.91463293240765, + "p75": 4.9428301533376136, + "p90": 4.973544849185493, + "p95": 5.032650129062923, + "p99": 5.258346167136365, + "p999": 5.258346167136365 + }, + "cumulative_distribution_function": null + }, + "errored": { + "mean": 0.0, + "median": 0.0, + "mode": 0.0, + "variance": 0.0, + "std_dev": 0.0, + "min": 0.0, + "max": 0.0, + "count": 0, + "total_sum": 0.0, + "percentiles": { + "p001": 0.0, + "p01": 0.0, + "p05": 0.0, + "p10": 0.0, + "p25": 0.0, + "p75": 0.0, + "p90": 0.0, + "p95": 0.0, + "p99": 0.0, + "p999": 0.0 + }, + 
"cumulative_distribution_function": null + }, + "incomplete": { + "mean": 0.0, + "median": 0.0, + "mode": 0.0, + "variance": 0.0, + "std_dev": 0.0, + "min": 0.0, + "max": 0.0, + "count": 0, + "total_sum": 0.0, + "percentiles": { + "p001": 0.0, + "p01": 0.0, + "p05": 0.0, + "p10": 0.0, + "p25": 0.0, + "p75": 0.0, + "p90": 0.0, + "p95": 0.0, + "p99": 0.0, + "p999": 0.0 + }, + "cumulative_distribution_function": null + }, + "total": { + "mean": 4.9413003057767115, + "median": 4.921370603906826, + "mode": 4.90871001416304, + "variance": 0.003194539306669541, + "std_dev": 0.056520255720135776, + "min": 4.9078994327121315, + "max": 5.258346167136365, + "count": 47, + "total_sum": 232.22191544446835, + "percentiles": { + "p001": 4.9078994327121315, + "p01": 4.90871001416304, + "p05": 4.911099831888995, + "p10": 4.9123313483290785, + "p25": 4.91463293240765, + "p75": 4.9428301533376136, + "p90": 4.973544849185493, + "p95": 5.032650129062923, + "p99": 5.258346167136365, + "p999": 5.258346167136365 + }, + "cumulative_distribution_function": null + } + }, + "output_tokens_per_second": { + "successful": { + "mean": 198.13346751788123, + "median": 203.04516628745705, + "mode": 203.5378269520066, + "variance": 613.9948900522365, + "std_dev": 24.778920276158857, + "min": 0.0, + "max": 203.69598368219124, + "count": 122, + "total_sum": 17849.590625912137, + "percentiles": { + "p001": 46.71289356157213, + "p01": 55.502236337170835, + "p05": 190.14888022486173, + "p10": 200.69400449782287, + "p25": 202.23259402121505, + "p75": 203.42923658938793, + "p90": 203.5378269520066, + "p95": 203.58722454130668, + "p99": 203.6860916860917, + "p999": 203.69598368219124 + }, + "cumulative_distribution_function": null + }, + "errored": { + "mean": 0.0, + "median": 0.0, + "mode": 0.0, + "variance": 0.0, + "std_dev": 0.0, + "min": 0.0, + "max": 0.0, + "count": 0, + "total_sum": 0.0, + "percentiles": { + "p001": 0.0, + "p01": 0.0, + "p05": 0.0, + "p10": 0.0, + "p25": 0.0, + "p75": 0.0, + "p90": 
0.0, + "p95": 0.0, + "p99": 0.0, + "p999": 0.0 + }, + "cumulative_distribution_function": null + }, + "incomplete": { + "mean": 0.0, + "median": 0.0, + "mode": 0.0, + "variance": 0.0, + "std_dev": 0.0, + "min": 0.0, + "max": 0.0, + "count": 0, + "total_sum": 0.0, + "percentiles": { + "p001": 0.0, + "p01": 0.0, + "p05": 0.0, + "p10": 0.0, + "p25": 0.0, + "p75": 0.0, + "p90": 0.0, + "p95": 0.0, + "p99": 0.0, + "p999": 0.0 + }, + "cumulative_distribution_function": null + }, + "total": { + "mean": 198.08514508750469, + "median": 203.04516628745705, + "mode": 203.5378269520066, + "variance": 619.6237334717947, + "std_dev": 24.89224243558211, + "min": 0.0, + "max": 203.69598368219124, + "count": 125, + "total_sum": 18310.99071823841, + "percentiles": { + "p001": 46.71289356157213, + "p01": 55.502236337170835, + "p05": 190.14888022486173, + "p10": 200.69400449782287, + "p25": 202.23259402121505, + "p75": 203.4193704835346, + "p90": 203.5378269520066, + "p95": 203.58722454130668, + "p99": 203.6860916860917, + "p999": 203.69598368219124 + }, + "cumulative_distribution_function": null + } + }, + "tokens_per_second": { + "successful": { + "mean": 992.6867036588937, + "median": 614.3700014647723, + "mode": 615.2712336805046, + "variance": 62014350.40386989, + "std_dev": 7874.919072845758, + "min": 0.0, + "max": 159300.81436773148, + "count": 139, + "total_sum": 5852579.912913391, + "percentiles": { + "p001": 46.71289356157213, + "p01": 55.502236337170835, + "p05": 574.9559972583961, + "p10": 606.8148148148148, + "p25": 611.5928842228055, + "p75": 615.0907757735738, + "p90": 615.4517975055026, + "p95": 615.542119166422, + "p99": 617.5359246171967, + "p999": 157985.65557672578 + }, + "cumulative_distribution_function": null + }, + "errored": { + "mean": 0.0, + "median": 0.0, + "mode": 0.0, + "variance": 0.0, + "std_dev": 0.0, + "min": 0.0, + "max": 0.0, + "count": 0, + "total_sum": 0.0, + "percentiles": { + "p001": 0.0, + "p01": 0.0, + "p05": 0.0, + "p10": 0.0, + "p25": 0.0, + 
"p75": 0.0, + "p90": 0.0, + "p95": 0.0, + "p99": 0.0, + "p999": 0.0 + }, + "cumulative_distribution_function": null + }, + "incomplete": { + "mean": 0.0, + "median": 0.0, + "mode": 0.0, + "variance": 0.0, + "std_dev": 0.0, + "min": 0.0, + "max": 0.0, + "count": 0, + "total_sum": 0.0, + "percentiles": { + "p001": 0.0, + "p01": 0.0, + "p05": 0.0, + "p10": 0.0, + "p25": 0.0, + "p75": 0.0, + "p90": 0.0, + "p95": 0.0, + "p99": 0.0, + "p999": 0.0 + }, + "cumulative_distribution_function": null + }, + "total": { + "mean": 1002.1268169766876, + "median": 614.3700014647723, + "mode": 615.2712336805046, + "variance": 63939736.95341249, + "std_dev": 7996.232672541019, + "min": 0.0, + "max": 296531.848660591, + "count": 143, + "total_sum": 6151486.576325966, + "percentiles": { + "p001": 46.71289356157213, + "p01": 55.502236337170835, + "p05": 574.9559972583961, + "p10": 606.8148148148148, + "p25": 611.5928842228055, + "p75": 615.0907757735738, + "p90": 615.4517975055026, + "p95": 615.542119166422, + "p99": 1158.3275338304336, + "p999": 158008.81383758428 + }, + "cumulative_distribution_function": null + } + } + }, + "start_time": 1749157168.1827004, + "end_time": 1749157198.1799018, + "request_totals": { + "successful": 46, + "errored": 0, + "incomplete": 1, + "total": 47 + }, + "request_samples": null, + "requests": { + "successful": [ + { + "type_": "generative_text_response", + "request_id": "73054dd1-486f-4894-a861-075750b82453", + "request_type": "text_completions", + "scheduler_info": { + "requested": true, + "completed": true, + "errored": false, + "canceled": false, + "targeted_start_time": 1749157168.179883, + "queued_time": 1749157168.1811602, + "dequeued_time": 1749157168.1818697, + "scheduled_time": 1749157168.181895, + "worker_start": 1749157168.1820004, + "request_start": 1749157168.1827004, + "request_end": 1749157168.871885, + "worker_end": 1749157168.8723884, + "process_id": 0 + }, + "prompt": "such a sacrifice to her advantage as years of gratitude cannot 
enough acknowledge. By this time she is actually with them! If such goodness does not make her miserable now, she will never deserve to be happy! What a meeting for her, when she first sees my aunt! We must endeavour to forget all that has passed on either side, said Jane I hope and trust they will yet be happy. His consenting to marry her is a proof, I will believe, that he is come to a right way of thinking. Their mutual affection will steady them; and I flatter myself they will settle so quietly, and live in so rational a manner, as may in time make their past imprudence forgotten. Their conduct has been such, replied Elizabeth, as neither you, nor I, nor anybody, can ever forget. It is useless to talk of it. It now occurred to the girls that their mother was in all likelihood perfectly ignorant of what had happened. They went to the library, therefore, and asked their father whether he would not wish them to make it known to her. He was writing, and, without raising his head, coolly replied, Just as you please. May we take my uncle s letter to read to her? Take whatever you like, and get away", + "output": ", said Jane. The letter was read, and the girls retired to their own apartments. Elizabeth was the first to return. She found her mother seated in the drawing-room, and looking very pale. She was dressed in a loose white gown, and her hair was disordered. She rose as they entered, and clasped them both in her arms, and then, without saying a word, took her seat on the sofa, and began to weep. Elizabeth and Jane stood by her side, and listened to the sobs which issued from her heart. 
She had no words to express her gratitude, and, in a few minutes,", + "prompt_tokens": 257, + "output_tokens": 128, + "start_time": 1749157168.1827004, + "end_time": 1749157168.871885, + "first_token_time": 1749157168.2039824, + "last_token_time": 1749157168.8717923, + "request_latency": 0.6891846656799316, + "time_to_first_token_ms": 21.281957626342773, + "time_per_output_token_ms": 5.217265337705612, + "inter_token_latency_ms": 5.258346167136365, + "tokens_per_second": 558.631117568713, + "output_tokens_per_second": 185.72670921765 + } + ], + "total": null + }, + "duration": 29.997201442718506 + } + ] +} \ No newline at end of file From 2a8db0d0696768e725595fc0fbc2e1c88ec4c0c9 Mon Sep 17 00:00:00 2001 From: Jared O'Connell Date: Fri, 13 Jun 2025 11:47:17 -0400 Subject: [PATCH 06/18] Fix linter errors --- src/guidellm/__main__.py | 6 +++- src/guidellm/benchmark/__init__.py | 2 +- src/guidellm/benchmark/entrypoints.py | 7 ++--- src/guidellm/benchmark/output.py | 2 +- .../entrypoints/test_display_entrypoint.py | 30 ++++++++++++------- 5 files changed, 30 insertions(+), 17 deletions(-) diff --git a/src/guidellm/__main__.py b/src/guidellm/__main__.py index 1fb17adbb..1633bb168 100644 --- a/src/guidellm/__main__.py +++ b/src/guidellm/__main__.py @@ -7,7 +7,11 @@ import click from guidellm.backend import BackendType -from guidellm.benchmark import ProfileType, benchmark_generative_text, display_benchmarks_report +from guidellm.benchmark import ( + ProfileType, + benchmark_generative_text, + display_benchmarks_report, +) from guidellm.config import print_config from guidellm.preprocess.dataset import ShortPromptStrategy, process_dataset from guidellm.scheduler import StrategyType diff --git a/src/guidellm/benchmark/__init__.py b/src/guidellm/benchmark/__init__.py index 87f8fac56..22e49ebfc 100644 --- a/src/guidellm/benchmark/__init__.py +++ b/src/guidellm/benchmark/__init__.py @@ -62,6 +62,6 @@ "SynchronousProfile", "ThroughputProfile", "benchmark_generative_text", - 
"display_benchmarks_report", "create_profile", + "display_benchmarks_report", ] diff --git a/src/guidellm/benchmark/entrypoints.py b/src/guidellm/benchmark/entrypoints.py index 426ecd7c0..e89e58eb5 100644 --- a/src/guidellm/benchmark/entrypoints.py +++ b/src/guidellm/benchmark/entrypoints.py @@ -1,4 +1,3 @@ -import os from collections.abc import Iterable from pathlib import Path from typing import Any, Literal, Optional, Union @@ -136,11 +135,11 @@ async def benchmark_generative_text( return report, saved_path -def display_benchmarks_report(file: str): +def display_benchmarks_report(file: Path): console = GenerativeBenchmarksConsole(enabled=True) - if not os.path.exists(file): + if not file.exists(): console.print_line(f"File {file} not found.") return report = GenerativeBenchmarksReport.load_file(file) console.benchmarks = report.benchmarks - console.print_full_report() \ No newline at end of file + console.print_full_report() diff --git a/src/guidellm/benchmark/output.py b/src/guidellm/benchmark/output.py index cbe4c4bf4..2a9967c93 100644 --- a/src/guidellm/benchmark/output.py +++ b/src/guidellm/benchmark/output.py @@ -960,4 +960,4 @@ def print_full_report(self): self.print_benchmarks_metadata() self.print_benchmarks_info() self.print_benchmarks_stats() - self.enabled = orig_enabled \ No newline at end of file + self.enabled = orig_enabled diff --git a/tests/unit/entrypoints/test_display_entrypoint.py b/tests/unit/entrypoints/test_display_entrypoint.py index 8b26ff124..c028e2756 100644 --- a/tests/unit/entrypoints/test_display_entrypoint.py +++ b/tests/unit/entrypoints/test_display_entrypoint.py @@ -1,34 +1,44 @@ -import os import unittest +from pathlib import Path + import pytest from guidellm.benchmark import display_benchmarks_report -@pytest.fixture() + +@pytest.fixture def get_test_asset_dir(): - def _() -> str: - return os.path.dirname(os.path.abspath(__file__)) + "/assets" + def _() -> Path: + return Path(__file__).parent / "assets" return _ def 
test_display_entrypoint_json(capfd, get_test_asset_dir): - generic_test_display_entrypoint("benchmarks_stripped.json", capfd, get_test_asset_dir) + generic_test_display_entrypoint( + "benchmarks_stripped.json", + capfd, + get_test_asset_dir, + ) def test_display_entrypoint_yaml(capfd, get_test_asset_dir): - generic_test_display_entrypoint("benchmarks_stripped.yaml", capfd, get_test_asset_dir) + generic_test_display_entrypoint( + "benchmarks_stripped.yaml", + capfd, + get_test_asset_dir, + ) def generic_test_display_entrypoint(filename, capfd, get_test_asset_dir): asset_dir = get_test_asset_dir() - display_benchmarks_report(asset_dir + "/" + filename) + display_benchmarks_report(asset_dir / filename) out, err = capfd.readouterr() - expected_output_path = asset_dir + "/benchmarks_stripped_output.txt" - with open(expected_output_path, 'r', encoding='utf_8') as file: + expected_output_path = asset_dir / "benchmarks_stripped_output.txt" + with expected_output_path.open(encoding="utf_8") as file: expected_output = file.read() assert out == expected_output -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() From c43573a609e7aca4d8f5791656b73d23113c0d0b Mon Sep 17 00:00:00 2001 From: Jared O'Connell Date: Fri, 13 Jun 2025 12:41:24 -0400 Subject: [PATCH 07/18] Use fixed width for CLI tests --- .../assets/benchmarks_stripped_output.txt | 72 +++++++++---------- .../entrypoints/test_display_entrypoint.py | 2 + 2 files changed, 36 insertions(+), 38 deletions(-) diff --git a/tests/unit/entrypoints/assets/benchmarks_stripped_output.txt b/tests/unit/entrypoints/assets/benchmarks_stripped_output.txt index 03202c088..f56839266 100644 --- a/tests/unit/entrypoints/assets/benchmarks_stripped_output.txt +++ b/tests/unit/entrypoints/assets/benchmarks_stripped_output.txt @@ -3,48 +3,44 @@ Benchmarks Metadata: Run id:93e36b31-b454-471d-ba62-6b2671585485 Duration:30.2 seconds - Profile:type=sweep, strategies=['synchronous', 'throughput', 'constant', - 'constant', 
'constant', 'constant', 'constant', 'constant', 'constant', - 'constant'], max_concurrency=None - Args:max_number=None, max_duration=30.0, warmup_number=None, - warmup_duration=None, cooldown_number=None, cooldown_duration=None - Worker:type_='generative_requests_worker' backend_type='openai_http' - backend_target='example_target' backend_model='example_model' - backend_info={'max_output_tokens': 16384, 'timeout': 300, 'http2': True, - 'authorization': False, 'organization': None, 'project': None, - 'text_completions_path': '/v1/completions', 'chat_completions_path': - '/v1/chat/completions'} - Request Loader:type_='generative_request_loader' - data='prompt_tokens=256,output_tokens=128' data_args=None - processor='example_processor' processor_args=None + Profile:type=sweep, strategies=['synchronous', 'throughput', 'constant', 'constant', 'constant', 'constant', + 'constant', 'constant', 'constant', 'constant'], max_concurrency=None + Args:max_number=None, max_duration=30.0, warmup_number=None, warmup_duration=None, cooldown_number=None, + cooldown_duration=None + Worker:type_='generative_requests_worker' backend_type='openai_http' backend_target='example_target' + backend_model='example_model' backend_info={'max_output_tokens': 16384, 'timeout': 300, 'http2': True, + 'authorization': False, 'organization': None, 'project': None, 'text_completions_path': '/v1/completions', + 'chat_completions_path': '/v1/chat/completions'} + Request Loader:type_='generative_request_loader' data='prompt_tokens=256,output_tokens=128' data_args=None + processor='example_processor' processor_args=None Extras:None Benchmarks Info: -================================================================================ -=================================================================== -Metadata |||| Requests Made ||| Prompt -Tok/Req ||| Output Tok/Req ||| Prompt Tok Total||| Output Tok Total || - Benchmark| Start Time| End Time| Duration (s)| Comp| Inc| Err| Comp| -Inc| Err| Comp| Inc| Err| 
Comp| Inc| Err| Comp| Inc| Err ------------|-----------|---------|-------------|------|-----|-----|------|------ -|----|-------|-----|-----|-------|-----|-----|-------|------|------ -synchronous| 16:59:28| 16:59:58| 30.0| 46| 1| 0| 257.1| -256.0| 0.0| 128.0| 0.0| 0.0| 11827| 256| 0| 5888| 0| 0 -================================================================================ -=================================================================== +======================================================================================================================== +=========================== +Metadata |||| Requests Made ||| Prompt Tok/Req ||| Output Tok/Req ||| Prompt Tok +Total||| Output Tok Total || + Benchmark| Start Time| End Time| Duration (s)| Comp| Inc| Err| Comp| Inc| Err| Comp| Inc| Err| Comp| Inc| +Err| Comp| Inc| Err +-----------|-----------|---------|-------------|------|-----|-----|------|------|----|-------|-----|-----|-------|-----| +-----|-------|------|------ +synchronous| 16:59:28| 16:59:58| 30.0| 46| 1| 0| 257.1| 256.0| 0.0| 128.0| 0.0| 0.0| 11827| 256| +0| 5888| 0| 0 +======================================================================================================================== +=========================== Benchmarks Stats: -================================================================================ -=============================================================== -Metadata | Request Stats || Out Tok/sec| Tot Tok/sec| Req Latency -(sec) ||| TTFT (ms) ||| ITL (ms) ||| TPOT (ms) || - Benchmark| Per Second| Concurrency| mean| mean| mean| median| -p99| mean| median| p99| mean| median| p99| mean| median| p99 ------------|-----------|------------|------------|------------|------|--------|- ------|-----|-------|-----|-----|-------|----|-----|-------|---- -synchronous| 1.55| 1.00| 198.1| 992.7| 0.64| 0.64| -0.69| 16.8| 16.4| 21.3| 4.9| 4.9| 5.3| 4.9| 4.9| 5.2 -================================================================================ 
-=============================================================== +======================================================================================================================== +======================= +Metadata | Request Stats || Out Tok/sec| Tot Tok/sec| Req Latency (sec) ||| TTFT (ms) ||| ITL (ms) +||| TPOT (ms) || + Benchmark| Per Second| Concurrency| mean| mean| mean| median| p99| mean| median| p99| mean| median| +p99| mean| median| p99 +-----------|-----------|------------|------------|------------|------|--------|------|-----|-------|-----|-----|-------| +----|-----|-------|---- +synchronous| 1.55| 1.00| 198.1| 992.7| 0.64| 0.64| 0.69| 16.8| 16.4| 21.3| 4.9| 4.9| +5.3| 4.9| 4.9| 5.2 +======================================================================================================================== +======================= diff --git a/tests/unit/entrypoints/test_display_entrypoint.py b/tests/unit/entrypoints/test_display_entrypoint.py index c028e2756..25be6b700 100644 --- a/tests/unit/entrypoints/test_display_entrypoint.py +++ b/tests/unit/entrypoints/test_display_entrypoint.py @@ -1,3 +1,4 @@ +import os import unittest from pathlib import Path @@ -31,6 +32,7 @@ def test_display_entrypoint_yaml(capfd, get_test_asset_dir): def generic_test_display_entrypoint(filename, capfd, get_test_asset_dir): + os.environ['COLUMNS'] = "120" # CLI output depends on terminal width. 
asset_dir = get_test_asset_dir() display_benchmarks_report(asset_dir / filename) out, err = capfd.readouterr() From 18d58971fc892da53fcc5ae16a9c51876b41fb90 Mon Sep 17 00:00:00 2001 From: Jared O'Connell Date: Fri, 13 Jun 2025 12:50:34 -0400 Subject: [PATCH 08/18] Fix linter error and exclude test assets from linting --- .pre-commit-config.yaml | 2 ++ tests/unit/entrypoints/test_display_entrypoint.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 61b765a2b..8d6bbf2e3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,7 +3,9 @@ repos: rev: v4.6.0 hooks: - id: trailing-whitespace + exclude: ^tests/?.*/assets/.+ - id: end-of-file-fixer + exclude: ^tests/?.*/assets/.+ - repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.11.7 hooks: diff --git a/tests/unit/entrypoints/test_display_entrypoint.py b/tests/unit/entrypoints/test_display_entrypoint.py index 25be6b700..2d793bc75 100644 --- a/tests/unit/entrypoints/test_display_entrypoint.py +++ b/tests/unit/entrypoints/test_display_entrypoint.py @@ -32,7 +32,7 @@ def test_display_entrypoint_yaml(capfd, get_test_asset_dir): def generic_test_display_entrypoint(filename, capfd, get_test_asset_dir): - os.environ['COLUMNS'] = "120" # CLI output depends on terminal width. + os.environ["COLUMNS"] = "120" # CLI output depends on terminal width. 
asset_dir = get_test_asset_dir() display_benchmarks_report(asset_dir / filename) out, err = capfd.readouterr() From 2063d43ae062287941bb5e4a8432771fc62b1f59 Mon Sep 17 00:00:00 2001 From: Jared O'Connell Date: Fri, 13 Jun 2025 15:15:11 -0400 Subject: [PATCH 09/18] Added option to regenerate test artifact --- .../assets/benchmarks_stripped_output.txt | 53 +++++++------------ .../entrypoints/test_display_entrypoint.py | 16 ++++-- 2 files changed, 31 insertions(+), 38 deletions(-) diff --git a/tests/unit/entrypoints/assets/benchmarks_stripped_output.txt b/tests/unit/entrypoints/assets/benchmarks_stripped_output.txt index f56839266..170d1e6a4 100644 --- a/tests/unit/entrypoints/assets/benchmarks_stripped_output.txt +++ b/tests/unit/entrypoints/assets/benchmarks_stripped_output.txt @@ -3,44 +3,29 @@ Benchmarks Metadata: Run id:93e36b31-b454-471d-ba62-6b2671585485 Duration:30.2 seconds - Profile:type=sweep, strategies=['synchronous', 'throughput', 'constant', 'constant', 'constant', 'constant', - 'constant', 'constant', 'constant', 'constant'], max_concurrency=None - Args:max_number=None, max_duration=30.0, warmup_number=None, warmup_duration=None, cooldown_number=None, - cooldown_duration=None - Worker:type_='generative_requests_worker' backend_type='openai_http' backend_target='example_target' - backend_model='example_model' backend_info={'max_output_tokens': 16384, 'timeout': 300, 'http2': True, - 'authorization': False, 'organization': None, 'project': None, 'text_completions_path': '/v1/completions', - 'chat_completions_path': '/v1/chat/completions'} - Request Loader:type_='generative_request_loader' data='prompt_tokens=256,output_tokens=128' data_args=None - processor='example_processor' processor_args=None + Profile:type=sweep, strategies=['synchronous', 'throughput', 'constant', 'constant', 'constant', 'constant', 'constant', 'constant', 'constant', 'constant'], + max_concurrency=None + Args:max_number=None, max_duration=30.0, warmup_number=None, 
warmup_duration=None, cooldown_number=None, cooldown_duration=None + Worker:type_='generative_requests_worker' backend_type='openai_http' backend_target='example_target' backend_model='example_model' backend_info={'max_output_tokens': 16384, + 'timeout': 300, 'http2': True, 'authorization': False, 'organization': None, 'project': None, 'text_completions_path': '/v1/completions', 'chat_completions_path': + '/v1/chat/completions'} + Request Loader:type_='generative_request_loader' data='prompt_tokens=256,output_tokens=128' data_args=None processor='example_processor' processor_args=None Extras:None Benchmarks Info: -======================================================================================================================== -=========================== -Metadata |||| Requests Made ||| Prompt Tok/Req ||| Output Tok/Req ||| Prompt Tok -Total||| Output Tok Total || - Benchmark| Start Time| End Time| Duration (s)| Comp| Inc| Err| Comp| Inc| Err| Comp| Inc| Err| Comp| Inc| -Err| Comp| Inc| Err ------------|-----------|---------|-------------|------|-----|-----|------|------|----|-------|-----|-----|-------|-----| ------|-------|------|------ -synchronous| 16:59:28| 16:59:58| 30.0| 46| 1| 0| 257.1| 256.0| 0.0| 128.0| 0.0| 0.0| 11827| 256| -0| 5888| 0| 0 -======================================================================================================================== -=========================== +=================================================================================================================================================== +Metadata |||| Requests Made ||| Prompt Tok/Req ||| Output Tok/Req ||| Prompt Tok Total||| Output Tok Total || + Benchmark| Start Time| End Time| Duration (s)| Comp| Inc| Err| Comp| Inc| Err| Comp| Inc| Err| Comp| Inc| Err| Comp| Inc| Err +-----------|-----------|---------|-------------|------|-----|-----|------|------|----|-------|-----|-----|-------|-----|-----|-------|------|------ +synchronous| 16:59:28| 16:59:58| 
30.0| 46| 1| 0| 257.1| 256.0| 0.0| 128.0| 0.0| 0.0| 11827| 256| 0| 5888| 0| 0 +=================================================================================================================================================== Benchmarks Stats: -======================================================================================================================== -======================= -Metadata | Request Stats || Out Tok/sec| Tot Tok/sec| Req Latency (sec) ||| TTFT (ms) ||| ITL (ms) -||| TPOT (ms) || - Benchmark| Per Second| Concurrency| mean| mean| mean| median| p99| mean| median| p99| mean| median| -p99| mean| median| p99 ------------|-----------|------------|------------|------------|------|--------|------|-----|-------|-----|-----|-------| -----|-----|-------|---- -synchronous| 1.55| 1.00| 198.1| 992.7| 0.64| 0.64| 0.69| 16.8| 16.4| 21.3| 4.9| 4.9| -5.3| 4.9| 4.9| 5.2 -======================================================================================================================== -======================= +=============================================================================================================================================== +Metadata | Request Stats || Out Tok/sec| Tot Tok/sec| Req Latency (sec) ||| TTFT (ms) ||| ITL (ms) ||| TPOT (ms) || + Benchmark| Per Second| Concurrency| mean| mean| mean| median| p99| mean| median| p99| mean| median| p99| mean| median| p99 +-----------|-----------|------------|------------|------------|------|--------|------|-----|-------|-----|-----|-------|----|-----|-------|---- +synchronous| 1.55| 1.00| 198.1| 992.7| 0.64| 0.64| 0.69| 16.8| 16.4| 21.3| 4.9| 4.9| 5.3| 4.9| 4.9| 5.2 +=============================================================================================================================================== diff --git a/tests/unit/entrypoints/test_display_entrypoint.py b/tests/unit/entrypoints/test_display_entrypoint.py index 2d793bc75..a8fb891a8 100644 --- 
a/tests/unit/entrypoints/test_display_entrypoint.py +++ b/tests/unit/entrypoints/test_display_entrypoint.py @@ -7,6 +7,10 @@ from guidellm.benchmark import display_benchmarks_report +# Set to true to re-write the expected output. +REGENERATE_ARTIFACTS = False + + @pytest.fixture def get_test_asset_dir(): def _() -> Path: @@ -32,14 +36,18 @@ def test_display_entrypoint_yaml(capfd, get_test_asset_dir): def generic_test_display_entrypoint(filename, capfd, get_test_asset_dir): - os.environ["COLUMNS"] = "120" # CLI output depends on terminal width. + os.environ["COLUMNS"] = "180" # CLI output depends on terminal width. asset_dir = get_test_asset_dir() display_benchmarks_report(asset_dir / filename) out, err = capfd.readouterr() expected_output_path = asset_dir / "benchmarks_stripped_output.txt" - with expected_output_path.open(encoding="utf_8") as file: - expected_output = file.read() - assert out == expected_output + if REGENERATE_ARTIFACTS: + expected_output_path.write_text(out) + assert False # Fail to prevent accidentally leaving this set + else: + with expected_output_path.open(encoding="utf_8") as file: + expected_output = file.read() + assert out == expected_output if __name__ == "__main__": From 27e83910e6ae1ddb5dd1164726abb47c8f0ed950 Mon Sep 17 00:00:00 2001 From: Jared O'Connell Date: Fri, 13 Jun 2025 15:18:28 -0400 Subject: [PATCH 10/18] Fix linter errors --- tests/unit/entrypoints/test_display_entrypoint.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/entrypoints/test_display_entrypoint.py b/tests/unit/entrypoints/test_display_entrypoint.py index a8fb891a8..013532ea8 100644 --- a/tests/unit/entrypoints/test_display_entrypoint.py +++ b/tests/unit/entrypoints/test_display_entrypoint.py @@ -6,7 +6,6 @@ from guidellm.benchmark import display_benchmarks_report - # Set to true to re-write the expected output. 
REGENERATE_ARTIFACTS = False @@ -43,7 +42,8 @@ def generic_test_display_entrypoint(filename, capfd, get_test_asset_dir): expected_output_path = asset_dir / "benchmarks_stripped_output.txt" if REGENERATE_ARTIFACTS: expected_output_path.write_text(out) - assert False # Fail to prevent accidentally leaving this set + # Fail to prevent accidentally leaving regeneration mode on + pytest.fail("Test bypassed to regenerate output") else: with expected_output_path.open(encoding="utf_8") as file: expected_output = file.read() From 7f2dd40efeedc6212b70aa6179cb1a8df17ac730 Mon Sep 17 00:00:00 2001 From: Jared O'Connell Date: Mon, 23 Jun 2025 11:56:02 -0400 Subject: [PATCH 11/18] Address review comments --- src/guidellm/__main__.py | 2 +- src/guidellm/benchmark/entrypoints.py | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/guidellm/__main__.py b/src/guidellm/__main__.py index 1633bb168..fb7d3ce4f 100644 --- a/src/guidellm/__main__.py +++ b/src/guidellm/__main__.py @@ -289,7 +289,7 @@ def benchmark( @cli.command(help="Redisplay a saved benchmark report.") @click.argument( "path", - type=click.Path(), + type=click.Path(file_okay=True, dir_okay=False, exists=True), default=Path.cwd() / "benchmarks.json", ) def display(path): diff --git a/src/guidellm/benchmark/entrypoints.py b/src/guidellm/benchmark/entrypoints.py index e89e58eb5..868b04394 100644 --- a/src/guidellm/benchmark/entrypoints.py +++ b/src/guidellm/benchmark/entrypoints.py @@ -136,10 +136,11 @@ async def benchmark_generative_text( return report, saved_path def display_benchmarks_report(file: Path): + """ + The command-line entry point for displaying a benchmarks report. + Assumes the file provided exists. 
+ """ console = GenerativeBenchmarksConsole(enabled=True) - if not file.exists(): - console.print_line(f"File {file} not found.") - return report = GenerativeBenchmarksReport.load_file(file) console.benchmarks = report.benchmarks console.print_full_report() From fff5c875996d89c22445c7a5c5e3ee1e6a24a79f Mon Sep 17 00:00:00 2001 From: Jared O'Connell Date: Thu, 26 Jun 2025 13:42:47 -0400 Subject: [PATCH 12/18] Allow reexporting reimported benchmarks --- pyproject.toml | 1 + src/guidellm/__main__.py | 42 ++++++++++++++++--- src/guidellm/benchmark/__init__.py | 4 +- src/guidellm/benchmark/entrypoints.py | 10 ++++- src/guidellm/benchmark/output.py | 2 +- ...=> test_benchmark_from_file_entrypoint.py} | 4 +- 6 files changed, 50 insertions(+), 13 deletions(-) rename tests/unit/entrypoints/{test_display_entrypoint.py => test_benchmark_from_file_entrypoint.py} (92%) diff --git a/pyproject.toml b/pyproject.toml index a78b1fc50..e528b9159 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,6 +56,7 @@ dependencies = [ "pyyaml>=6.0.0", "rich", "transformers", + "click-default-group~=1.2.4" ] [project.optional-dependencies] diff --git a/src/guidellm/__main__.py b/src/guidellm/__main__.py index fb7d3ce4f..f822c7e58 100644 --- a/src/guidellm/__main__.py +++ b/src/guidellm/__main__.py @@ -3,6 +3,7 @@ import json from pathlib import Path from typing import get_args +from click_default_group import DefaultGroup import click @@ -10,7 +11,7 @@ from guidellm.benchmark import ( ProfileType, benchmark_generative_text, - display_benchmarks_report, + reimport_benchmarks_report, ) from guidellm.config import print_config from guidellm.preprocess.dataset import ShortPromptStrategy, process_dataset @@ -48,8 +49,18 @@ def parse_number_str(ctx, param, value): # noqa: ARG001 def cli(): pass +@cli.group( + help="Commands to run a new benchmark or load a prior one.", + cls=DefaultGroup, + default="run", + default_if_no_args=True, +) +def benchmark(): + pass -@cli.command( + +@benchmark.command( 
+ "run", help="Run a benchmark against a generative model using the specified arguments." ) @click.option( @@ -234,7 +245,7 @@ def cli(): type=int, help="The random seed to use for benchmarking to ensure reproducibility.", ) -def benchmark( +def run( target, backend_type, backend_args, @@ -286,14 +297,32 @@ def benchmark( ) -@cli.command(help="Redisplay a saved benchmark report.") +@benchmark.command(help="Load a saved benchmark report.") @click.argument( "path", type=click.Path(file_okay=True, dir_okay=False, exists=True), default=Path.cwd() / "benchmarks.json", ) -def display(path): - display_benchmarks_report(path) +@click.option( + "--output-path", + type=click.Path(file_okay=True, dir_okay=True, exists=False), + default=None, + is_flag=False, + flag_value=Path.cwd() / "benchmarks_reexported.json", + help=( + "Allows re-exporting the benchmarks to another format." + "The path to save the output to. If it is a directory, " + "it will save benchmarks.json under it. " + "Otherwise, json, yaml, or csv files are supported for output types " + "which will be read from the extension for the file path." + "Optional. If the output path flag is not provided, the benchmarks " + "will not be reexported. If the flag is present but no value is " + "specified, it will default to the current directory with the file " + "name benchmarks_reexported.json." + ), +) +def from_file(path, output_path): + reimport_benchmarks_report(path, output_path) def decode_escaped_str(_ctx, _param, value): @@ -311,6 +340,7 @@ def decode_escaped_str(_ctx, _param, value): @cli.command( + short_help="Prints environment variable settings.", help=( "Print out the available configuration settings that can be set " "through environment variables." 
diff --git a/src/guidellm/benchmark/__init__.py b/src/guidellm/benchmark/__init__.py index 22e49ebfc..a4676c7e9 100644 --- a/src/guidellm/benchmark/__init__.py +++ b/src/guidellm/benchmark/__init__.py @@ -12,7 +12,7 @@ StatusBreakdown, ) from .benchmarker import Benchmarker, BenchmarkerResult, GenerativeBenchmarker -from .entrypoints import benchmark_generative_text, display_benchmarks_report +from .entrypoints import benchmark_generative_text, reimport_benchmarks_report from .output import GenerativeBenchmarksConsole, GenerativeBenchmarksReport from .profile import ( AsyncProfile, @@ -63,5 +63,5 @@ "ThroughputProfile", "benchmark_generative_text", "create_profile", - "display_benchmarks_report", + "reimport_benchmarks_report", ] diff --git a/src/guidellm/benchmark/entrypoints.py b/src/guidellm/benchmark/entrypoints.py index 868b04394..3a18d8eef 100644 --- a/src/guidellm/benchmark/entrypoints.py +++ b/src/guidellm/benchmark/entrypoints.py @@ -135,12 +135,18 @@ async def benchmark_generative_text( return report, saved_path -def display_benchmarks_report(file: Path): +def reimport_benchmarks_report(file: Path, output_path: Optional[Path]) -> None: """ - The command-line entry point for displaying a benchmarks report. + The command-line entry point for re-importing and displaying an + existing benchmarks report. Can also specify Assumes the file provided exists. 
""" console = GenerativeBenchmarksConsole(enabled=True) report = GenerativeBenchmarksReport.load_file(file) console.benchmarks = report.benchmarks console.print_full_report() + + if output_path: + console.print_line("\nSaving benchmarks report...") + saved_path = report.save_file(output_path) + console.print_line(f"Benchmarks report saved to {saved_path}") diff --git a/src/guidellm/benchmark/output.py b/src/guidellm/benchmark/output.py index 2a9967c93..442809573 100644 --- a/src/guidellm/benchmark/output.py +++ b/src/guidellm/benchmark/output.py @@ -242,7 +242,7 @@ def _file_setup( if path_suffix in [".csv"]: return path, "csv" - raise ValueError(f"Unsupported file extension: {path_suffix} for {path}.") + raise ValueError(f"Unsupported file extension: {path_suffix} for {path}; expected json, yaml, or csv.") @staticmethod def _benchmark_desc_headers_and_values( diff --git a/tests/unit/entrypoints/test_display_entrypoint.py b/tests/unit/entrypoints/test_benchmark_from_file_entrypoint.py similarity index 92% rename from tests/unit/entrypoints/test_display_entrypoint.py rename to tests/unit/entrypoints/test_benchmark_from_file_entrypoint.py index 013532ea8..002bdca04 100644 --- a/tests/unit/entrypoints/test_display_entrypoint.py +++ b/tests/unit/entrypoints/test_benchmark_from_file_entrypoint.py @@ -4,7 +4,7 @@ import pytest -from guidellm.benchmark import display_benchmarks_report +from guidellm.benchmark import reimport_benchmarks_report # Set to true to re-write the expected output. REGENERATE_ARTIFACTS = False @@ -37,7 +37,7 @@ def test_display_entrypoint_yaml(capfd, get_test_asset_dir): def generic_test_display_entrypoint(filename, capfd, get_test_asset_dir): os.environ["COLUMNS"] = "180" # CLI output depends on terminal width. 
asset_dir = get_test_asset_dir() - display_benchmarks_report(asset_dir / filename) + reimport_benchmarks_report(asset_dir / filename) out, err = capfd.readouterr() expected_output_path = asset_dir / "benchmarks_stripped_output.txt" if REGENERATE_ARTIFACTS: From 9f9ddc9e8566b849be90b39d20ccb54df28bf6d4 Mon Sep 17 00:00:00 2001 From: Jared O'Connell Date: Thu, 26 Jun 2025 14:51:14 -0400 Subject: [PATCH 13/18] Add test for reexporting and fix other tests --- .../assets/benchmarks_stripped.json | 1117 +---------------- .../test_benchmark_from_file_entrypoint.py | 24 +- 2 files changed, 24 insertions(+), 1117 deletions(-) diff --git a/tests/unit/entrypoints/assets/benchmarks_stripped.json b/tests/unit/entrypoints/assets/benchmarks_stripped.json index 975b0e659..a95d2880a 100644 --- a/tests/unit/entrypoints/assets/benchmarks_stripped.json +++ b/tests/unit/entrypoints/assets/benchmarks_stripped.json @@ -1,1116 +1 @@ -{ - "benchmarks": [ - { - "type_": "generative_benchmark", - "id_": "97ece514-8717-412f-9dba-2b42bcd9866f", - "run_id": "93e36b31-b454-471d-ba62-6b2671585485", - "args": { - "profile": { - "type_": "sweep", - "completed_strategies": 10, - "measured_rates": [ - 1.5481806532737452 - ], - "measured_concurrencies": [ - 0.9977627456483604 - ], - "max_concurrency": null, - "strategy_type": "constant", - "rate": -1, - "initial_burst": true, - "random_seed": 42, - "sweep_size": 10, - "rate_type": "constant", - "strategy_types": [ - "synchronous" - ] - }, - "strategy_index": 0, - "strategy": { - "type_": "synchronous" - }, - "max_number": null, - "max_duration": 30.0, - "warmup_number": null, - "warmup_duration": null, - "cooldown_number": null, - "cooldown_duration": null - }, - "run_stats": { - "start_time": 1749157168.054225, - "end_time": 1749157198.213826, - "requests_made": { - "successful": 1, - "errored": 0, - "incomplete": 0, - "total": 1 - }, - "queued_time_avg": 0.631589580089488, - "scheduled_time_delay_avg": 3.784260851271609e-06, - 
"scheduled_time_sleep_avg": 0.0, - "worker_start_delay_avg": 2.8021792148021943e-05, - "worker_time_avg": 0.6373953819274902, - "worker_start_time_targeted_delay_avg": 0.6319031715393066, - "request_start_time_delay_avg": 0.316034068452551, - "request_start_time_targeted_delay_avg": 0.6319856542222043, - "request_time_delay_avg": 0.00029866238857837433, - "request_time_avg": 0.6370967195389119 - }, - "worker": { - "type_": "generative_requests_worker", - "backend_type": "openai_http", - "backend_target": "example_target", - "backend_model": "example_model", - "backend_info": { - "max_output_tokens": 16384, - "timeout": 300, - "http2": true, - "authorization": false, - "organization": null, - "project": null, - "text_completions_path": "/v1/completions", - "chat_completions_path": "/v1/chat/completions" - } - }, - "request_loader": { - "type_": "generative_request_loader", - "data": "prompt_tokens=256,output_tokens=128", - "data_args": null, - "processor": "example_processor", - "processor_args": null - }, - "extras": {}, - "metrics": { - "requests_per_second": { - "successful": { - "mean": 1.5481806532737452, - "median": 1.5530116578512305, - "mode": 1.555484186315253, - "variance": 0.0003352629331303757, - "std_dev": 0.01831018659463567, - "min": 1.4509899157628907, - "max": 1.5597664461806156, - "count": 45, - "total_sum": 69.6707872953874, - "percentiles": { - "p001": 1.4509899157628907, - "p01": 1.4509899157628907, - "p05": 1.5190957942495127, - "p10": 1.5377883923356668, - "p25": 1.5483918601985445, - "p75": 1.5567531615313124, - "p90": 1.5583715343236735, - "p95": 1.5590938878953722, - "p99": 1.5597664461806156, - "p999": 1.5597664461806156 - }, - "cumulative_distribution_function": null - }, - "errored": { - "mean": 0.0, - "median": 0.0, - "mode": 0.0, - "variance": 0.0, - "std_dev": 0.0, - "min": 0.0, - "max": 0.0, - "count": 0, - "total_sum": 0.0, - "percentiles": { - "p001": 0.0, - "p01": 0.0, - "p05": 0.0, - "p10": 0.0, - "p25": 0.0, - "p75": 0.0, - 
"p90": 0.0, - "p95": 0.0, - "p99": 0.0, - "p999": 0.0 - }, - "cumulative_distribution_function": null - }, - "incomplete": { - "mean": 0.0, - "median": 0.0, - "mode": 0.0, - "variance": 0.0, - "std_dev": 0.0, - "min": 0.0, - "max": 0.0, - "count": 0, - "total_sum": 0.0, - "percentiles": { - "p001": 0.0, - "p01": 0.0, - "p05": 0.0, - "p10": 0.0, - "p25": 0.0, - "p75": 0.0, - "p90": 0.0, - "p95": 0.0, - "p99": 0.0, - "p999": 0.0 - }, - "cumulative_distribution_function": null - }, - "total": { - "mean": 1.5668128271815418, - "median": 1.5530312090734288, - "mode": 1.555484186315253, - "variance": 0.036536424510388923, - "std_dev": 0.19114503527528232, - "min": 1.4509899157628907, - "max": 3.509921881864626, - "count": 46, - "total_sum": 73.18070917725203, - "percentiles": { - "p001": 1.4509899157628907, - "p01": 1.4509899157628907, - "p05": 1.5190957942495127, - "p10": 1.5377883923356668, - "p25": 1.5483918601985445, - "p75": 1.5567531615313124, - "p90": 1.5583715343236735, - "p95": 1.5591048992639953, - "p99": 1.5597664461806156, - "p999": 3.509921881864626 - }, - "cumulative_distribution_function": null - } - }, - "request_concurrency": { - "successful": { - "mean": 0.9977627456483604, - "median": 1.0, - "mode": 1.0, - "variance": 0.002232249044605607, - "std_dev": 0.047246682895263736, - "min": 0.0, - "max": 1.0, - "count": 2, - "total_sum": 1.0, - "percentiles": { - "p001": 0.0, - "p01": 1.0, - "p05": 1.0, - "p10": 1.0, - "p25": 1.0, - "p75": 1.0, - "p90": 1.0, - "p95": 1.0, - "p99": 1.0, - "p999": 1.0 - }, - "cumulative_distribution_function": null - }, - "errored": { - "mean": 0.0, - "median": 0.0, - "mode": 0.0, - "variance": 0.0, - "std_dev": 0.0, - "min": 0.0, - "max": 0.0, - "count": 0, - "total_sum": 0.0, - "percentiles": { - "p001": 0.0, - "p01": 0.0, - "p05": 0.0, - "p10": 0.0, - "p25": 0.0, - "p75": 0.0, - "p90": 0.0, - "p95": 0.0, - "p99": 0.0, - "p999": 0.0 - }, - "cumulative_distribution_function": null - }, - "incomplete": { - "mean": 1.0, - 
"median": 1.0, - "mode": 1.0, - "variance": 0.0, - "std_dev": 0.0, - "min": 1.0, - "max": 1.0, - "count": 1, - "total_sum": 1.0, - "percentiles": { - "p001": 1.0, - "p01": 1.0, - "p05": 1.0, - "p10": 1.0, - "p25": 1.0, - "p75": 1.0, - "p90": 1.0, - "p95": 1.0, - "p99": 1.0, - "p999": 1.0 - }, - "cumulative_distribution_function": null - }, - "total": { - "mean": 0.9977433642674269, - "median": 1.0, - "mode": 1.0, - "variance": 0.002251543327743578, - "std_dev": 0.047450430216633206, - "min": 0.0, - "max": 1.0, - "count": 2, - "total_sum": 1.0, - "percentiles": { - "p001": 0.0, - "p01": 1.0, - "p05": 1.0, - "p10": 1.0, - "p25": 1.0, - "p75": 1.0, - "p90": 1.0, - "p95": 1.0, - "p99": 1.0, - "p999": 1.0 - }, - "cumulative_distribution_function": null - } - }, - "request_latency": { - "successful": { - "mean": 0.6444743664368339, - "median": 0.6424565315246582, - "mode": 0.6395885944366455, - "variance": 6.414585873782315e-05, - "std_dev": 0.008009110982988258, - "min": 0.6395885944366455, - "max": 0.6891846656799316, - "count": 46, - "total_sum": 29.64582085609436, - "percentiles": { - "p001": 0.6395885944366455, - "p01": 0.6395885944366455, - "p05": 0.6399857997894287, - "p10": 0.6403069496154785, - "p25": 0.6409540176391602, - "p75": 0.644390344619751, - "p90": 0.6488735675811768, - "p95": 0.656728982925415, - "p99": 0.6891846656799316, - "p999": 0.6891846656799316 - }, - "cumulative_distribution_function": null - }, - "errored": { - "mean": 0.0, - "median": 0.0, - "mode": 0.0, - "variance": 0.0, - "std_dev": 0.0, - "min": 0.0, - "max": 0.0, - "count": 0, - "total_sum": 0.0, - "percentiles": { - "p001": 0.0, - "p01": 0.0, - "p05": 0.0, - "p10": 0.0, - "p25": 0.0, - "p75": 0.0, - "p90": 0.0, - "p95": 0.0, - "p99": 0.0, - "p999": 0.0 - }, - "cumulative_distribution_function": null - }, - "incomplete": { - "mean": 0.2836878299713135, - "median": 0.2836878299713135, - "mode": 0.2836878299713135, - "variance": 0.0, - "std_dev": 0.0, - "min": 0.2836878299713135, - "max": 
0.2836878299713135, - "count": 1, - "total_sum": 0.2836878299713135, - "percentiles": { - "p001": 0.2836878299713135, - "p01": 0.2836878299713135, - "p05": 0.2836878299713135, - "p10": 0.2836878299713135, - "p25": 0.2836878299713135, - "p75": 0.2836878299713135, - "p90": 0.2836878299713135, - "p95": 0.2836878299713135, - "p99": 0.2836878299713135, - "p999": 0.2836878299713135 - }, - "cumulative_distribution_function": null - }, - "total": { - "mean": 0.6367980571503334, - "median": 0.642310380935669, - "mode": 0.2836878299713135, - "variance": 0.0027733643692853522, - "std_dev": 0.05266274175624881, - "min": 0.2836878299713135, - "max": 0.6891846656799316, - "count": 47, - "total_sum": 29.929508686065674, - "percentiles": { - "p001": 0.2836878299713135, - "p01": 0.2836878299713135, - "p05": 0.6398613452911377, - "p10": 0.6402454376220703, - "p25": 0.640899658203125, - "p75": 0.644390344619751, - "p90": 0.6488735675811768, - "p95": 0.656728982925415, - "p99": 0.6891846656799316, - "p999": 0.6891846656799316 - }, - "cumulative_distribution_function": null - } - }, - "prompt_token_count": { - "successful": { - "mean": 257.1086956521739, - "median": 257.0, - "mode": 257.0, - "variance": 0.14035916824196598, - "std_dev": 0.37464538999161057, - "min": 257.0, - "max": 259.0, - "count": 46, - "total_sum": 11827.0, - "percentiles": { - "p001": 257.0, - "p01": 257.0, - "p05": 257.0, - "p10": 257.0, - "p25": 257.0, - "p75": 257.0, - "p90": 257.0, - "p95": 258.0, - "p99": 259.0, - "p999": 259.0 - }, - "cumulative_distribution_function": null - }, - "errored": { - "mean": 0.0, - "median": 0.0, - "mode": 0.0, - "variance": 0.0, - "std_dev": 0.0, - "min": 0.0, - "max": 0.0, - "count": 0, - "total_sum": 0.0, - "percentiles": { - "p001": 0.0, - "p01": 0.0, - "p05": 0.0, - "p10": 0.0, - "p25": 0.0, - "p75": 0.0, - "p90": 0.0, - "p95": 0.0, - "p99": 0.0, - "p999": 0.0 - }, - "cumulative_distribution_function": null - }, - "incomplete": { - "mean": 256.0, - "median": 256.0, - "mode": 
256.0, - "variance": 0.0, - "std_dev": 0.0, - "min": 256.0, - "max": 256.0, - "count": 1, - "total_sum": 256.0, - "percentiles": { - "p001": 256.0, - "p01": 256.0, - "p05": 256.0, - "p10": 256.0, - "p25": 256.0, - "p75": 256.0, - "p90": 256.0, - "p95": 256.0, - "p99": 256.0, - "p999": 256.0 - }, - "cumulative_distribution_function": null - }, - "total": { - "mean": 257.0851063829787, - "median": 257.0, - "mode": 256.0, - "variance": 0.16296966953372566, - "std_dev": 0.40369502044702715, - "min": 256.0, - "max": 259.0, - "count": 47, - "total_sum": 12083.0, - "percentiles": { - "p001": 256.0, - "p01": 256.0, - "p05": 257.0, - "p10": 257.0, - "p25": 257.0, - "p75": 257.0, - "p90": 257.0, - "p95": 258.0, - "p99": 259.0, - "p999": 259.0 - }, - "cumulative_distribution_function": null - } - }, - "output_token_count": { - "successful": { - "mean": 127.99999999999999, - "median": 128.0, - "mode": 128.0, - "variance": 2.01948391736579e-28, - "std_dev": 1.4210854715202002e-14, - "min": 128.0, - "max": 128.0, - "count": 46, - "total_sum": 5888.0, - "percentiles": { - "p001": 128.0, - "p01": 128.0, - "p05": 128.0, - "p10": 128.0, - "p25": 128.0, - "p75": 128.0, - "p90": 128.0, - "p95": 128.0, - "p99": 128.0, - "p999": 128.0 - }, - "cumulative_distribution_function": null - }, - "errored": { - "mean": 0.0, - "median": 0.0, - "mode": 0.0, - "variance": 0.0, - "std_dev": 0.0, - "min": 0.0, - "max": 0.0, - "count": 0, - "total_sum": 0.0, - "percentiles": { - "p001": 0.0, - "p01": 0.0, - "p05": 0.0, - "p10": 0.0, - "p25": 0.0, - "p75": 0.0, - "p90": 0.0, - "p95": 0.0, - "p99": 0.0, - "p999": 0.0 - }, - "cumulative_distribution_function": null - }, - "incomplete": { - "mean": 0.0, - "median": 0.0, - "mode": 0.0, - "variance": 0.0, - "std_dev": 0.0, - "min": 0.0, - "max": 0.0, - "count": 0, - "total_sum": 0.0, - "percentiles": { - "p001": 0.0, - "p01": 0.0, - "p05": 0.0, - "p10": 0.0, - "p25": 0.0, - "p75": 0.0, - "p90": 0.0, - "p95": 0.0, - "p99": 0.0, - "p999": 0.0 - }, - 
"cumulative_distribution_function": null - }, - "total": { - "mean": 126.44680851063832, - "median": 128.0, - "mode": 55.0, - "variance": 110.97057492077867, - "std_dev": 10.534257207832866, - "min": 55.0, - "max": 128.0, - "count": 47, - "total_sum": 5943.0, - "percentiles": { - "p001": 55.0, - "p01": 55.0, - "p05": 128.0, - "p10": 128.0, - "p25": 128.0, - "p75": 128.0, - "p90": 128.0, - "p95": 128.0, - "p99": 128.0, - "p999": 128.0 - }, - "cumulative_distribution_function": null - } - }, - "time_to_first_token_ms": { - "successful": { - "mean": 16.792535781860348, - "median": 16.38054847717285, - "mode": 15.790939331054688, - "variance": 1.2776652847210441, - "std_dev": 1.1303385708366516, - "min": 15.790939331054688, - "max": 21.281957626342773, - "count": 46, - "total_sum": 772.4566459655762, - "percentiles": { - "p001": 15.790939331054688, - "p01": 15.790939331054688, - "p05": 15.971660614013672, - "p10": 16.034841537475586, - "p25": 16.111373901367188, - "p75": 16.840696334838867, - "p90": 18.505334854125977, - "p95": 19.00935173034668, - "p99": 21.281957626342773, - "p999": 21.281957626342773 - }, - "cumulative_distribution_function": null - }, - "errored": { - "mean": 0.0, - "median": 0.0, - "mode": 0.0, - "variance": 0.0, - "std_dev": 0.0, - "min": 0.0, - "max": 0.0, - "count": 0, - "total_sum": 0.0, - "percentiles": { - "p001": 0.0, - "p01": 0.0, - "p05": 0.0, - "p10": 0.0, - "p25": 0.0, - "p75": 0.0, - "p90": 0.0, - "p95": 0.0, - "p99": 0.0, - "p999": 0.0 - }, - "cumulative_distribution_function": null - }, - "incomplete": { - "mean": 0.0, - "median": 0.0, - "mode": 0.0, - "variance": 0.0, - "std_dev": 0.0, - "min": 0.0, - "max": 0.0, - "count": 0, - "total_sum": 0.0, - "percentiles": { - "p001": 0.0, - "p01": 0.0, - "p05": 0.0, - "p10": 0.0, - "p25": 0.0, - "p75": 0.0, - "p90": 0.0, - "p95": 0.0, - "p99": 0.0, - "p999": 0.0 - }, - "cumulative_distribution_function": null - }, - "total": { - "mean": 16.777170465347616, - "median": 16.371726989746094, - 
"mode": 15.790939331054688, - "variance": 1.2613411927317046, - "std_dev": 1.1230944718641014, - "min": 15.790939331054688, - "max": 21.281957626342773, - "count": 47, - "total_sum": 788.5270118713379, - "percentiles": { - "p001": 15.790939331054688, - "p01": 15.790939331054688, - "p05": 15.971660614013672, - "p10": 16.034841537475586, - "p25": 16.100645065307617, - "p75": 16.840696334838867, - "p90": 18.505334854125977, - "p95": 19.00935173034668, - "p99": 21.281957626342773, - "p999": 21.281957626342773 - }, - "cumulative_distribution_function": null - } - }, - "time_per_output_token_ms": { - "successful": { - "mean": 4.90300272307966, - "median": 4.885653033852577, - "mode": 4.870360717177391, - "variance": 0.003163643010108571, - "std_dev": 0.05624627107736628, - "min": 4.870360717177391, - "max": 5.217265337705612, - "count": 46, - "total_sum": 225.5381252616644, - "percentiles": { - "p001": 4.870360717177391, - "p01": 4.870360717177391, - "p05": 4.8728808760643005, - "p10": 4.873953759670258, - "p25": 4.876237362623215, - "p75": 4.904214292764664, - "p90": 4.934689030051231, - "p95": 4.993332549929619, - "p99": 5.217265337705612, - "p999": 5.217265337705612 - }, - "cumulative_distribution_function": null - }, - "errored": { - "mean": 0.0, - "median": 0.0, - "mode": 0.0, - "variance": 0.0, - "std_dev": 0.0, - "min": 0.0, - "max": 0.0, - "count": 0, - "total_sum": 0.0, - "percentiles": { - "p001": 0.0, - "p01": 0.0, - "p05": 0.0, - "p10": 0.0, - "p25": 0.0, - "p75": 0.0, - "p90": 0.0, - "p95": 0.0, - "p99": 0.0, - "p999": 0.0 - }, - "cumulative_distribution_function": null - }, - "incomplete": { - "mean": 0.0, - "median": 0.0, - "mode": 0.0, - "variance": 0.0, - "std_dev": 0.0, - "min": 0.0, - "max": 0.0, - "count": 0, - "total_sum": 0.0, - "percentiles": { - "p001": 0.0, - "p01": 0.0, - "p05": 0.0, - "p10": 0.0, - "p25": 0.0, - "p75": 0.0, - "p90": 0.0, - "p95": 0.0, - "p99": 0.0, - "p999": 0.0 - }, - "cumulative_distribution_function": null - }, - "total": { 
- "mean": 4.9022222114856975, - "median": 4.882922396063805, - "mode": 4.870360717177391, - "variance": 0.003199582258516055, - "std_dev": 0.05656485002646127, - "min": 4.81866489757191, - "max": 5.217265337705612, - "count": 47, - "total_sum": 230.3567901592363, - "percentiles": { - "p001": 4.81866489757191, - "p01": 4.870360717177391, - "p05": 4.872731864452362, - "p10": 4.873953759670258, - "p25": 4.876237362623215, - "p75": 4.904214292764664, - "p90": 4.934689030051231, - "p95": 4.993332549929619, - "p99": 5.217265337705612, - "p999": 5.217265337705612 - }, - "cumulative_distribution_function": null - } - }, - "inter_token_latency_ms": { - "successful": { - "mean": 4.941609043733832, - "median": 4.9241227427805505, - "mode": 4.90871001416304, - "variance": 0.003213660306132974, - "std_dev": 0.056689155101597465, - "min": 4.90871001416304, - "max": 5.258346167136365, - "count": 46, - "total_sum": 227.31401601175622, - "percentiles": { - "p001": 4.90871001416304, - "p01": 4.90871001416304, - "p05": 4.911250016820713, - "p10": 4.9123313483290785, - "p25": 4.91463293240765, - "p75": 4.9428301533376136, - "p90": 4.973544849185493, - "p95": 5.032650129062923, - "p99": 5.258346167136365, - "p999": 5.258346167136365 - }, - "cumulative_distribution_function": null - }, - "errored": { - "mean": 0.0, - "median": 0.0, - "mode": 0.0, - "variance": 0.0, - "std_dev": 0.0, - "min": 0.0, - "max": 0.0, - "count": 0, - "total_sum": 0.0, - "percentiles": { - "p001": 0.0, - "p01": 0.0, - "p05": 0.0, - "p10": 0.0, - "p25": 0.0, - "p75": 0.0, - "p90": 0.0, - "p95": 0.0, - "p99": 0.0, - "p999": 0.0 - }, - "cumulative_distribution_function": null - }, - "incomplete": { - "mean": 0.0, - "median": 0.0, - "mode": 0.0, - "variance": 0.0, - "std_dev": 0.0, - "min": 0.0, - "max": 0.0, - "count": 0, - "total_sum": 0.0, - "percentiles": { - "p001": 0.0, - "p01": 0.0, - "p05": 0.0, - "p10": 0.0, - "p25": 0.0, - "p75": 0.0, - "p90": 0.0, - "p95": 0.0, - "p99": 0.0, - "p999": 0.0 - }, - 
"cumulative_distribution_function": null - }, - "total": { - "mean": 4.9413003057767115, - "median": 4.921370603906826, - "mode": 4.90871001416304, - "variance": 0.003194539306669541, - "std_dev": 0.056520255720135776, - "min": 4.9078994327121315, - "max": 5.258346167136365, - "count": 47, - "total_sum": 232.22191544446835, - "percentiles": { - "p001": 4.9078994327121315, - "p01": 4.90871001416304, - "p05": 4.911099831888995, - "p10": 4.9123313483290785, - "p25": 4.91463293240765, - "p75": 4.9428301533376136, - "p90": 4.973544849185493, - "p95": 5.032650129062923, - "p99": 5.258346167136365, - "p999": 5.258346167136365 - }, - "cumulative_distribution_function": null - } - }, - "output_tokens_per_second": { - "successful": { - "mean": 198.13346751788123, - "median": 203.04516628745705, - "mode": 203.5378269520066, - "variance": 613.9948900522365, - "std_dev": 24.778920276158857, - "min": 0.0, - "max": 203.69598368219124, - "count": 122, - "total_sum": 17849.590625912137, - "percentiles": { - "p001": 46.71289356157213, - "p01": 55.502236337170835, - "p05": 190.14888022486173, - "p10": 200.69400449782287, - "p25": 202.23259402121505, - "p75": 203.42923658938793, - "p90": 203.5378269520066, - "p95": 203.58722454130668, - "p99": 203.6860916860917, - "p999": 203.69598368219124 - }, - "cumulative_distribution_function": null - }, - "errored": { - "mean": 0.0, - "median": 0.0, - "mode": 0.0, - "variance": 0.0, - "std_dev": 0.0, - "min": 0.0, - "max": 0.0, - "count": 0, - "total_sum": 0.0, - "percentiles": { - "p001": 0.0, - "p01": 0.0, - "p05": 0.0, - "p10": 0.0, - "p25": 0.0, - "p75": 0.0, - "p90": 0.0, - "p95": 0.0, - "p99": 0.0, - "p999": 0.0 - }, - "cumulative_distribution_function": null - }, - "incomplete": { - "mean": 0.0, - "median": 0.0, - "mode": 0.0, - "variance": 0.0, - "std_dev": 0.0, - "min": 0.0, - "max": 0.0, - "count": 0, - "total_sum": 0.0, - "percentiles": { - "p001": 0.0, - "p01": 0.0, - "p05": 0.0, - "p10": 0.0, - "p25": 0.0, - "p75": 0.0, - "p90": 
0.0, - "p95": 0.0, - "p99": 0.0, - "p999": 0.0 - }, - "cumulative_distribution_function": null - }, - "total": { - "mean": 198.08514508750469, - "median": 203.04516628745705, - "mode": 203.5378269520066, - "variance": 619.6237334717947, - "std_dev": 24.89224243558211, - "min": 0.0, - "max": 203.69598368219124, - "count": 125, - "total_sum": 18310.99071823841, - "percentiles": { - "p001": 46.71289356157213, - "p01": 55.502236337170835, - "p05": 190.14888022486173, - "p10": 200.69400449782287, - "p25": 202.23259402121505, - "p75": 203.4193704835346, - "p90": 203.5378269520066, - "p95": 203.58722454130668, - "p99": 203.6860916860917, - "p999": 203.69598368219124 - }, - "cumulative_distribution_function": null - } - }, - "tokens_per_second": { - "successful": { - "mean": 992.6867036588937, - "median": 614.3700014647723, - "mode": 615.2712336805046, - "variance": 62014350.40386989, - "std_dev": 7874.919072845758, - "min": 0.0, - "max": 159300.81436773148, - "count": 139, - "total_sum": 5852579.912913391, - "percentiles": { - "p001": 46.71289356157213, - "p01": 55.502236337170835, - "p05": 574.9559972583961, - "p10": 606.8148148148148, - "p25": 611.5928842228055, - "p75": 615.0907757735738, - "p90": 615.4517975055026, - "p95": 615.542119166422, - "p99": 617.5359246171967, - "p999": 157985.65557672578 - }, - "cumulative_distribution_function": null - }, - "errored": { - "mean": 0.0, - "median": 0.0, - "mode": 0.0, - "variance": 0.0, - "std_dev": 0.0, - "min": 0.0, - "max": 0.0, - "count": 0, - "total_sum": 0.0, - "percentiles": { - "p001": 0.0, - "p01": 0.0, - "p05": 0.0, - "p10": 0.0, - "p25": 0.0, - "p75": 0.0, - "p90": 0.0, - "p95": 0.0, - "p99": 0.0, - "p999": 0.0 - }, - "cumulative_distribution_function": null - }, - "incomplete": { - "mean": 0.0, - "median": 0.0, - "mode": 0.0, - "variance": 0.0, - "std_dev": 0.0, - "min": 0.0, - "max": 0.0, - "count": 0, - "total_sum": 0.0, - "percentiles": { - "p001": 0.0, - "p01": 0.0, - "p05": 0.0, - "p10": 0.0, - "p25": 0.0, - 
"p75": 0.0, - "p90": 0.0, - "p95": 0.0, - "p99": 0.0, - "p999": 0.0 - }, - "cumulative_distribution_function": null - }, - "total": { - "mean": 1002.1268169766876, - "median": 614.3700014647723, - "mode": 615.2712336805046, - "variance": 63939736.95341249, - "std_dev": 7996.232672541019, - "min": 0.0, - "max": 296531.848660591, - "count": 143, - "total_sum": 6151486.576325966, - "percentiles": { - "p001": 46.71289356157213, - "p01": 55.502236337170835, - "p05": 574.9559972583961, - "p10": 606.8148148148148, - "p25": 611.5928842228055, - "p75": 615.0907757735738, - "p90": 615.4517975055026, - "p95": 615.542119166422, - "p99": 1158.3275338304336, - "p999": 158008.81383758428 - }, - "cumulative_distribution_function": null - } - } - }, - "start_time": 1749157168.1827004, - "end_time": 1749157198.1799018, - "request_totals": { - "successful": 46, - "errored": 0, - "incomplete": 1, - "total": 47 - }, - "request_samples": null, - "requests": { - "successful": [ - { - "type_": "generative_text_response", - "request_id": "73054dd1-486f-4894-a861-075750b82453", - "request_type": "text_completions", - "scheduler_info": { - "requested": true, - "completed": true, - "errored": false, - "canceled": false, - "targeted_start_time": 1749157168.179883, - "queued_time": 1749157168.1811602, - "dequeued_time": 1749157168.1818697, - "scheduled_time": 1749157168.181895, - "worker_start": 1749157168.1820004, - "request_start": 1749157168.1827004, - "request_end": 1749157168.871885, - "worker_end": 1749157168.8723884, - "process_id": 0 - }, - "prompt": "such a sacrifice to her advantage as years of gratitude cannot enough acknowledge. By this time she is actually with them! If such goodness does not make her miserable now, she will never deserve to be happy! What a meeting for her, when she first sees my aunt! We must endeavour to forget all that has passed on either side, said Jane I hope and trust they will yet be happy. 
His consenting to marry her is a proof, I will believe, that he is come to a right way of thinking. Their mutual affection will steady them; and I flatter myself they will settle so quietly, and live in so rational a manner, as may in time make their past imprudence forgotten. Their conduct has been such, replied Elizabeth, as neither you, nor I, nor anybody, can ever forget. It is useless to talk of it. It now occurred to the girls that their mother was in all likelihood perfectly ignorant of what had happened. They went to the library, therefore, and asked their father whether he would not wish them to make it known to her. He was writing, and, without raising his head, coolly replied, Just as you please. May we take my uncle s letter to read to her? Take whatever you like, and get away", - "output": ", said Jane. The letter was read, and the girls retired to their own apartments. Elizabeth was the first to return. She found her mother seated in the drawing-room, and looking very pale. She was dressed in a loose white gown, and her hair was disordered. She rose as they entered, and clasped them both in her arms, and then, without saying a word, took her seat on the sofa, and began to weep. Elizabeth and Jane stood by her side, and listened to the sobs which issued from her heart. 
She had no words to express her gratitude, and, in a few minutes,", - "prompt_tokens": 257, - "output_tokens": 128, - "start_time": 1749157168.1827004, - "end_time": 1749157168.871885, - "first_token_time": 1749157168.2039824, - "last_token_time": 1749157168.8717923, - "request_latency": 0.6891846656799316, - "time_to_first_token_ms": 21.281957626342773, - "time_per_output_token_ms": 5.217265337705612, - "inter_token_latency_ms": 5.258346167136365, - "tokens_per_second": 558.631117568713, - "output_tokens_per_second": 185.72670921765 - } - ], - "total": null - }, - "duration": 29.997201442718506 - } - ] -} \ No newline at end of file +{"benchmarks": [{"type_": "generative_benchmark", "id_": "97ece514-8717-412f-9dba-2b42bcd9866f", "run_id": "93e36b31-b454-471d-ba62-6b2671585485", "args": {"profile": {"type_": "sweep", "completed_strategies": 10, "measured_rates": [1.5481806532737452], "measured_concurrencies": [0.9977627456483604], "max_concurrency": null, "strategy_type": "constant", "rate": -1.0, "initial_burst": true, "random_seed": 42, "sweep_size": 10, "rate_type": "constant", "strategy_types": ["synchronous", "throughput", "constant", "constant", "constant", "constant", "constant", "constant", "constant", "constant"]}, "strategy_index": 0, "strategy": {"type_": "synchronous"}, "max_number": null, "max_duration": 30.0, "warmup_number": null, "warmup_duration": null, "cooldown_number": null, "cooldown_duration": null}, "run_stats": {"start_time": 1749157168.054225, "end_time": 1749157198.213826, "requests_made": {"successful": 1, "errored": 0, "incomplete": 0, "total": 1}, "queued_time_avg": 0.631589580089488, "scheduled_time_delay_avg": 3.784260851271609e-06, "scheduled_time_sleep_avg": 0.0, "worker_start_delay_avg": 2.8021792148021943e-05, "worker_time_avg": 0.6373953819274902, "worker_start_time_targeted_delay_avg": 0.6319031715393066, "request_start_time_delay_avg": 0.316034068452551, "request_start_time_targeted_delay_avg": 0.6319856542222043, 
"request_time_delay_avg": 0.00029866238857837433, "request_time_avg": 0.6370967195389119}, "worker": {"type_": "generative_requests_worker", "backend_type": "openai_http", "backend_target": "example_target", "backend_model": "example_model", "backend_info": {"max_output_tokens": 16384, "timeout": 300, "http2": true, "authorization": false, "organization": null, "project": null, "text_completions_path": "/v1/completions", "chat_completions_path": "/v1/chat/completions"}}, "request_loader": {"type_": "generative_request_loader", "data": "prompt_tokens=256,output_tokens=128", "data_args": null, "processor": "example_processor", "processor_args": null}, "extras": {}, "metrics": {"requests_per_second": {"successful": {"mean": 1.5481806532737452, "median": 1.5530116578512305, "mode": 1.555484186315253, "variance": 0.0003352629331303757, "std_dev": 0.01831018659463567, "min": 1.4509899157628907, "max": 1.5597664461806156, "count": 45, "total_sum": 69.6707872953874, "percentiles": {"p001": 1.4509899157628907, "p01": 1.4509899157628907, "p05": 1.5190957942495127, "p10": 1.5377883923356668, "p25": 1.5483918601985445, "p75": 1.5567531615313124, "p90": 1.5583715343236735, "p95": 1.5590938878953722, "p99": 1.5597664461806156, "p999": 1.5597664461806156}, "cumulative_distribution_function": null}, "errored": {"mean": 0.0, "median": 0.0, "mode": 0.0, "variance": 0.0, "std_dev": 0.0, "min": 0.0, "max": 0.0, "count": 0, "total_sum": 0.0, "percentiles": {"p001": 0.0, "p01": 0.0, "p05": 0.0, "p10": 0.0, "p25": 0.0, "p75": 0.0, "p90": 0.0, "p95": 0.0, "p99": 0.0, "p999": 0.0}, "cumulative_distribution_function": null}, "incomplete": {"mean": 0.0, "median": 0.0, "mode": 0.0, "variance": 0.0, "std_dev": 0.0, "min": 0.0, "max": 0.0, "count": 0, "total_sum": 0.0, "percentiles": {"p001": 0.0, "p01": 0.0, "p05": 0.0, "p10": 0.0, "p25": 0.0, "p75": 0.0, "p90": 0.0, "p95": 0.0, "p99": 0.0, "p999": 0.0}, "cumulative_distribution_function": null}, "total": {"mean": 1.5668128271815418, "median": 
1.5530312090734288, "mode": 1.555484186315253, "variance": 0.036536424510388923, "std_dev": 0.19114503527528232, "min": 1.4509899157628907, "max": 3.509921881864626, "count": 46, "total_sum": 73.18070917725203, "percentiles": {"p001": 1.4509899157628907, "p01": 1.4509899157628907, "p05": 1.5190957942495127, "p10": 1.5377883923356668, "p25": 1.5483918601985445, "p75": 1.5567531615313124, "p90": 1.5583715343236735, "p95": 1.5591048992639953, "p99": 1.5597664461806156, "p999": 3.509921881864626}, "cumulative_distribution_function": null}}, "request_concurrency": {"successful": {"mean": 0.9977627456483604, "median": 1.0, "mode": 1.0, "variance": 0.002232249044605607, "std_dev": 0.047246682895263736, "min": 0.0, "max": 1.0, "count": 2, "total_sum": 1.0, "percentiles": {"p001": 0.0, "p01": 1.0, "p05": 1.0, "p10": 1.0, "p25": 1.0, "p75": 1.0, "p90": 1.0, "p95": 1.0, "p99": 1.0, "p999": 1.0}, "cumulative_distribution_function": null}, "errored": {"mean": 0.0, "median": 0.0, "mode": 0.0, "variance": 0.0, "std_dev": 0.0, "min": 0.0, "max": 0.0, "count": 0, "total_sum": 0.0, "percentiles": {"p001": 0.0, "p01": 0.0, "p05": 0.0, "p10": 0.0, "p25": 0.0, "p75": 0.0, "p90": 0.0, "p95": 0.0, "p99": 0.0, "p999": 0.0}, "cumulative_distribution_function": null}, "incomplete": {"mean": 1.0, "median": 1.0, "mode": 1.0, "variance": 0.0, "std_dev": 0.0, "min": 1.0, "max": 1.0, "count": 1, "total_sum": 1.0, "percentiles": {"p001": 1.0, "p01": 1.0, "p05": 1.0, "p10": 1.0, "p25": 1.0, "p75": 1.0, "p90": 1.0, "p95": 1.0, "p99": 1.0, "p999": 1.0}, "cumulative_distribution_function": null}, "total": {"mean": 0.9977433642674269, "median": 1.0, "mode": 1.0, "variance": 0.002251543327743578, "std_dev": 0.047450430216633206, "min": 0.0, "max": 1.0, "count": 2, "total_sum": 1.0, "percentiles": {"p001": 0.0, "p01": 1.0, "p05": 1.0, "p10": 1.0, "p25": 1.0, "p75": 1.0, "p90": 1.0, "p95": 1.0, "p99": 1.0, "p999": 1.0}, "cumulative_distribution_function": null}}, "request_latency": {"successful": 
{"mean": 0.6444743664368339, "median": 0.6424565315246582, "mode": 0.6395885944366455, "variance": 6.414585873782315e-05, "std_dev": 0.008009110982988258, "min": 0.6395885944366455, "max": 0.6891846656799316, "count": 46, "total_sum": 29.64582085609436, "percentiles": {"p001": 0.6395885944366455, "p01": 0.6395885944366455, "p05": 0.6399857997894287, "p10": 0.6403069496154785, "p25": 0.6409540176391602, "p75": 0.644390344619751, "p90": 0.6488735675811768, "p95": 0.656728982925415, "p99": 0.6891846656799316, "p999": 0.6891846656799316}, "cumulative_distribution_function": null}, "errored": {"mean": 0.0, "median": 0.0, "mode": 0.0, "variance": 0.0, "std_dev": 0.0, "min": 0.0, "max": 0.0, "count": 0, "total_sum": 0.0, "percentiles": {"p001": 0.0, "p01": 0.0, "p05": 0.0, "p10": 0.0, "p25": 0.0, "p75": 0.0, "p90": 0.0, "p95": 0.0, "p99": 0.0, "p999": 0.0}, "cumulative_distribution_function": null}, "incomplete": {"mean": 0.2836878299713135, "median": 0.2836878299713135, "mode": 0.2836878299713135, "variance": 0.0, "std_dev": 0.0, "min": 0.2836878299713135, "max": 0.2836878299713135, "count": 1, "total_sum": 0.2836878299713135, "percentiles": {"p001": 0.2836878299713135, "p01": 0.2836878299713135, "p05": 0.2836878299713135, "p10": 0.2836878299713135, "p25": 0.2836878299713135, "p75": 0.2836878299713135, "p90": 0.2836878299713135, "p95": 0.2836878299713135, "p99": 0.2836878299713135, "p999": 0.2836878299713135}, "cumulative_distribution_function": null}, "total": {"mean": 0.6367980571503334, "median": 0.642310380935669, "mode": 0.2836878299713135, "variance": 0.0027733643692853522, "std_dev": 0.05266274175624881, "min": 0.2836878299713135, "max": 0.6891846656799316, "count": 47, "total_sum": 29.929508686065674, "percentiles": {"p001": 0.2836878299713135, "p01": 0.2836878299713135, "p05": 0.6398613452911377, "p10": 0.6402454376220703, "p25": 0.640899658203125, "p75": 0.644390344619751, "p90": 0.6488735675811768, "p95": 0.656728982925415, "p99": 0.6891846656799316, "p999": 
0.6891846656799316}, "cumulative_distribution_function": null}}, "prompt_token_count": {"successful": {"mean": 257.1086956521739, "median": 257.0, "mode": 257.0, "variance": 0.14035916824196598, "std_dev": 0.37464538999161057, "min": 257.0, "max": 259.0, "count": 46, "total_sum": 11827.0, "percentiles": {"p001": 257.0, "p01": 257.0, "p05": 257.0, "p10": 257.0, "p25": 257.0, "p75": 257.0, "p90": 257.0, "p95": 258.0, "p99": 259.0, "p999": 259.0}, "cumulative_distribution_function": null}, "errored": {"mean": 0.0, "median": 0.0, "mode": 0.0, "variance": 0.0, "std_dev": 0.0, "min": 0.0, "max": 0.0, "count": 0, "total_sum": 0.0, "percentiles": {"p001": 0.0, "p01": 0.0, "p05": 0.0, "p10": 0.0, "p25": 0.0, "p75": 0.0, "p90": 0.0, "p95": 0.0, "p99": 0.0, "p999": 0.0}, "cumulative_distribution_function": null}, "incomplete": {"mean": 256.0, "median": 256.0, "mode": 256.0, "variance": 0.0, "std_dev": 0.0, "min": 256.0, "max": 256.0, "count": 1, "total_sum": 256.0, "percentiles": {"p001": 256.0, "p01": 256.0, "p05": 256.0, "p10": 256.0, "p25": 256.0, "p75": 256.0, "p90": 256.0, "p95": 256.0, "p99": 256.0, "p999": 256.0}, "cumulative_distribution_function": null}, "total": {"mean": 257.0851063829787, "median": 257.0, "mode": 256.0, "variance": 0.16296966953372566, "std_dev": 0.40369502044702715, "min": 256.0, "max": 259.0, "count": 47, "total_sum": 12083.0, "percentiles": {"p001": 256.0, "p01": 256.0, "p05": 257.0, "p10": 257.0, "p25": 257.0, "p75": 257.0, "p90": 257.0, "p95": 258.0, "p99": 259.0, "p999": 259.0}, "cumulative_distribution_function": null}}, "output_token_count": {"successful": {"mean": 127.99999999999999, "median": 128.0, "mode": 128.0, "variance": 2.01948391736579e-28, "std_dev": 1.4210854715202002e-14, "min": 128.0, "max": 128.0, "count": 46, "total_sum": 5888.0, "percentiles": {"p001": 128.0, "p01": 128.0, "p05": 128.0, "p10": 128.0, "p25": 128.0, "p75": 128.0, "p90": 128.0, "p95": 128.0, "p99": 128.0, "p999": 128.0}, "cumulative_distribution_function": 
null}, "errored": {"mean": 0.0, "median": 0.0, "mode": 0.0, "variance": 0.0, "std_dev": 0.0, "min": 0.0, "max": 0.0, "count": 0, "total_sum": 0.0, "percentiles": {"p001": 0.0, "p01": 0.0, "p05": 0.0, "p10": 0.0, "p25": 0.0, "p75": 0.0, "p90": 0.0, "p95": 0.0, "p99": 0.0, "p999": 0.0}, "cumulative_distribution_function": null}, "incomplete": {"mean": 0.0, "median": 0.0, "mode": 0.0, "variance": 0.0, "std_dev": 0.0, "min": 0.0, "max": 0.0, "count": 0, "total_sum": 0.0, "percentiles": {"p001": 0.0, "p01": 0.0, "p05": 0.0, "p10": 0.0, "p25": 0.0, "p75": 0.0, "p90": 0.0, "p95": 0.0, "p99": 0.0, "p999": 0.0}, "cumulative_distribution_function": null}, "total": {"mean": 126.44680851063832, "median": 128.0, "mode": 55.0, "variance": 110.97057492077867, "std_dev": 10.534257207832866, "min": 55.0, "max": 128.0, "count": 47, "total_sum": 5943.0, "percentiles": {"p001": 55.0, "p01": 55.0, "p05": 128.0, "p10": 128.0, "p25": 128.0, "p75": 128.0, "p90": 128.0, "p95": 128.0, "p99": 128.0, "p999": 128.0}, "cumulative_distribution_function": null}}, "time_to_first_token_ms": {"successful": {"mean": 16.792535781860348, "median": 16.38054847717285, "mode": 15.790939331054688, "variance": 1.2776652847210441, "std_dev": 1.1303385708366516, "min": 15.790939331054688, "max": 21.281957626342773, "count": 46, "total_sum": 772.4566459655762, "percentiles": {"p001": 15.790939331054688, "p01": 15.790939331054688, "p05": 15.971660614013672, "p10": 16.034841537475586, "p25": 16.111373901367188, "p75": 16.840696334838867, "p90": 18.505334854125977, "p95": 19.00935173034668, "p99": 21.281957626342773, "p999": 21.281957626342773}, "cumulative_distribution_function": null}, "errored": {"mean": 0.0, "median": 0.0, "mode": 0.0, "variance": 0.0, "std_dev": 0.0, "min": 0.0, "max": 0.0, "count": 0, "total_sum": 0.0, "percentiles": {"p001": 0.0, "p01": 0.0, "p05": 0.0, "p10": 0.0, "p25": 0.0, "p75": 0.0, "p90": 0.0, "p95": 0.0, "p99": 0.0, "p999": 0.0}, "cumulative_distribution_function": null}, 
"incomplete": {"mean": 0.0, "median": 0.0, "mode": 0.0, "variance": 0.0, "std_dev": 0.0, "min": 0.0, "max": 0.0, "count": 0, "total_sum": 0.0, "percentiles": {"p001": 0.0, "p01": 0.0, "p05": 0.0, "p10": 0.0, "p25": 0.0, "p75": 0.0, "p90": 0.0, "p95": 0.0, "p99": 0.0, "p999": 0.0}, "cumulative_distribution_function": null}, "total": {"mean": 16.777170465347616, "median": 16.371726989746094, "mode": 15.790939331054688, "variance": 1.2613411927317046, "std_dev": 1.1230944718641014, "min": 15.790939331054688, "max": 21.281957626342773, "count": 47, "total_sum": 788.5270118713379, "percentiles": {"p001": 15.790939331054688, "p01": 15.790939331054688, "p05": 15.971660614013672, "p10": 16.034841537475586, "p25": 16.100645065307617, "p75": 16.840696334838867, "p90": 18.505334854125977, "p95": 19.00935173034668, "p99": 21.281957626342773, "p999": 21.281957626342773}, "cumulative_distribution_function": null}}, "time_per_output_token_ms": {"successful": {"mean": 4.90300272307966, "median": 4.885653033852577, "mode": 4.870360717177391, "variance": 0.003163643010108571, "std_dev": 0.05624627107736628, "min": 4.870360717177391, "max": 5.217265337705612, "count": 46, "total_sum": 225.5381252616644, "percentiles": {"p001": 4.870360717177391, "p01": 4.870360717177391, "p05": 4.8728808760643005, "p10": 4.873953759670258, "p25": 4.876237362623215, "p75": 4.904214292764664, "p90": 4.934689030051231, "p95": 4.993332549929619, "p99": 5.217265337705612, "p999": 5.217265337705612}, "cumulative_distribution_function": null}, "errored": {"mean": 0.0, "median": 0.0, "mode": 0.0, "variance": 0.0, "std_dev": 0.0, "min": 0.0, "max": 0.0, "count": 0, "total_sum": 0.0, "percentiles": {"p001": 0.0, "p01": 0.0, "p05": 0.0, "p10": 0.0, "p25": 0.0, "p75": 0.0, "p90": 0.0, "p95": 0.0, "p99": 0.0, "p999": 0.0}, "cumulative_distribution_function": null}, "incomplete": {"mean": 0.0, "median": 0.0, "mode": 0.0, "variance": 0.0, "std_dev": 0.0, "min": 0.0, "max": 0.0, "count": 0, "total_sum": 0.0, 
"percentiles": {"p001": 0.0, "p01": 0.0, "p05": 0.0, "p10": 0.0, "p25": 0.0, "p75": 0.0, "p90": 0.0, "p95": 0.0, "p99": 0.0, "p999": 0.0}, "cumulative_distribution_function": null}, "total": {"mean": 4.9022222114856975, "median": 4.882922396063805, "mode": 4.870360717177391, "variance": 0.003199582258516055, "std_dev": 0.05656485002646127, "min": 4.81866489757191, "max": 5.217265337705612, "count": 47, "total_sum": 230.3567901592363, "percentiles": {"p001": 4.81866489757191, "p01": 4.870360717177391, "p05": 4.872731864452362, "p10": 4.873953759670258, "p25": 4.876237362623215, "p75": 4.904214292764664, "p90": 4.934689030051231, "p95": 4.993332549929619, "p99": 5.217265337705612, "p999": 5.217265337705612}, "cumulative_distribution_function": null}}, "inter_token_latency_ms": {"successful": {"mean": 4.941609043733832, "median": 4.9241227427805505, "mode": 4.90871001416304, "variance": 0.003213660306132974, "std_dev": 0.056689155101597465, "min": 4.90871001416304, "max": 5.258346167136365, "count": 46, "total_sum": 227.31401601175622, "percentiles": {"p001": 4.90871001416304, "p01": 4.90871001416304, "p05": 4.911250016820713, "p10": 4.9123313483290785, "p25": 4.91463293240765, "p75": 4.9428301533376136, "p90": 4.973544849185493, "p95": 5.032650129062923, "p99": 5.258346167136365, "p999": 5.258346167136365}, "cumulative_distribution_function": null}, "errored": {"mean": 0.0, "median": 0.0, "mode": 0.0, "variance": 0.0, "std_dev": 0.0, "min": 0.0, "max": 0.0, "count": 0, "total_sum": 0.0, "percentiles": {"p001": 0.0, "p01": 0.0, "p05": 0.0, "p10": 0.0, "p25": 0.0, "p75": 0.0, "p90": 0.0, "p95": 0.0, "p99": 0.0, "p999": 0.0}, "cumulative_distribution_function": null}, "incomplete": {"mean": 0.0, "median": 0.0, "mode": 0.0, "variance": 0.0, "std_dev": 0.0, "min": 0.0, "max": 0.0, "count": 0, "total_sum": 0.0, "percentiles": {"p001": 0.0, "p01": 0.0, "p05": 0.0, "p10": 0.0, "p25": 0.0, "p75": 0.0, "p90": 0.0, "p95": 0.0, "p99": 0.0, "p999": 0.0}, 
"cumulative_distribution_function": null}, "total": {"mean": 4.9413003057767115, "median": 4.921370603906826, "mode": 4.90871001416304, "variance": 0.003194539306669541, "std_dev": 0.056520255720135776, "min": 4.9078994327121315, "max": 5.258346167136365, "count": 47, "total_sum": 232.22191544446835, "percentiles": {"p001": 4.9078994327121315, "p01": 4.90871001416304, "p05": 4.911099831888995, "p10": 4.9123313483290785, "p25": 4.91463293240765, "p75": 4.9428301533376136, "p90": 4.973544849185493, "p95": 5.032650129062923, "p99": 5.258346167136365, "p999": 5.258346167136365}, "cumulative_distribution_function": null}}, "output_tokens_per_second": {"successful": {"mean": 198.13346751788123, "median": 203.04516628745705, "mode": 203.5378269520066, "variance": 613.9948900522365, "std_dev": 24.778920276158857, "min": 0.0, "max": 203.69598368219124, "count": 122, "total_sum": 17849.590625912137, "percentiles": {"p001": 46.71289356157213, "p01": 55.502236337170835, "p05": 190.14888022486173, "p10": 200.69400449782287, "p25": 202.23259402121505, "p75": 203.42923658938793, "p90": 203.5378269520066, "p95": 203.58722454130668, "p99": 203.6860916860917, "p999": 203.69598368219124}, "cumulative_distribution_function": null}, "errored": {"mean": 0.0, "median": 0.0, "mode": 0.0, "variance": 0.0, "std_dev": 0.0, "min": 0.0, "max": 0.0, "count": 0, "total_sum": 0.0, "percentiles": {"p001": 0.0, "p01": 0.0, "p05": 0.0, "p10": 0.0, "p25": 0.0, "p75": 0.0, "p90": 0.0, "p95": 0.0, "p99": 0.0, "p999": 0.0}, "cumulative_distribution_function": null}, "incomplete": {"mean": 0.0, "median": 0.0, "mode": 0.0, "variance": 0.0, "std_dev": 0.0, "min": 0.0, "max": 0.0, "count": 0, "total_sum": 0.0, "percentiles": {"p001": 0.0, "p01": 0.0, "p05": 0.0, "p10": 0.0, "p25": 0.0, "p75": 0.0, "p90": 0.0, "p95": 0.0, "p99": 0.0, "p999": 0.0}, "cumulative_distribution_function": null}, "total": {"mean": 198.08514508750469, "median": 203.04516628745705, "mode": 203.5378269520066, "variance": 
619.6237334717947, "std_dev": 24.89224243558211, "min": 0.0, "max": 203.69598368219124, "count": 125, "total_sum": 18310.99071823841, "percentiles": {"p001": 46.71289356157213, "p01": 55.502236337170835, "p05": 190.14888022486173, "p10": 200.69400449782287, "p25": 202.23259402121505, "p75": 203.4193704835346, "p90": 203.5378269520066, "p95": 203.58722454130668, "p99": 203.6860916860917, "p999": 203.69598368219124}, "cumulative_distribution_function": null}}, "tokens_per_second": {"successful": {"mean": 992.6867036588937, "median": 614.3700014647723, "mode": 615.2712336805046, "variance": 62014350.40386989, "std_dev": 7874.919072845758, "min": 0.0, "max": 159300.81436773148, "count": 139, "total_sum": 5852579.912913391, "percentiles": {"p001": 46.71289356157213, "p01": 55.502236337170835, "p05": 574.9559972583961, "p10": 606.8148148148148, "p25": 611.5928842228055, "p75": 615.0907757735738, "p90": 615.4517975055026, "p95": 615.542119166422, "p99": 617.5359246171967, "p999": 157985.65557672578}, "cumulative_distribution_function": null}, "errored": {"mean": 0.0, "median": 0.0, "mode": 0.0, "variance": 0.0, "std_dev": 0.0, "min": 0.0, "max": 0.0, "count": 0, "total_sum": 0.0, "percentiles": {"p001": 0.0, "p01": 0.0, "p05": 0.0, "p10": 0.0, "p25": 0.0, "p75": 0.0, "p90": 0.0, "p95": 0.0, "p99": 0.0, "p999": 0.0}, "cumulative_distribution_function": null}, "incomplete": {"mean": 0.0, "median": 0.0, "mode": 0.0, "variance": 0.0, "std_dev": 0.0, "min": 0.0, "max": 0.0, "count": 0, "total_sum": 0.0, "percentiles": {"p001": 0.0, "p01": 0.0, "p05": 0.0, "p10": 0.0, "p25": 0.0, "p75": 0.0, "p90": 0.0, "p95": 0.0, "p99": 0.0, "p999": 0.0}, "cumulative_distribution_function": null}, "total": {"mean": 1002.1268169766876, "median": 614.3700014647723, "mode": 615.2712336805046, "variance": 63939736.95341249, "std_dev": 7996.232672541019, "min": 0.0, "max": 296531.848660591, "count": 143, "total_sum": 6151486.576325966, "percentiles": {"p001": 46.71289356157213, "p01": 
55.502236337170835, "p05": 574.9559972583961, "p10": 606.8148148148148, "p25": 611.5928842228055, "p75": 615.0907757735738, "p90": 615.4517975055026, "p95": 615.542119166422, "p99": 1158.3275338304336, "p999": 158008.81383758428}, "cumulative_distribution_function": null}}}, "start_time": 1749157168.1827004, "end_time": 1749157198.1799018, "request_totals": {"successful": 46, "errored": 0, "incomplete": 1, "total": 47}, "request_samples": null, "requests": {"successful": [{"type_": "generative_text_response", "request_id": "73054dd1-486f-4894-a861-075750b82453", "request_type": "text_completions", "scheduler_info": {"requested": true, "completed": true, "errored": false, "canceled": false, "targeted_start_time": 1749157168.179883, "queued_time": 1749157168.1811602, "dequeued_time": 1749157168.1818697, "scheduled_time": 1749157168.181895, "worker_start": 1749157168.1820004, "request_start": 1749157168.1827004, "request_end": 1749157168.871885, "worker_end": 1749157168.8723884, "process_id": 0}, "prompt": "such a sacrifice to her advantage as years of gratitude cannot enough acknowledge. By this time she is actually with them! If such goodness does not make her miserable now, she will never deserve to be happy! What a meeting for her, when she first sees my aunt! We must endeavour to forget all that has passed on either side, said Jane I hope and trust they will yet be happy. His consenting to marry her is a proof, I will believe, that he is come to a right way of thinking. Their mutual affection will steady them; and I flatter myself they will settle so quietly, and live in so rational a manner, as may in time make their past imprudence forgotten. Their conduct has been such, replied Elizabeth, as neither you, nor I, nor anybody, can ever forget. It is useless to talk of it. It now occurred to the girls that their mother was in all likelihood perfectly ignorant of what had happened. 
They went to the library, therefore, and asked their father whether he would not wish them to make it known to her. He was writing, and, without raising his head, coolly replied, Just as you please. May we take my uncle s letter to read to her? Take whatever you like, and get away", "output": ", said Jane. The letter was read, and the girls retired to their own apartments. Elizabeth was the first to return. She found her mother seated in the drawing-room, and looking very pale. She was dressed in a loose white gown, and her hair was disordered. She rose as they entered, and clasped them both in her arms, and then, without saying a word, took her seat on the sofa, and began to weep. Elizabeth and Jane stood by her side, and listened to the sobs which issued from her heart. She had no words to express her gratitude, and, in a few minutes,", "prompt_tokens": 257, "output_tokens": 128, "start_time": 1749157168.1827004, "end_time": 1749157168.871885, "first_token_time": 1749157168.2039824, "last_token_time": 1749157168.8717923, "request_latency": 0.6891846656799316, "time_to_first_token_ms": 21.281957626342773, "time_per_output_token_ms": 5.217265337705612, "inter_token_latency_ms": 5.258346167136365, "tokens_per_second": 558.631117568713, "output_tokens_per_second": 185.72670921765}], "errored": [], "incomplete": [], "total": null}, "duration": 29.997201442718506}]} \ No newline at end of file diff --git a/tests/unit/entrypoints/test_benchmark_from_file_entrypoint.py b/tests/unit/entrypoints/test_benchmark_from_file_entrypoint.py index 002bdca04..3551870ee 100644 --- a/tests/unit/entrypoints/test_benchmark_from_file_entrypoint.py +++ b/tests/unit/entrypoints/test_benchmark_from_file_entrypoint.py @@ -3,6 +3,7 @@ from pathlib import Path import pytest +import filecmp from guidellm.benchmark import reimport_benchmarks_report @@ -17,6 +18,14 @@ def _() -> Path: return _ +@pytest.fixture +def cleanup(): + to_delete = [] + yield to_delete + for item in to_delete: + if 
os.path.exists(item): + os.remove(item) + def test_display_entrypoint_json(capfd, get_test_asset_dir): generic_test_display_entrypoint( @@ -37,7 +46,7 @@ def test_display_entrypoint_yaml(capfd, get_test_asset_dir): def generic_test_display_entrypoint(filename, capfd, get_test_asset_dir): os.environ["COLUMNS"] = "180" # CLI output depends on terminal width. asset_dir = get_test_asset_dir() - reimport_benchmarks_report(asset_dir / filename) + reimport_benchmarks_report(asset_dir / filename, None) out, err = capfd.readouterr() expected_output_path = asset_dir / "benchmarks_stripped_output.txt" if REGENERATE_ARTIFACTS: @@ -49,6 +58,19 @@ def generic_test_display_entrypoint(filename, capfd, get_test_asset_dir): expected_output = file.read() assert out == expected_output +def test_reexporting_benchmark(get_test_asset_dir, cleanup): + asset_dir = get_test_asset_dir() + source_file = asset_dir / "benchmarks_stripped.json" + exported_file = asset_dir / "benchmarks_reexported.json" + # If you need to inspect the output to see why it failed, comment out the following statement. + cleanup.append(exported_file) + if exported_file.exists(): + os.remove(exported_file) + reimport_benchmarks_report(source_file, exported_file) + # The reexported file should exist and be identical to the source. 
+ assert exported_file.exists() + assert filecmp.cmp(source_file, exported_file, shallow=False) + if __name__ == "__main__": unittest.main() From 83b0c77a7dea04e6d173e12200a17349ae3cc6ff Mon Sep 17 00:00:00 2001 From: Jared O'Connell Date: Thu, 26 Jun 2025 17:58:12 -0400 Subject: [PATCH 14/18] Switch to internal dependency, and fix linter errors --- pyproject.toml | 1 - src/guidellm/__main__.py | 21 ++-- src/guidellm/benchmark/entrypoints.py | 1 + src/guidellm/benchmark/output.py | 5 +- src/guidellm/utils/__init__.py | 2 + src/guidellm/utils/default_group.py | 103 ++++++++++++++++++ .../test_benchmark_from_file_entrypoint.py | 15 ++- 7 files changed, 129 insertions(+), 19 deletions(-) create mode 100644 src/guidellm/utils/default_group.py diff --git a/pyproject.toml b/pyproject.toml index e528b9159..a78b1fc50 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,7 +56,6 @@ dependencies = [ "pyyaml>=6.0.0", "rich", "transformers", - "click-default-group~=1.2.4" ] [project.optional-dependencies] diff --git a/src/guidellm/__main__.py b/src/guidellm/__main__.py index 5c07164e6..4deff3bff 100644 --- a/src/guidellm/__main__.py +++ b/src/guidellm/__main__.py @@ -2,7 +2,6 @@ import codecs from pathlib import Path from typing import get_args -from click_default_group import DefaultGroup import click from pydantic import ValidationError @@ -10,7 +9,6 @@ from guidellm.backend import BackendType from guidellm.benchmark import ( ProfileType, - benchmark_generative_text, reimport_benchmarks_report, ) from guidellm.benchmark.entrypoints import benchmark_with_scenario @@ -18,6 +16,7 @@ from guidellm.config import print_config from guidellm.preprocess.dataset import ShortPromptStrategy, process_dataset from guidellm.scheduler import StrategyType +from guidellm.utils import DefaultGroupHandler from guidellm.utils import cli as cli_tools STRATEGY_PROFILE_CHOICES = set( @@ -29,11 +28,11 @@ def cli(): pass + @cli.group( help="Commands to run a new benchmark or load a prior one.", - 
cls=DefaultGroup, + cls=DefaultGroupHandler, default="run", - default_if_no_args=True, ) def benchmark(): pass @@ -334,15 +333,15 @@ def run( is_flag=False, flag_value=Path.cwd() / "benchmarks_reexported.json", help=( - "Allows re-exporting the benchmarks to another format." + "Allows re-exporting the benchmarks to another format. " "The path to save the output to. If it is a directory, " "it will save benchmarks.json under it. " "Otherwise, json, yaml, or csv files are supported for output types " - "which will be read from the extension for the file path." - "Optional. If the output path flag is not provided, the benchmarks " - "will not be reexported. If the flag is present but no value is " - "specified, it will default to the current directory with the file " - "name benchmarks_reexported.json." + "which will be read from the extension for the file path. " + "This input is optional. If the output path flag is not provided, " + "the benchmarks will not be reexported. If the flag is present but " + "no value is specified, it will default to the current directory " + "with the file name `benchmarks_reexported.json`." ), ) def from_file(path, output_path): @@ -368,7 +367,7 @@ def decode_escaped_str(_ctx, _param, value): help=( "Print out the available configuration settings that can be set " "through environment variables." 
- ) + ), ) def config(): print_config() diff --git a/src/guidellm/benchmark/entrypoints.py b/src/guidellm/benchmark/entrypoints.py index 4a009f9b2..2ef85c3ec 100644 --- a/src/guidellm/benchmark/entrypoints.py +++ b/src/guidellm/benchmark/entrypoints.py @@ -147,6 +147,7 @@ async def benchmark_generative_text( return report, saved_path + def reimport_benchmarks_report(file: Path, output_path: Optional[Path]) -> None: """ The command-line entry point for re-importing and displaying an diff --git a/src/guidellm/benchmark/output.py b/src/guidellm/benchmark/output.py index 442809573..5e4c4c670 100644 --- a/src/guidellm/benchmark/output.py +++ b/src/guidellm/benchmark/output.py @@ -242,7 +242,10 @@ def _file_setup( if path_suffix in [".csv"]: return path, "csv" - raise ValueError(f"Unsupported file extension: {path_suffix} for {path}; expected json, yaml, or csv.") + raise ValueError( + f"Unsupported file extension: {path_suffix} for {path}; " + "expected json, yaml, or csv." + ) @staticmethod def _benchmark_desc_headers_and_values( diff --git a/src/guidellm/utils/__init__.py b/src/guidellm/utils/__init__.py index 399c021d5..fb9262c31 100644 --- a/src/guidellm/utils/__init__.py +++ b/src/guidellm/utils/__init__.py @@ -1,4 +1,5 @@ from .colors import Colors +from .default_group import DefaultGroupHandler from .hf_datasets import ( SUPPORTED_TYPES, save_dataset_to_file, @@ -20,6 +21,7 @@ __all__ = [ "SUPPORTED_TYPES", "Colors", + "DefaultGroupHandler", "EndlessTextCreator", "IntegerRangeSampler", "check_load_processor", diff --git a/src/guidellm/utils/default_group.py b/src/guidellm/utils/default_group.py new file mode 100644 index 000000000..0277fe1e1 --- /dev/null +++ b/src/guidellm/utils/default_group.py @@ -0,0 +1,103 @@ +""" +File uses code adapted from code with the following license: + +Copyright (c) 2015-2023, Heungsub Lee +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + + Redistributions in binary form must reproduce the above copyright notice, this + list of conditions and the following disclaimer in the documentation and/or + other materials provided with the distribution. + + Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +""" + +__all__ = ["DefaultGroupHandler"] + +import click + + +class DefaultGroupHandler(click.Group): + """ + Allows the migration to a new sub-command by allowing the group to run + one of its sub-commands as the no-args default command. + """ + + def __init__(self, *args, **kwargs): + # To resolve as the default command. 
+ if not kwargs.get('ignore_unknown_options', True): + raise ValueError('Default group accepts unknown options') + self.ignore_unknown_options = True + self.default_cmd_name = kwargs.pop('default', None) + self.default_if_no_args = kwargs.pop('default_if_no_args', False) + super(DefaultGroupHandler, self).__init__(*args, **kwargs) + + def parse_args(self, ctx, args): + if not args and self.default_if_no_args: + args.insert(0, self.default_cmd_name) + return super(DefaultGroupHandler, self).parse_args(ctx, args) + + def get_command(self, ctx, cmd_name): + if cmd_name not in self.commands: + # If it doesn't match an existing command, use the default command name. + ctx.arg0 = cmd_name + cmd_name = self.default_cmd_name + return super(DefaultGroupHandler, self).get_command(ctx, cmd_name) + + def resolve_command(self, ctx, args): + base = super(DefaultGroupHandler, self) + cmd_name, cmd, args = base.resolve_command(ctx, args) + if hasattr(ctx, 'arg0'): + args.insert(0, ctx.arg0) + cmd_name = cmd.name + return cmd_name, cmd, args + + def format_commands(self, ctx, formatter): + """ + Used to wrap the default formatter to clarify which command is the default. + """ + formatter = DefaultCommandFormatter(self, formatter, mark=' (default)') + return super(DefaultGroupHandler, self).format_commands(ctx, formatter) + + +class DefaultCommandFormatter(object): + """ + Wraps a formatter to edit the line for the default command to mark it + with the specified mark string. 
+ """ + + def __init__(self, group, formatter, mark='*'): + self.group = group + self.formatter = formatter + self.mark = mark + super().__init__() + + def __getattr__(self, attr): + return getattr(self.formatter, attr) + + def write_dl(self, rows, *args, **kwargs): + rows_ = [] + for cmd_name, help_msg in rows: + if cmd_name == self.group.default_cmd_name: + rows_.insert(0, (cmd_name + self.mark, help_msg)) + else: + rows_.append((cmd_name, help_msg)) + return self.formatter.write_dl(rows_, *args, **kwargs) \ No newline at end of file diff --git a/tests/unit/entrypoints/test_benchmark_from_file_entrypoint.py b/tests/unit/entrypoints/test_benchmark_from_file_entrypoint.py index 3551870ee..d76265bea 100644 --- a/tests/unit/entrypoints/test_benchmark_from_file_entrypoint.py +++ b/tests/unit/entrypoints/test_benchmark_from_file_entrypoint.py @@ -1,9 +1,9 @@ +import filecmp import os import unittest from pathlib import Path import pytest -import filecmp from guidellm.benchmark import reimport_benchmarks_report @@ -18,13 +18,14 @@ def _() -> Path: return _ + @pytest.fixture def cleanup(): - to_delete = [] + to_delete: list[Path] = [] yield to_delete for item in to_delete: - if os.path.exists(item): - os.remove(item) + if item.exists(): + item.unlink() # Deletes the file def test_display_entrypoint_json(capfd, get_test_asset_dir): @@ -58,14 +59,16 @@ def generic_test_display_entrypoint(filename, capfd, get_test_asset_dir): expected_output = file.read() assert out == expected_output + def test_reexporting_benchmark(get_test_asset_dir, cleanup): asset_dir = get_test_asset_dir() source_file = asset_dir / "benchmarks_stripped.json" exported_file = asset_dir / "benchmarks_reexported.json" - # If you need to inspect the output to see why it failed, comment out the following statement. + # If you need to inspect the output to see why it failed, comment out + # the cleanup statement. 
cleanup.append(exported_file) if exported_file.exists(): - os.remove(exported_file) + exported_file.unlink() reimport_benchmarks_report(source_file, exported_file) # The reexported file should exist and be identical to the source. assert exported_file.exists() From c0baf33cf78ddf03fbe8fc0966d5b824ad6e11ca Mon Sep 17 00:00:00 2001 From: Jared O'Connell Date: Thu, 26 Jun 2025 18:08:35 -0400 Subject: [PATCH 15/18] Update documentation to reflect command change --- README.md | 8 ++++---- docs/datasets.md | 12 ++++++------ docs/outputs.md | 16 ++++++++-------- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index fb70f0722..d8381092a 100644 --- a/README.md +++ b/README.md @@ -68,12 +68,12 @@ For information on starting other supported inference servers or platforms, see #### 2. Run a GuideLLM Benchmark -To run a GuideLLM benchmark, use the `guidellm benchmark` command with the target set to an OpenAI-compatible server. For this example, the target is set to 'http://localhost:8000', assuming that vLLM is active and running on the same server. Otherwise, update it to the appropriate location. By default, GuideLLM automatically determines the model available on the server and uses it. To target a different model, pass the desired name with the `--model` argument. Additionally, the `--rate-type` is set to `sweep`, which automatically runs a range of benchmarks to determine the minimum and maximum rates that the server and model can support. Each benchmark run under the sweep will run for 30 seconds, as set by the `--max-seconds` argument. Finally, `--data` is set to a synthetic dataset with 256 prompt tokens and 128 output tokens per request. For more arguments, supported scenarios, and configurations, jump to the [Configurations Section](#configurations) or run `guidellm benchmark --help`. +To run a GuideLLM benchmark, use the `guidellm benchmark run` command with the target set to an OpenAI-compatible server. 
For this example, the target is set to 'http://localhost:8000', assuming that vLLM is active and running on the same server. Otherwise, update it to the appropriate location. By default, GuideLLM automatically determines the model available on the server and uses it. To target a different model, pass the desired name with the `--model` argument. Additionally, the `--rate-type` is set to `sweep`, which automatically runs a range of benchmarks to determine the minimum and maximum rates that the server and model can support. Each benchmark run under the sweep will run for 30 seconds, as set by the `--max-seconds` argument. Finally, `--data` is set to a synthetic dataset with 256 prompt tokens and 128 output tokens per request. For more arguments, supported scenarios, and configurations, jump to the [Configurations Section](#configurations) or run `guidellm benchmark --help`. Now, to start benchmarking, run the following command: ```bash -guidellm benchmark \ +guidellm benchmark run \ --target "http://localhost:8000" \ --rate-type sweep \ --max-seconds 30 \ @@ -110,11 +110,11 @@ For further details on determining the optimal request rate and SLOs, refer to t ### Configurations -GuideLLM offers a range of configurations through both the benchmark CLI command and environment variables, which provide default values and more granular controls. The most common configurations are listed below. A complete list is easily accessible, though, by running `guidellm benchmark --help` or `guidellm config` respectively. +GuideLLM offers a range of configurations through both the benchmark CLI command and environment variables, which provide default values and more granular controls. The most common configurations are listed below. A complete list is easily accessible, though, by running `guidellm benchmark run --help` or `guidellm config` respectively. #### Benchmark CLI -The `guidellm benchmark` command is used to run benchmarks against a generative AI backend/server. 
The command accepts a variety of arguments to customize the benchmark run. The most common arguments include: +The `guidellm benchmark run` command is used to run benchmarks against a generative AI backend/server. The command accepts a variety of arguments to customize the benchmark run. The most common arguments include: - `--target`: Specifies the target path for the backend to run benchmarks against. For example, `http://localhost:8000`. This is required to define the server endpoint. diff --git a/docs/datasets.md b/docs/datasets.md index a5d0aa4e5..a626b58c9 100644 --- a/docs/datasets.md +++ b/docs/datasets.md @@ -20,7 +20,7 @@ The following arguments can be used to configure datasets and their processing: ### Example Usage ```bash -guidellm benchmark \ +guidellm benchmark run \ --target "http://localhost:8000" \ --rate-type "throughput" \ --max-requests 1000 \ @@ -49,7 +49,7 @@ For different use cases, here are the recommended dataset profiles to pass as ar #### Example Commands ```bash -guidellm benchmark \ +guidellm benchmark run \ --target "http://localhost:8000" \ --rate-type "throughput" \ --max-requests 1000 \ @@ -59,7 +59,7 @@ guidellm benchmark \ Or using a JSON string: ```bash -guidellm benchmark \ +guidellm benchmark run \ --target "http://localhost:8000" \ --rate-type "throughput" \ --max-requests 1000 \ @@ -90,7 +90,7 @@ GuideLLM supports datasets from the Hugging Face Hub or local directories that f #### Example Commands ```bash -guidellm benchmark \ +guidellm benchmark run \ --target "http://localhost:8000" \ --rate-type "throughput" \ --max-requests 1000 \ @@ -100,7 +100,7 @@ guidellm benchmark \ Or using a local dataset: ```bash -guidellm benchmark \ +guidellm benchmark run \ --target "http://localhost:8000" \ --rate-type "throughput" \ --max-requests 1000 \ @@ -152,7 +152,7 @@ GuideLLM supports various file formats for datasets, including text, CSV, JSON, #### Example Commands ```bash -guidellm benchmark \ +guidellm benchmark run \ --target 
"http://localhost:8000" \ --rate-type "throughput" \ --max-requests 1000 \ diff --git a/docs/outputs.md b/docs/outputs.md index ea3d9a6f0..29a16ef58 100644 --- a/docs/outputs.md +++ b/docs/outputs.md @@ -5,7 +5,7 @@ GuideLLM provides flexible options for outputting benchmark results, catering to For all of the output formats, `--output-extras` can be used to include additional information. This could include tags, metadata, hardware details, and other relevant information that can be useful for analysis. This must be supplied as a JSON encoded string. For example: ```bash -guidellm benchmark \ +guidellm benchmark run \ --target "http://localhost:8000" \ --rate-type sweep \ --max-seconds 30 \ @@ -26,10 +26,10 @@ By default, GuideLLM displays benchmark results and progress directly in the con ### Disabling Console Output -To disable the progress outputs to the console, use the `disable-progress` flag when running the `guidellm benchmark` command. For example: +To disable the progress outputs to the console, use the `disable-progress` flag when running the `guidellm benchmark run` command. For example: ```bash -guidellm benchmark \ +guidellm benchmark run \ --target "http://localhost:8000" \ --rate-type sweep \ --max-seconds 30 \ @@ -37,10 +37,10 @@ guidellm benchmark \ --disable-progress ``` -To disable console output, use the `--disable-console-outputs` flag when running the `guidellm benchmark` command. For example: +To disable console output, use the `--disable-console-outputs` flag when running the `guidellm benchmark run` command. For example: ```bash -guidellm benchmark \ +guidellm benchmark run \ --target "http://localhost:8000" \ --rate-type sweep \ --max-seconds 30 \ @@ -50,10 +50,10 @@ guidellm benchmark \ ### Enabling Extra Information -GuideLLM includes the option to display extra information during the benchmark runs to monitor the overheads and performance of the system. 
This can be enabled by using the `--display-scheduler-stats` flag when running the `guidellm benchmark` command. For example: +GuideLLM includes the option to display extra information during the benchmark runs to monitor the overheads and performance of the system. This can be enabled by using the `--display-scheduler-stats` flag when running the `guidellm benchmark run` command. For example: ```bash -guidellm benchmark \ +guidellm benchmark run \ --target "http://localhost:8000" \ --rate-type sweep \ --max-seconds 30 \ @@ -81,7 +81,7 @@ GuideLLM supports saving benchmark results to files in various formats, includin Example command to save results in YAML format: ```bash -guidellm benchmark \ +guidellm benchmark run \ --target "http://localhost:8000" \ --rate-type sweep \ --max-seconds 30 \ From 534fbe42f392025d483547b23c85e5a6ce227878 Mon Sep 17 00:00:00 2001 From: Jared O'Connell Date: Fri, 27 Jun 2025 17:00:54 -0400 Subject: [PATCH 16/18] Fix linter errors --- src/guidellm/utils/default_group.py | 35 +++++++++++++++-------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/src/guidellm/utils/default_group.py b/src/guidellm/utils/default_group.py index 0277fe1e1..b3f0f03b0 100644 --- a/src/guidellm/utils/default_group.py +++ b/src/guidellm/utils/default_group.py @@ -32,6 +32,8 @@ __all__ = ["DefaultGroupHandler"] +import collections.abc as cabc + import click @@ -43,29 +45,28 @@ class DefaultGroupHandler(click.Group): def __init__(self, *args, **kwargs): # To resolve as the default command. 
- if not kwargs.get('ignore_unknown_options', True): - raise ValueError('Default group accepts unknown options') + if not kwargs.get("ignore_unknown_options", True): + raise ValueError("Default group accepts unknown options") self.ignore_unknown_options = True - self.default_cmd_name = kwargs.pop('default', None) - self.default_if_no_args = kwargs.pop('default_if_no_args', False) - super(DefaultGroupHandler, self).__init__(*args, **kwargs) + self.default_cmd_name = kwargs.pop("default", None) + self.default_if_no_args = kwargs.pop("default_if_no_args", False) + super().__init__(*args, **kwargs) def parse_args(self, ctx, args): if not args and self.default_if_no_args: args.insert(0, self.default_cmd_name) - return super(DefaultGroupHandler, self).parse_args(ctx, args) + return super().parse_args(ctx, args) def get_command(self, ctx, cmd_name): if cmd_name not in self.commands: # If it doesn't match an existing command, use the default command name. ctx.arg0 = cmd_name cmd_name = self.default_cmd_name - return super(DefaultGroupHandler, self).get_command(ctx, cmd_name) + return super().get_command(ctx, cmd_name) def resolve_command(self, ctx, args): - base = super(DefaultGroupHandler, self) - cmd_name, cmd, args = base.resolve_command(ctx, args) - if hasattr(ctx, 'arg0'): + cmd_name, cmd, args = super().resolve_command(ctx, args) + if hasattr(ctx, "arg0"): args.insert(0, ctx.arg0) cmd_name = cmd.name return cmd_name, cmd, args @@ -74,17 +75,17 @@ def format_commands(self, ctx, formatter): """ Used to wrap the default formatter to clarify which command is the default. 
""" - formatter = DefaultCommandFormatter(self, formatter, mark=' (default)') - return super(DefaultGroupHandler, self).format_commands(ctx, formatter) + formatter = DefaultCommandFormatter(self, formatter, mark=" (default)") + return super().format_commands(ctx, formatter) -class DefaultCommandFormatter(object): +class DefaultCommandFormatter: """ Wraps a formatter to edit the line for the default command to mark it with the specified mark string. """ - def __init__(self, group, formatter, mark='*'): + def __init__(self, group, formatter, mark="*"): self.group = group self.formatter = formatter self.mark = mark @@ -93,11 +94,11 @@ def __init__(self, group, formatter, mark='*'): def __getattr__(self, attr): return getattr(self.formatter, attr) - def write_dl(self, rows, *args, **kwargs): - rows_ = [] + def write_dl(self, rows: cabc.Sequence[tuple[str, str]], *args, **kwargs): + rows_: list[tuple[str, str]] = [] for cmd_name, help_msg in rows: if cmd_name == self.group.default_cmd_name: rows_.insert(0, (cmd_name + self.mark, help_msg)) else: rows_.append((cmd_name, help_msg)) - return self.formatter.write_dl(rows_, *args, **kwargs) \ No newline at end of file + return self.formatter.write_dl(rows_, *args, **kwargs) From bece157f03c7f8d4d6cbc22fa3b21c8627d3b590 Mon Sep 17 00:00:00 2001 From: Jared O'Connell Date: Tue, 8 Jul 2025 17:06:12 -0400 Subject: [PATCH 17/18] Revert docs changes --- README.md | 8 ++++---- docs/datasets.md | 12 ++++++------ docs/outputs.md | 16 ++++++++-------- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 93f09f415..1e489bb5e 100644 --- a/README.md +++ b/README.md @@ -68,12 +68,12 @@ For information on starting other supported inference servers or platforms, see #### 2. Run a GuideLLM Benchmark -To run a GuideLLM benchmark, use the `guidellm benchmark run` command with the target set to an OpenAI-compatible server. 
For this example, the target is set to 'http://localhost:8000', assuming that vLLM is active and running on the same server. Otherwise, update it to the appropriate location. By default, GuideLLM automatically determines the model available on the server and uses it. To target a different model, pass the desired name with the `--model` argument. Additionally, the `--rate-type` is set to `sweep`, which automatically runs a range of benchmarks to determine the minimum and maximum rates that the server and model can support. Each benchmark run under the sweep will run for 30 seconds, as set by the `--max-seconds` argument. Finally, `--data` is set to a synthetic dataset with 256 prompt tokens and 128 output tokens per request. For more arguments, supported scenarios, and configurations, jump to the [Configurations Section](#configurations) or run `guidellm benchmark --help`. +To run a GuideLLM benchmark, use the `guidellm benchmark` command with the target set to an OpenAI-compatible server. For this example, the target is set to 'http://localhost:8000', assuming that vLLM is active and running on the same server. Otherwise, update it to the appropriate location. By default, GuideLLM automatically determines the model available on the server and uses it. To target a different model, pass the desired name with the `--model` argument. Additionally, the `--rate-type` is set to `sweep`, which automatically runs a range of benchmarks to determine the minimum and maximum rates that the server and model can support. Each benchmark run under the sweep will run for 30 seconds, as set by the `--max-seconds` argument. Finally, `--data` is set to a synthetic dataset with 256 prompt tokens and 128 output tokens per request. For more arguments, supported scenarios, and configurations, jump to the [Configurations Section](#configurations) or run `guidellm benchmark --help`. 
Now, to start benchmarking, run the following command: ```bash -guidellm benchmark run \ +guidellm benchmark \ --target "http://localhost:8000" \ --rate-type sweep \ --max-seconds 30 \ @@ -110,11 +110,11 @@ For further details on determining the optimal request rate and SLOs, refer to t ### Configurations -GuideLLM offers a range of configurations through both the benchmark CLI command and environment variables, which provide default values and more granular controls. The most common configurations are listed below. A complete list is easily accessible, though, by running `guidellm benchmark run --help` or `guidellm config` respectively. +GuideLLM offers a range of configurations through both the benchmark CLI command and environment variables, which provide default values and more granular controls. The most common configurations are listed below. A complete list is easily accessible, though, by running `guidellm benchmark --help` or `guidellm config` respectively. #### Benchmark CLI -The `guidellm benchmark run` command is used to run benchmarks against a generative AI backend/server. The command accepts a variety of arguments to customize the benchmark run. The most common arguments include: +The `guidellm benchmark` command is used to run benchmarks against a generative AI backend/server. The command accepts a variety of arguments to customize the benchmark run. The most common arguments include: - `--target`: Specifies the target path for the backend to run benchmarks against. For example, `http://localhost:8000`. This is required to define the server endpoint. 
diff --git a/docs/datasets.md b/docs/datasets.md index a626b58c9..a5d0aa4e5 100644 --- a/docs/datasets.md +++ b/docs/datasets.md @@ -20,7 +20,7 @@ The following arguments can be used to configure datasets and their processing: ### Example Usage ```bash -guidellm benchmark run \ +guidellm benchmark \ --target "http://localhost:8000" \ --rate-type "throughput" \ --max-requests 1000 \ @@ -49,7 +49,7 @@ For different use cases, here are the recommended dataset profiles to pass as ar #### Example Commands ```bash -guidellm benchmark run \ +guidellm benchmark \ --target "http://localhost:8000" \ --rate-type "throughput" \ --max-requests 1000 \ @@ -59,7 +59,7 @@ guidellm benchmark run \ Or using a JSON string: ```bash -guidellm benchmark run \ +guidellm benchmark \ --target "http://localhost:8000" \ --rate-type "throughput" \ --max-requests 1000 \ @@ -90,7 +90,7 @@ GuideLLM supports datasets from the Hugging Face Hub or local directories that f #### Example Commands ```bash -guidellm benchmark run \ +guidellm benchmark \ --target "http://localhost:8000" \ --rate-type "throughput" \ --max-requests 1000 \ @@ -100,7 +100,7 @@ guidellm benchmark run \ Or using a local dataset: ```bash -guidellm benchmark run \ +guidellm benchmark \ --target "http://localhost:8000" \ --rate-type "throughput" \ --max-requests 1000 \ @@ -152,7 +152,7 @@ GuideLLM supports various file formats for datasets, including text, CSV, JSON, #### Example Commands ```bash -guidellm benchmark run \ +guidellm benchmark \ --target "http://localhost:8000" \ --rate-type "throughput" \ --max-requests 1000 \ diff --git a/docs/outputs.md b/docs/outputs.md index 29a16ef58..ea3d9a6f0 100644 --- a/docs/outputs.md +++ b/docs/outputs.md @@ -5,7 +5,7 @@ GuideLLM provides flexible options for outputting benchmark results, catering to For all of the output formats, `--output-extras` can be used to include additional information. 
This could include tags, metadata, hardware details, and other relevant information that can be useful for analysis. This must be supplied as a JSON encoded string. For example: ```bash -guidellm benchmark run \ +guidellm benchmark \ --target "http://localhost:8000" \ --rate-type sweep \ --max-seconds 30 \ @@ -26,10 +26,10 @@ By default, GuideLLM displays benchmark results and progress directly in the con ### Disabling Console Output -To disable the progress outputs to the console, use the `disable-progress` flag when running the `guidellm benchmark run` command. For example: +To disable the progress outputs to the console, use the `--disable-progress` flag when running the `guidellm benchmark` command. For example: ```bash -guidellm benchmark run \ +guidellm benchmark \ --target "http://localhost:8000" \ --rate-type sweep \ --max-seconds 30 \ @@ -37,10 +37,10 @@ guidellm benchmark run \ --disable-progress ``` -To disable console output, use the `--disable-console-outputs` flag when running the `guidellm benchmark run` command. For example: +To disable console output, use the `--disable-console-outputs` flag when running the `guidellm benchmark` command. For example: ```bash -guidellm benchmark run \ +guidellm benchmark \ --target "http://localhost:8000" \ --rate-type sweep \ --max-seconds 30 \ @@ -50,10 +50,10 @@ guidellm benchmark run \ ### Enabling Extra Information -GuideLLM includes the option to display extra information during the benchmark runs to monitor the overheads and performance of the system. This can be enabled by using the `--display-scheduler-stats` flag when running the `guidellm benchmark run` command. For example: +GuideLLM includes the option to display extra information during the benchmark runs to monitor the overheads and performance of the system. This can be enabled by using the `--display-scheduler-stats` flag when running the `guidellm benchmark` command.
For example: ```bash -guidellm benchmark run \ +guidellm benchmark \ --target "http://localhost:8000" \ --rate-type sweep \ --max-seconds 30 \ @@ -81,7 +81,7 @@ GuideLLM supports saving benchmark results to files in various formats, includin Example command to save results in YAML format: ```bash -guidellm benchmark run \ +guidellm benchmark \ --target "http://localhost:8000" \ --rate-type sweep \ --max-seconds 30 \ From be1730d3a8d08a6eb193d7dca6d72cbec16c45c5 Mon Sep 17 00:00:00 2001 From: Jared O'Connell Date: Tue, 8 Jul 2025 17:06:28 -0400 Subject: [PATCH 18/18] Update command to use hyphen --- src/guidellm/__main__.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/guidellm/__main__.py b/src/guidellm/__main__.py index 4deff3bff..3babc24ac 100644 --- a/src/guidellm/__main__.py +++ b/src/guidellm/__main__.py @@ -320,7 +320,10 @@ def run( ) -@benchmark.command(help="Load a saved benchmark report.") +@benchmark.command( + "from-file", + help="Load a saved benchmark report." +) @click.argument( "path", type=click.Path(file_okay=True, dir_okay=False, exists=True),