16 changes: 16 additions & 0 deletions src/guidellm/core/result.py
@@ -403,6 +403,22 @@ def output_token_throughput(self) -> float:

        return total_tokens / self.duration

    @property
    def output_token_throughput_distribution(self) -> Distribution:
        """
        Get the distribution for output token throughput.

        :return: The distribution of output token throughput.
        :rtype: Distribution
        """
        throughputs = []
        for r in self.results:
            duration = (r.end_time or 0) - (r.start_time or 0)
            if duration > 0:
                throughputs.append(r.output_token_count / duration)

        return Distribution(data=throughputs)

Comment on lines +404 to +418 (PR author):
The UI relies on the output token throughput distribution, and I didn't find any existing methods or properties in the tokens-per-unit-of-time shape the UI expects, so I added this property.
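A minimal usage sketch (not part of the PR; the call site and the `benchmark` variable are assumed) of how downstream report code could read summary statistics off the new property:

# Assumed call site: `benchmark` is a completed TextGenerationBenchmark.
dist = benchmark.output_token_throughput_distribution
print(dist.mean, dist.std_deviation)
print(dist.percentiles([50, 90, 95, 99]))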

    @property
    def prompt_token_distribution(self) -> Distribution:
        """
4 changes: 3 additions & 1 deletion src/guidellm/main.py
@@ -14,6 +14,7 @@
)
from guidellm.request.base import RequestGenerator
from guidellm.utils import BenchmarkReportProgress, cli_params
from guidellm.utils.generate_ui_data import generate_ui_api_data

__all__ = ["generate_benchmark_report"]

@@ -183,7 +184,6 @@ def generate_benchmark_report_cli(
        cont_refresh_table=enable_continuous_refresh,
    )


def generate_benchmark_report(
    target: str,
    backend: BackendEnginePublic,
@@ -285,6 +285,8 @@ def generate_benchmark_report(
    )
    report = asyncio.run(_run_executor_for_result(executor))

    generate_ui_api_data(report)

PR author comment:
This is just so I can run this easily and look at the generated JSON.

    # Save and print report
    guidance_report = GuidanceReport()
    guidance_report.benchmarks.append(report)
2 changes: 2 additions & 0 deletions src/guidellm/utils/__init__.py
@@ -1,4 +1,5 @@
from .injector import create_report, inject_data
from .generate_ui_data import generate_ui_api_data
from .progress import BenchmarkReportProgress
from .text import (
clean_text,
@@ -24,6 +25,7 @@
    "clean_text",
    "create_report",
    "filter_text",
    "generate_ui_api_data",
    "inject_data",
    "is_path",
    "is_path_like",
154 changes: 154 additions & 0 deletions src/guidellm/utils/generate_ui_data.py
@@ -0,0 +1,154 @@
import os
import json
import random
from typing import Any, Dict, List
from guidellm.core.distribution import Distribution
from guidellm.core import TextGenerationBenchmarkReport, TextGenerationBenchmark

def generate_metric_report(dist: Distribution, metric_label: str, n_buckets: int = 18):
    total = len(dist)
    mean = dist.mean
    median = dist.median
    minv = dist.min
    maxv = dist.max
    std_dev = dist.std_deviation

    pvals = dist.percentiles([50, 90, 95, 99])

    percentile_list = [
        {"percentile": "p50", "value": pvals[0]},
        {"percentile": "p90", "value": pvals[1]},
        {"percentile": "p95", "value": pvals[2]},
        {"percentile": "p99", "value": pvals[3]},
    ]

    if dist.range == 0:
        buckets = [{"value": minv, "count": total}]
        bucket_width = 0
    else:
        bucket_width = dist.range / n_buckets
        bucket_counts = [0] * n_buckets

        for val in dist.data:
            idx = int((val - minv) // bucket_width)
            if idx == n_buckets:
                idx = n_buckets - 1
            bucket_counts[idx] += 1

        buckets = []
        for i, count in enumerate(bucket_counts):
            bucket_start = minv + i * bucket_width
            buckets.append({
                "value": bucket_start,
                "count": count
            })
Comment on lines +30 to +46 (PR author):
I am not sure of the proper way to generate these buckets, or whether there is code elsewhere in guidellm that could handle this and I missed it.

This code assumes a set number of buckets and derives the bucket width from that. It is a hard-coded approach, and some data analysis first might yield a better bucket count or bucket size, but I figured the UI would look good with a fixed number of buckets so the histograms conveniently look the same and take up a comfortable amount of space. A data-driven alternative is sketched below.
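For comparison, a minimal sketch (not part of the PR, assuming numpy is available) of how the same fixed-count buckets could be produced with numpy, plus a data-driven bin count via its "auto" rule; `data` stands in for `dist.data`:

import numpy as np

def bucketize(data, n_buckets=18):
    # Fixed bucket count, same idea as above: per-bucket counts plus the shared bucket width.
    counts, edges = np.histogram(data, bins=n_buckets)
    bucket_width = float(edges[1] - edges[0])
    buckets = [
        {"value": float(start), "count": int(count)}
        for start, count in zip(edges[:-1], counts)
    ]
    return buckets, bucket_width

# Data-driven alternative: let numpy choose the bin count from the data.
# counts, edges = np.histogram(data, bins="auto")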


    return {
        metric_label: {
            "statistics": {
                "total": total,
                "mean": mean,
                "median": median,
                "min": minv,
                "max": maxv,
                "std": std_dev,
            },
            "percentiles": percentile_list,
            "buckets": buckets,
            "bucketWidth": bucket_width,
        }
    }

def generate_run_info(report: TextGenerationBenchmarkReport) -> Dict[str, Any]:
    timestamp = max(map(lambda bm: bm.end_time, report.benchmarks))
    return {
        "model": {
            "name": report.args.get('model', 'N/A'),
            "size": 0
        },
        "task": "N/A",
        "dataset": "N/A",
        "timestamp": timestamp
    }

def generate_request_over_time_data(benchmarks: List[TextGenerationBenchmark]) -> List[Dict[str, Any]]:
    request_over_time_results = []
    for benchmark in benchmarks:
        # compare benchmark start time to text generation result end time
        all_result_end_times = [result.end_time for result in benchmark.results if result.end_time is not None]
        request_over_time_values = list(map(lambda time: time - benchmark.start_time, all_result_end_times))
        request_distribution = Distribution(data=request_over_time_values)
        result = generate_metric_report(request_distribution, "requestsOverTime")
        request_over_time_results.append(result["requestsOverTime"])
    return request_over_time_results


def generate_workload_details(report: TextGenerationBenchmarkReport) -> Dict[str, Any]:
    all_prompt_token_data = [data for benchmark in report.benchmarks for data in benchmark.prompt_token_distribution.data]
    all_prompt_token_distribution = Distribution(data=all_prompt_token_data)
    all_output_token_data = [data for benchmark in report.benchmarks for data in benchmark.output_token_distribution.data]
    all_output_token_distribution = Distribution(data=all_output_token_data)

    prompt_token_data = generate_metric_report(all_prompt_token_distribution, "tokenDistributions")
    prompt_token_samples = [result.prompt for benchmark in report.benchmarks for result in benchmark.results]
    sample_prompts = random.sample(prompt_token_samples, min(5, len(prompt_token_samples)))
    output_token_data = generate_metric_report(all_output_token_distribution, "tokenDistributions")
    output_token_samples = [result.output for benchmark in report.benchmarks for result in benchmark.results]
    sample_outputs = random.sample(output_token_samples, min(5, len(output_token_samples)))

    request_over_time_results = generate_request_over_time_data(report.benchmarks)

    return {
        "prompts": {
            "samples": sample_prompts,
            **prompt_token_data
        },
        "generation": {
            "samples": sample_outputs,
            **output_token_data
        },
        "requestsOverTime": request_over_time_results,
        "server": {
            "target": report.args.get('target', 'N/A')
        }
    }

def generate_benchmark_json(bm: TextGenerationBenchmark) -> Dict[str, Any]:
    ttft_dist_ms = Distribution(data=[val * 1000 for val in bm.ttft_distribution.data])
    ttft_data = generate_metric_report(ttft_dist_ms, 'ttft')
    tpot_dist_ms = Distribution(data=[val * 1000 for val in bm.itl_distribution.data])
    tpot_data = generate_metric_report(tpot_dist_ms, 'tpot')
    throughput_dist_ms = Distribution(data=[val * 1000 for val in bm.output_token_throughput_distribution.data])
    throughput_data = generate_metric_report(throughput_dist_ms, 'throughput')
    latency_dist_ms = Distribution(data=[val * 1000 for val in bm.request_latency_distribution.data])
    time_per_request_data = generate_metric_report(latency_dist_ms, 'timePerRequest')
    return {
        "requestsPerSecond": bm.completed_request_rate,
        **ttft_data,
        **tpot_data,
        **throughput_data,
        **time_per_request_data,
    }

def generate_benchmarks_json(benchmarks: List[TextGenerationBenchmark]):
    benchmark_report_json = []
    for benchmark in benchmarks:
        benchmarks_report = generate_benchmark_json(benchmark)
        benchmark_report_json.append(benchmarks_report)
    return benchmark_report_json

def generate_ui_api_data(report: TextGenerationBenchmarkReport):
    run_info_json = generate_run_info(report)
    workload_details_json = generate_workload_details(report)
    benchmarks_json = generate_benchmarks_json(report.benchmarks)
    os.makedirs("ben_test", exist_ok=True)
    # generate json files based off of api specs, https://codepen.io/dalthecow/pen/bNGVQbq, for consumption by the UI
    with open("ben_test/run_info.json", "w") as f:
        json.dump(run_info_json, f, indent=2)
    with open("ben_test/workload_details.json", "w") as f:
        json.dump(workload_details_json, f, indent=2)
    with open("ben_test/benchmarks.json", "w") as f:
        json.dump(benchmarks_json, f, indent=2)
PR author comment:
This is just for testing purposes, to view the generated JSON.


print("Reports saved to run_info.json, workload_details.json, benchmarks.json")