Set up data for ui - WIP #87
Changes from 2 commits
@@ -14,6 +14,7 @@
)
from guidellm.request.base import RequestGenerator
from guidellm.utils import BenchmarkReportProgress, cli_params
from guidellm.utils.generate_ui_data import generate_ui_api_data

__all__ = ["generate_benchmark_report"]


@@ -183,7 +184,6 @@ def generate_benchmark_report_cli(
        cont_refresh_table=enable_continuous_refresh,
    )

def generate_benchmark_report(
    target: str,
    backend: BackendEnginePublic,


@@ -285,6 +285,8 @@ def generate_benchmark_report(
    )
    report = asyncio.run(_run_executor_for_result(executor))

    generate_ui_api_data(report)

    # Save and print report
    guidance_report = GuidanceReport()
    guidance_report.benchmarks.append(report)
New file: guidellm/utils/generate_ui_data.py

@@ -0,0 +1,154 @@
import os
import json
import random
from typing import Any, Dict, List
from guidellm.core.distribution import Distribution
from guidellm.core import TextGenerationBenchmarkReport, TextGenerationBenchmark


def generate_metric_report(dist: Distribution, metric_label: str, n_buckets: int = 18) -> Dict[str, Any]:
    total = len(dist)
    mean = dist.mean
    median = dist.median
    minv = dist.min
    maxv = dist.max
    std_dev = dist.std_deviation

    pvals = dist.percentiles([50, 90, 95, 99])

    percentile_list = [
        {"percentile": "p50", "value": pvals[0]},
        {"percentile": "p90", "value": pvals[1]},
        {"percentile": "p95", "value": pvals[2]},
        {"percentile": "p99", "value": pvals[3]},
    ]

    if dist.range == 0:
        buckets = [{"value": minv, "count": total}]
        bucket_width = 0
    else:
        bucket_width = dist.range / n_buckets
        bucket_counts = [0] * n_buckets

        for val in dist.data:
            idx = int((val - minv) // bucket_width)
            if idx == n_buckets:
                idx = n_buckets - 1
            bucket_counts[idx] += 1

        buckets = []
        for i, count in enumerate(bucket_counts):
            bucket_start = minv + i * bucket_width
            buckets.append({
                "value": bucket_start,
                "count": count
            })
Comment on lines +30 to +46:
I am not sure of the proper way to generate these buckets, or whether there is code elsewhere in guidellm that could handle this and I missed it. This code assumes a set number of buckets to generate and derives the bucket width from that. It is a hard-coded approach, and some data analysis first might yield a better number of buckets or bucket size. Generally, though, I figured the UI would look good with a fixed number of buckets, so the histograms conveniently look the same and take up a comfortable amount of space.
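To make the data-analysis alternative mentioned above concrete, here is a minimal sketch that lets numpy choose the bucket count from the data (its "auto" mode takes the larger of the Sturges and Freedman-Diaconis estimates) and emits the same buckets/bucketWidth shape as the code above. The numpy dependency and the auto_buckets name are assumptions for illustration, not existing guidellm code.

import numpy as np

# Sketch only: data-driven bucketing as an alternative to a fixed n_buckets.
# numpy picks the bin count from the data; the output mirrors the
# "buckets"/"bucketWidth" fields built in generate_metric_report.
def auto_buckets(data):
    if len(data) == 0:
        return [], 0.0
    counts, edges = np.histogram(data, bins="auto")
    bucket_width = float(edges[1] - edges[0]) if len(edges) > 1 else 0.0
    buckets = [
        {"value": float(edges[i]), "count": int(count)}
        for i, count in enumerate(counts)
    ]
    return buckets, bucket_width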
    return {
        metric_label: {
            "statistics": {
                "total": total,
                "mean": mean,
                "median": median,
                "min": minv,
                "max": maxv,
                "std": std_dev,
            },
            "percentiles": percentile_list,
            "buckets": buckets,
            "bucketWidth": bucket_width,
        }
    }

def generate_run_info(report: TextGenerationBenchmarkReport) -> Dict[str, Any]:
    timestamp = max(map(lambda bm: bm.end_time, report.benchmarks))
    return {
        "model": {
            "name": report.args.get('model', 'N/A'),
            "size": 0
        },
        "task": "N/A",
        "dataset": "N/A",
        "timestamp": timestamp
    }


def generate_request_over_time_data(benchmarks: List[TextGenerationBenchmark]) -> List[Dict[str, Any]]:
    request_over_time_results = []
    for benchmark in benchmarks:
        # compare benchmark start time to text generation result end time
        all_result_end_times = [result.end_time for result in benchmark.results if result.end_time is not None]
        request_over_time_values = list(map(lambda time: time - benchmark.start_time, all_result_end_times))
        request_distribution = Distribution(data=request_over_time_values)
        result = generate_metric_report(request_distribution, "requestsOverTime")
        request_over_time_results.append(result["requestsOverTime"])
    return request_over_time_results

def generate_workload_details(report: TextGenerationBenchmarkReport) -> Dict[str, Any]:
    all_prompt_token_data = [data for benchmark in report.benchmarks for data in benchmark.prompt_token_distribution.data]
    all_prompt_token_distribution = Distribution(data=all_prompt_token_data)
    all_output_token_data = [data for benchmark in report.benchmarks for data in benchmark.output_token_distribution.data]
    all_output_token_distribution = Distribution(data=all_output_token_data)

    prompt_token_data = generate_metric_report(all_prompt_token_distribution, "tokenDistributions")
    prompt_token_samples = [result.prompt for benchmark in report.benchmarks for result in benchmark.results]
    sample_prompts = random.sample(prompt_token_samples, min(5, len(prompt_token_samples)))
    output_token_data = generate_metric_report(all_output_token_distribution, "tokenDistributions")
    output_token_samples = [result.output for benchmark in report.benchmarks for result in benchmark.results]
    sample_outputs = random.sample(output_token_samples, min(5, len(output_token_samples)))

    request_over_time_results = generate_request_over_time_data(report.benchmarks)

    return {
        "prompts": {
            "samples": sample_prompts,
            **prompt_token_data
        },
        "generation": {
            "samples": sample_outputs,
            **output_token_data
        },
        "requestsOverTime": request_over_time_results,
        "server": {
            "target": report.args.get('target', 'N/A')
        }
    }

def generate_benchmark_json(bm: TextGenerationBenchmark) -> Dict[str, Any]:
    ttft_dist_ms = Distribution(data=[val * 1000 for val in bm.ttft_distribution.data])
    ttft_data = generate_metric_report(ttft_dist_ms, 'ttft')
    tpot_dist_ms = Distribution(data=[val * 1000 for val in bm.itl_distribution.data])
    tpot_data = generate_metric_report(tpot_dist_ms, 'tpot')
    throughput_dist_ms = Distribution(data=[val * 1000 for val in bm.output_token_throughput_distribution.data])
    throughput_data = generate_metric_report(throughput_dist_ms, 'throughput')
    latency_dist_ms = Distribution(data=[val * 1000 for val in bm.request_latency_distribution.data])
    time_per_request_data = generate_metric_report(latency_dist_ms, 'timePerRequest')
    return {
        "requestsPerSecond": bm.completed_request_rate,
        **ttft_data,
        **tpot_data,
        **throughput_data,
        **time_per_request_data,
    }


def generate_benchmarks_json(benchmarks: List[TextGenerationBenchmark]):
    benchmark_report_json = []
    for benchmark in benchmarks:
        benchmarks_report = generate_benchmark_json(benchmark)
        benchmark_report_json.append(benchmarks_report)
    return benchmark_report_json

def generate_ui_api_data(report: TextGenerationBenchmarkReport):
    run_info_json = generate_run_info(report)
    workload_details_json = generate_workload_details(report)
    benchmarks_json = generate_benchmarks_json(report.benchmarks)
    os.makedirs("ben_test", exist_ok=True)
    # generate json files based off of api specs, https://codepen.io/dalthecow/pen/bNGVQbq, for consumption by UI
    with open("ben_test/run_info.json", "w") as f:
        json.dump(run_info_json, f, indent=2)
    with open("ben_test/workload_details.json", "w") as f:
        json.dump(workload_details_json, f, indent=2)
    with open("ben_test/benchmarks.json", "w") as f:
        json.dump(benchmarks_json, f, indent=2)

    print("Reports saved to run_info.json, workload_details.json, benchmarks.json")
Review comment:
The UI relies on the output token throughput distribution, and I didn't find any methods or properties in the token/(unit of time) shape the UI expects, so I added this.
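For reference, here is one way a per-request tokens-per-second distribution could be derived to fit the token/(unit of time) shape described above. This is a sketch only; the attribute names on the result objects (output_token_count, first_token_time, end_time) are assumptions for illustration and are not confirmed by this diff.

# Sketch only: per-request output-token throughput (tokens/second), suitable
# for wrapping in a Distribution like the other metrics in this file.
# The result attributes referenced here are assumed, not confirmed.
def output_token_throughput_values(results):
    values = []
    for result in results:
        if result.end_time is None or result.first_token_time is None:
            continue
        decode_time = result.end_time - result.first_token_time
        if decode_time > 0:
            values.append(result.output_token_count / decode_time)
    return values


# e.g. Distribution(data=output_token_throughput_values(benchmark.results))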