16 changes: 16 additions & 0 deletions src/guidellm/core/result.py
@@ -403,6 +403,22 @@ def output_token_throughput(self) -> float:

        return total_tokens / self.duration

    @property
    def output_token_throughput_distribution(self) -> Distribution:
        """
        Get the distribution for output token throughput.

        :return: The distribution of output token throughput.
        :rtype: Distribution
        """
        throughputs = []
        for r in self.results:
            duration = (r.end_time or 0) - (r.start_time or 0)
            if duration > 0:
                throughputs.append(r.output_token_count / duration)

        return Distribution(data=throughputs)

Comment on lines +404 to +418 (PR author):
The UI relies on the output token throughput distribution, and I didn't find any existing methods or properties in the tokens-per-unit-of-time shape the UI expects, so I added this property.
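A minimal usage sketch (not part of the PR; the call site and the `benchmark` variable are assumed) of how downstream report code could read summary statistics off the new property:

# Assumed call site: `benchmark` is a completed TextGenerationBenchmark.
dist = benchmark.output_token_throughput_distribution
print(dist.mean, dist.std_deviation)
print(dist.percentiles([50, 90, 95, 99]))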

    @property
    def prompt_token_distribution(self) -> Distribution:
        """
4 changes: 3 additions & 1 deletion src/guidellm/main.py
@@ -14,6 +14,7 @@
)
from guidellm.request.base import RequestGenerator
from guidellm.utils import BenchmarkReportProgress, cli_params
from guidellm.utils.generate_ui_data import generate_ui_api_data

__all__ = ["generate_benchmark_report"]

@@ -183,7 +184,6 @@ def generate_benchmark_report_cli(
        cont_refresh_table=enable_continuous_refresh,
    )


def generate_benchmark_report(
    target: str,
    backend: BackendEnginePublic,
@@ -285,6 +285,8 @@ def generate_benchmark_report(
    )
    report = asyncio.run(_run_executor_for_result(executor))

    generate_ui_api_data(report)

PR author comment:
This is just so I can run this easily and look at the generated JSON.

    # Save and print report
    guidance_report = GuidanceReport()
    guidance_report.benchmarks.append(report)
2 changes: 2 additions & 0 deletions src/guidellm/utils/__init__.py
@@ -1,4 +1,5 @@
from .injector import create_report, inject_data
from .generate_ui_data import generate_ui_api_data
from .progress import BenchmarkReportProgress
from .text import (
clean_text,
@@ -24,6 +25,7 @@
    "clean_text",
    "create_report",
    "filter_text",
    "generate_ui_api_data",
    "inject_data",
    "is_path",
    "is_path_like",
154 changes: 154 additions & 0 deletions src/guidellm/utils/generate_ui_data.py
@@ -0,0 +1,154 @@
import os
import json
import random
from typing import Any, Dict, List
from guidellm.core.distribution import Distribution
from guidellm.core import TextGenerationBenchmarkReport, TextGenerationBenchmark

def generate_metric_report(dist: Distribution, metric_label: str, n_buckets: int = 18):
    total = len(dist)
    mean = dist.mean
    median = dist.median
    minv = dist.min
    maxv = dist.max
    std_dev = dist.std_deviation

    pvals = dist.percentiles([50, 90, 95, 99])

    percentile_list = [
        {"percentile": "p50", "value": pvals[0]},
        {"percentile": "p90", "value": pvals[1]},
        {"percentile": "p95", "value": pvals[2]},
        {"percentile": "p99", "value": pvals[3]},
    ]

    if dist.range == 0:
        buckets = [{"value": minv, "count": total}]
        bucket_width = 0
    else:
        bucket_width = dist.range / n_buckets
        bucket_counts = [0] * n_buckets

        for val in dist.data:
            idx = int((val - minv) // bucket_width)
            if idx == n_buckets:
                idx = n_buckets - 1
            bucket_counts[idx] += 1

        buckets = []
        for i, count in enumerate(bucket_counts):
            bucket_start = minv + i * bucket_width
            buckets.append({
                "value": bucket_start,
                "count": count
            })
Comment on lines +30 to +46 (PR author):
I am not sure of the proper way to generate these buckets, or whether there is code elsewhere in guidellm that could handle this and I missed it.

This code assumes a set number of buckets and derives the bucket width from that. It is a hard-coded approach, and some data analysis first might yield a better bucket count or bucket size, but I figured the UI would look good with a fixed number of buckets so the histograms conveniently look the same and take up a comfortable amount of space. A data-driven alternative is sketched below.
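For comparison, a minimal sketch (not part of the PR, assuming numpy is available) of how the same fixed-count buckets could be produced with numpy, plus a data-driven bin count via its "auto" rule; `data` stands in for `dist.data`:

import numpy as np

def bucketize(data, n_buckets=18):
    # Fixed bucket count, same idea as above: per-bucket counts plus the shared bucket width.
    counts, edges = np.histogram(data, bins=n_buckets)
    bucket_width = float(edges[1] - edges[0])
    buckets = [
        {"value": float(start), "count": int(count)}
        for start, count in zip(edges[:-1], counts)
    ]
    return buckets, bucket_width

# Data-driven alternative: let numpy choose the bin count from the data.
# counts, edges = np.histogram(data, bins="auto")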


    return {
        metric_label: {
            "statistics": {
                "total": total,
                "mean": mean,
                "median": median,
                "min": minv,
                "max": maxv,
                "std": std_dev,
            },
            "percentiles": percentile_list,
            "buckets": buckets,
            "bucketWidth": bucket_width,
        }
    }

def generate_run_info(report: TextGenerationBenchmarkReport) -> Dict[str, Any]:
    timestamp = max(map(lambda bm: bm.end_time, report.benchmarks))
    return {
        "model": {
            "name": report.args.get('model', 'N/A'),
            "size": 0
        },
        "task": "N/A",
        "dataset": "N/A",
        "timestamp": timestamp
    }

def generate_request_over_time_data(benchmarks: List[TextGenerationBenchmark]) -> List[Dict[str, Any]]:
    request_over_time_results = []
    for benchmark in benchmarks:
        # compare benchmark start time to text generation result end time
        all_result_end_times = [result.end_time for result in benchmark.results if result.end_time is not None]
        request_over_time_values = list(map(lambda time: time - benchmark.start_time, all_result_end_times))
        request_distribution = Distribution(data=request_over_time_values)
        result = generate_metric_report(request_distribution, "requestsOverTime")
        request_over_time_results.append(result["requestsOverTime"])
    return request_over_time_results


def generate_workload_details(report: TextGenerationBenchmarkReport) -> Dict[str, Any]:
    all_prompt_token_data = [data for benchmark in report.benchmarks for data in benchmark.prompt_token_distribution.data]
    all_prompt_token_distribution = Distribution(data=all_prompt_token_data)
    all_output_token_data = [data for benchmark in report.benchmarks for data in benchmark.output_token_distribution.data]
    all_output_token_distribution = Distribution(data=all_output_token_data)

    prompt_token_data = generate_metric_report(all_prompt_token_distribution, "tokenDistributions")
    prompt_token_samples = [result.prompt for benchmark in report.benchmarks for result in benchmark.results]
    sample_prompts = random.sample(prompt_token_samples, min(5, len(prompt_token_samples)))
    output_token_data = generate_metric_report(all_output_token_distribution, "tokenDistributions")
    output_token_samples = [result.output for benchmark in report.benchmarks for result in benchmark.results]
    sample_outputs = random.sample(output_token_samples, min(5, len(output_token_samples)))

    request_over_time_results = generate_request_over_time_data(report.benchmarks)

    return {
        "prompts": {
            "samples": sample_prompts,
            **prompt_token_data
        },
        "generation": {
            "samples": sample_outputs,
            **output_token_data
        },
        "requestsOverTime": request_over_time_results,
        "server": {
            "target": report.args.get('target', 'N/A')
        }
    }

def generate_benchmark_json(bm: TextGenerationBenchmark) -> Dict[str, Any]:
    ttft_dist_ms = Distribution(data=[val * 1000 for val in bm.ttft_distribution.data])
    ttft_data = generate_metric_report(ttft_dist_ms, 'ttft')
    tpot_dist_ms = Distribution(data=[val * 1000 for val in bm.itl_distribution.data])
    tpot_data = generate_metric_report(tpot_dist_ms, 'tpot')
    throughput_dist_ms = Distribution(data=[val * 1000 for val in bm.output_token_throughput_distribution.data])
    throughput_data = generate_metric_report(throughput_dist_ms, 'throughput')
    latency_dist_ms = Distribution(data=[val * 1000 for val in bm.request_latency_distribution.data])
    time_per_request_data = generate_metric_report(latency_dist_ms, 'timePerRequest')
    return {
        "requestsPerSecond": bm.completed_request_rate,
        **ttft_data,
        **tpot_data,
        **throughput_data,
        **time_per_request_data,
    }

def generate_benchmarks_json(benchmarks: List[TextGenerationBenchmark]):
    benchmark_report_json = []
    for benchmark in benchmarks:
        benchmarks_report = generate_benchmark_json(benchmark)
        benchmark_report_json.append(benchmarks_report)
    return benchmark_report_json

def generate_ui_api_data(report: TextGenerationBenchmarkReport):
    run_info_json = generate_run_info(report)
    workload_details_json = generate_workload_details(report)
    benchmarks_json = generate_benchmarks_json(report.benchmarks)
    os.makedirs("ben_test", exist_ok=True)
    # generate json files based off of api specs, https://codepen.io/dalthecow/pen/bNGVQbq, for consumption by the UI
    with open("ben_test/run_info.json", "w") as f:
        json.dump(run_info_json, f, indent=2)
    with open("ben_test/workload_details.json", "w") as f:
        json.dump(workload_details_json, f, indent=2)
    with open("ben_test/benchmarks.json", "w") as f:
        json.dump(benchmarks_json, f, indent=2)
PR author comment:
This is just for testing purposes, to view the generated JSON.


print("Reports saved to run_info.json, workload_details.json, benchmarks.json")