import os
import json
import random
from typing import Any, Dict, List

from guidellm.core.distribution import Distribution
from guidellm.core import TextGenerationBenchmarkReport, TextGenerationBenchmark

def generate_metric_report(dist: Distribution, metric_label: str, n_buckets: int = 18) -> Dict[str, Any]:
    """Summarize a Distribution into statistics, percentiles, and histogram buckets keyed by metric_label."""
    total = len(dist)
    mean = dist.mean
    median = dist.median
    minv = dist.min
    maxv = dist.max
    std_dev = dist.std_deviation

    pvals = dist.percentiles([50, 90, 95, 99])

    percentile_list = [
        {"percentile": "p50", "value": pvals[0]},
        {"percentile": "p90", "value": pvals[1]},
        {"percentile": "p95", "value": pvals[2]},
        {"percentile": "p99", "value": pvals[3]},
    ]

    if dist.range == 0:
        # All values are identical, so a single bucket holds every sample.
        buckets = [{"value": minv, "count": total}]
        bucket_width = 0
    else:
        # Split the value range into n_buckets equal-width buckets and count samples per bucket.
        bucket_width = dist.range / n_buckets
        bucket_counts = [0] * n_buckets

        for val in dist.data:
            idx = int((val - minv) // bucket_width)
            if idx == n_buckets:
                # The maximum value lands on the upper edge; keep it in the last bucket.
                idx = n_buckets - 1
            bucket_counts[idx] += 1

        buckets = []
        for i, count in enumerate(bucket_counts):
            bucket_start = minv + i * bucket_width
            buckets.append({
                "value": bucket_start,
                "count": count,
            })

    return {
        metric_label: {
            "statistics": {
                "total": total,
                "mean": mean,
                "median": median,
                "min": minv,
                "max": maxv,
                "std": std_dev,
            },
            "percentiles": percentile_list,
            "buckets": buckets,
            "bucketWidth": bucket_width,
        }
    }
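
# Illustrative shape of the value returned by generate_metric_report (placeholders, not
# real data), shown here for readers wiring the UI against these payloads:
#
#   {
#       "<metric_label>": {
#           "statistics": {"total": ..., "mean": ..., "median": ..., "min": ..., "max": ..., "std": ...},
#           "percentiles": [{"percentile": "p50", "value": ...}, ...],
#           "buckets": [{"value": <bucket start>, "count": <samples in bucket>}, ...],
#           "bucketWidth": ...,
#       }
#   }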

def generate_run_info(report: TextGenerationBenchmarkReport) -> Dict[str, Any]:
    # Use the latest benchmark end time as the report timestamp.
    timestamp = max(benchmark.end_time for benchmark in report.benchmarks)
    return {
        "model": {
            "name": report.args.get("model", "N/A"),
            # Placeholder; model size is not populated here.
            "size": 0,
        },
        "task": "N/A",
        "dataset": "N/A",
        "timestamp": timestamp,
    }

def generate_workload_details(report: TextGenerationBenchmarkReport) -> Dict[str, Any]:
    # Merge per-benchmark token distributions so the summary covers the whole run.
    all_prompt_token_data = [
        data
        for benchmark in report.benchmarks
        for data in benchmark.prompt_token_distribution.data
    ]
    all_prompt_token_distribution = Distribution(data=all_prompt_token_data)
    all_output_token_data = [
        data
        for benchmark in report.benchmarks
        for data in benchmark.output_token_distribution.data
    ]
    all_output_token_distribution = Distribution(data=all_output_token_data)

    prompt_token_data = generate_metric_report(all_prompt_token_distribution, "tokenDistributions")
    prompt_token_samples = [
        result.prompt for benchmark in report.benchmarks for result in benchmark.results
    ]
    sample_prompts = random.sample(prompt_token_samples, min(5, len(prompt_token_samples)))
    output_token_data = generate_metric_report(all_output_token_distribution, "tokenDistributions")
    output_token_samples = [
        result.output for benchmark in report.benchmarks for result in benchmark.results
    ]
    sample_outputs = random.sample(output_token_samples, min(5, len(output_token_samples)))
    return {
        "prompts": {
            "samples": sample_prompts,
            **prompt_token_data,
        },
        "generation": {
            "samples": sample_outputs,
            **output_token_data,
        },
        "server": {
            "target": report.args.get("target", "N/A"),
        },
    }

def generate_benchmark_json(bm: TextGenerationBenchmark) -> Dict[str, Any]:
    # Scale each distribution by 1000; for the timing metrics this converts seconds to milliseconds.
    ttft_dist_ms = Distribution(data=[val * 1000 for val in bm.ttft_distribution.data])
    ttft_data = generate_metric_report(ttft_dist_ms, "ttft")
    tpot_dist_ms = Distribution(data=[val * 1000 for val in bm.itl_distribution.data])
    tpot_data = generate_metric_report(tpot_dist_ms, "tpot")
    throughput_dist_ms = Distribution(data=[val * 1000 for val in bm.output_token_throughput_distribution.data])
    throughput_data = generate_metric_report(throughput_dist_ms, "throughput")
    latency_dist_ms = Distribution(data=[val * 1000 for val in bm.request_latency_distribution.data])
    time_per_request_data = generate_metric_report(latency_dist_ms, "timePerRequest")
    return {
        "requestsPerSecond": bm.completed_request_rate,
        **ttft_data,
        **tpot_data,
        **throughput_data,
        **time_per_request_data,
    }

def generate_benchmarks_json(benchmarks: List[TextGenerationBenchmark]) -> List[Dict[str, Any]]:
    return [generate_benchmark_json(benchmark) for benchmark in benchmarks]

def generate_ui_api_data(report: TextGenerationBenchmarkReport) -> None:
    run_info_json = generate_run_info(report)
    workload_details_json = generate_workload_details(report)
    benchmarks_json = generate_benchmarks_json(report.benchmarks)
    os.makedirs("ben_test", exist_ok=True)
    # Generate JSON files matching the API specs (https://codepen.io/dalthecow/pen/bNGVQbq)
    # for consumption by the UI.
    with open("ben_test/run_info.json", "w") as f:
        json.dump(run_info_json, f, indent=2)
    with open("ben_test/workload_details.json", "w") as f:
        json.dump(workload_details_json, f, indent=2)
    with open("ben_test/benchmarks.json", "w") as f:
        json.dump(benchmarks_json, f, indent=2)

    print("Reports saved to ben_test/run_info.json, ben_test/workload_details.json, ben_test/benchmarks.json")
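

if __name__ == "__main__":
    # Minimal smoke test (a sketch, not part of the report pipeline): exercise
    # generate_metric_report with a hand-built Distribution. The sample values and the
    # "exampleMetric" label are assumptions chosen purely for demonstration; a real run
    # would pass a TextGenerationBenchmarkReport produced by guidellm to
    # generate_ui_api_data instead.
    example_dist = Distribution(data=[0.8, 1.2, 1.9, 2.4, 3.1])
    print(json.dumps(generate_metric_report(example_dist, "exampleMetric", n_buckets=4), indent=2))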