from typing import Any, Dict, List
from guidellm.core.distribution import Distribution
from guidellm.core import TextGenerationBenchmarkReport, TextGenerationBenchmark
+ from guidellm.utils.interpolation import interpolate_data_points

def generate_metric_report(dist: Distribution, metric_label: str, n_buckets: int = 18):
    total = len(dist)
@@ -61,8 +62,8 @@ def generate_metric_report(dist: Distribution, metric_label: str, n_buckets: int
        }
    }

- def generate_run_info(report: TextGenerationBenchmarkReport) -> Dict[str, Any]:
-     timestamp = max(map(lambda bm: bm.end_time, report.benchmarks))
+ def generate_run_info(report: TextGenerationBenchmarkReport, benchmarks: List[TextGenerationBenchmark]) -> Dict[str, Any]:
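+     # Use the latest benchmark start time as the run's timestamp.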
+     timestamp = max(bm.start_time for bm in benchmarks if bm.start_time is not None)
    return {
        "model": {
            "name": report.args.get('model', 'N/A'),
@@ -80,91 +81,109 @@ def linearly_interpolate_value(target_input, lower_input, lower_output, upperInp
    return lower_output + fraction * (upper_output - lower_output)

def generate_request_over_time_data(benchmarks: List[TextGenerationBenchmark]) -> Dict[str, Any]:
+     filtered_benchmarks = filter(lambda bm: bm.start_time is not None, benchmarks)
+     sorted_benchmarks = sorted(filtered_benchmarks, key=lambda bm: bm.start_time)
+     min_start_time = sorted_benchmarks[0].start_time

-     request_over_time_results = []
-     for benchmark in benchmarks:
-         # compare benchmark start time to text generation result end time
-         all_result_end_times = [result.end_time for result in benchmark.results if result.end_time is not None]
-         request_over_time_values = list(map(lambda time: time - benchmark.start_time, all_result_end_times))
-         request_distribution = Distribution(data=request_over_time_values)
-         result = generate_metric_report(request_distribution, "requestsOverTime")
-         result["requestsPerSecond"] = benchmark.completed_request_rate
-         request_over_time_results.append(result)
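+     # Offset each request's start time from the earliest benchmark start so all benchmarks share one time axis.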
+     all_request_times = [
+         result.start_time - min_start_time
+         for benchmark in sorted_benchmarks
+         for result in benchmark.results
+         if result.start_time is not None
+     ]

-     if len(benchmarks) == 1:
-         return request_over_time_results
+     request_distribution = Distribution(data=all_request_times)
+     final_result = generate_metric_report(request_distribution, "requestsOverTime")
+     return {"numBenchmarks": len(sorted_benchmarks), **final_result}
+
+ # def generate_request_over_time_data_per_benchmark(benchmarks: List[TextGenerationBenchmark]) -> List[Dict[str, Any]]:
+
+ #     request_over_time_results = []
+ #     for benchmark in benchmarks:
+ #         # compare benchmark start time to text generation result end time
+ #         all_result_end_times = [result.end_time for result in benchmark.results if result.end_time is not None]
+ #         request_over_time_values = list(map(lambda time: time - benchmark.start_time, all_result_end_times))
+ #         request_distribution = Distribution(data=request_over_time_values)
+ #         result = generate_metric_report(request_distribution, "requestsOverTime")
+ #         result["requestsPerSecond"] = benchmark.completed_request_rate
+ #         request_over_time_results.append(result)
+
+ #     if len(benchmarks) == 1:
+ #         return request_over_time_results

-     request_over_time_raw = []
-     sorted_bm = sorted(benchmarks, key=lambda bm: bm.completed_request_rate)
-     for benchmark in sorted_bm:
-         # compare benchmark start time to text generation result end time
-         all_result_end_times = [result.end_time for result in benchmark.results if result.end_time is not None]
-         request_over_time_values = list(map(lambda time: time - benchmark.start_time, all_result_end_times))
-         request_at_rps = {"rps": benchmark.completed_request_rate, "requests_over_time": request_over_time_values}
-         request_over_time_raw.append(request_at_rps)
-
-     rps_values = [request_obj["rps"] for request_obj in request_over_time_raw]
-     rps_range = list(range(math.ceil(min(rps_values)), math.ceil(max(rps_values))))
-     interpolated_request_values = []
-     lower_rps_index = 0
-     for rps in rps_range:
-         if rps > rps_values[lower_rps_index + 1]: lower_rps_index += 1
-         if rps == rps_values[lower_rps_index]:
-             interpolated_request_values.append({
-                 "requests_per_second": rps,
-                 "requests_over_time": request_over_time_raw[lower_rps_index]["requests_over_time"][:]
-             })
-             lower_rps_index += 1
-         elif rps < rps_values[lower_rps_index + 1]:
-             interpolated_requests_at_new_rps = []
-             for i in range(len(request_over_time_raw[lower_rps_index]["requests_over_time"])):
-                 lower_request = request_over_time_raw[lower_rps_index]["requests_over_time"][i]
-                 upper_request = request_over_time_raw[lower_rps_index + 1]["requests_over_time"][i]
-                 new_value = linearly_interpolate_value(rps, rps_values[lower_rps_index], lower_request, rps_values[lower_rps_index + 1], upper_request)
-                 interpolated_requests_at_new_rps.append(new_value)
-             interpolated_request_values.append({"requests_per_second": rps, "requests_over_time": interpolated_requests_at_new_rps})
-         elif rps > rps_values[lower_rps_index + 1]:
-             while rps > rps_values[lower_rps_index + 1]:
-                 lower_rps_index += 1
-             interpolated_requests_at_new_rps = []
-             for i in range(len(request_over_time_raw[lower_rps_index]["requests_over_time"])):
-                 lower_request = request_over_time_raw[lower_rps_index]["requests_over_time"][i]
-                 upper_request = request_over_time_raw[lower_rps_index + 1]["requests_over_time"][i]
-                 new_value = linearly_interpolate_value(rps, rps_values[lower_rps_index], lower_request, rps_values[lower_rps_index + 1], upper_request)
-                 interpolated_requests_at_new_rps.append(new_value)
-             interpolated_request_values.append({"requests_per_second": rps, "requests_over_time": interpolated_requests_at_new_rps})
-     interpolated_request_over_time_results = []
-     for request_value in interpolated_request_values:
-         request_distribution = Distribution(data=request_value["requests_over_time"])
-         result = generate_metric_report(request_distribution, "requestsOverTime")
-         result["requestsPerSecond"] = request_value["requests_per_second"]
-         interpolated_request_over_time_results.append(result)
-
-     return interpolated_request_over_time_results
-
-
- def generate_workload_details(report: TextGenerationBenchmarkReport) -> Dict[str, Any]:
-     all_prompt_token_data = [data for benchmark in report.benchmarks for data in benchmark.prompt_token_distribution.data]
+ #     request_over_time_raw = []
+ #     sorted_bm = sorted(benchmarks, key=lambda bm: bm.completed_request_rate)
+ #     for benchmark in sorted_bm:
+ #         # compare benchmark start time to text generation result end time
+ #         all_result_end_times = [result.end_time for result in benchmark.results if result.end_time is not None]
+ #         request_over_time_values = list(map(lambda time: time - benchmark.start_time, all_result_end_times))
+ #         request_at_rps = {"rps": benchmark.completed_request_rate, "requests_over_time": request_over_time_values}
+ #         request_over_time_raw.append(request_at_rps)
+
+ #     rps_values = [request_obj["rps"] for request_obj in request_over_time_raw]
+ #     rps_range = list(range(math.ceil(min(rps_values)), math.ceil(max(rps_values))))
+ #     interpolated_request_values = []
+ #     lower_rps_index = 0
+ #     for rps in rps_range:
+ #         if rps > rps_values[lower_rps_index + 1]: lower_rps_index += 1
+ #         if rps == rps_values[lower_rps_index]:
+ #             interpolated_request_values.append({
+ #                 "requests_per_second": rps,
+ #                 "requests_over_time": request_over_time_raw[lower_rps_index]["requests_over_time"][:]
+ #             })
+ #             lower_rps_index += 1
+ #         elif rps < rps_values[lower_rps_index + 1]:
+ #             interpolated_requests_at_new_rps = []
+ #             for i in range(len(request_over_time_raw[lower_rps_index]["requests_over_time"])):
+ #                 lower_request = request_over_time_raw[lower_rps_index]["requests_over_time"][i]
+ #                 upper_request = request_over_time_raw[lower_rps_index + 1]["requests_over_time"][i]
+ #                 new_value = linearly_interpolate_value(rps, rps_values[lower_rps_index], lower_request, rps_values[lower_rps_index + 1], upper_request)
+ #                 interpolated_requests_at_new_rps.append(new_value)
+ #             interpolated_request_values.append({"requests_per_second": rps, "requests_over_time": interpolated_requests_at_new_rps})
+ #         elif rps > rps_values[lower_rps_index + 1]:
+ #             while rps > rps_values[lower_rps_index + 1]:
+ #                 lower_rps_index += 1
+ #             interpolated_requests_at_new_rps = []
+ #             for i in range(len(request_over_time_raw[lower_rps_index]["requests_over_time"])):
+ #                 lower_request = request_over_time_raw[lower_rps_index]["requests_over_time"][i]
+ #                 upper_request = request_over_time_raw[lower_rps_index + 1]["requests_over_time"][i]
+ #                 new_value = linearly_interpolate_value(rps, rps_values[lower_rps_index], lower_request, rps_values[lower_rps_index + 1], upper_request)
+ #                 interpolated_requests_at_new_rps.append(new_value)
+ #             interpolated_request_values.append({"requests_per_second": rps, "requests_over_time": interpolated_requests_at_new_rps})
+ #     interpolated_request_over_time_results = []
+ #     for request_value in interpolated_request_values:
+ #         request_distribution = Distribution(data=request_value["requests_over_time"])
+ #         result = generate_metric_report(request_distribution, "requestsOverTime")
+ #         result["requestsPerSecond"] = request_value["requests_per_second"]
+ #         interpolated_request_over_time_results.append(result)
+ #     return {"rawData": request_over_time_results, "interpolatedData": interpolated_request_over_time_results}
+
+
+ def generate_workload_details(report: TextGenerationBenchmarkReport, benchmarks: List[TextGenerationBenchmark]) -> Dict[str, Any]:
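+     # Pool prompt and output token counts from every benchmark into single distributions.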
+     all_prompt_token_data = [data for benchmark in benchmarks for data in benchmark.prompt_token_distribution.data]
    all_prompt_token_distribution = Distribution(data=all_prompt_token_data)
-     all_output_token_data = [data for benchmark in report.benchmarks for data in benchmark.output_token_distribution.data]
+     all_output_token_data = [data for benchmark in benchmarks for data in benchmark.output_token_distribution.data]
    all_output_token_distribution = Distribution(data=all_output_token_data)

    prompt_token_data = generate_metric_report(all_prompt_token_distribution, "tokenDistributions")
    output_token_data = generate_metric_report(all_output_token_distribution, "tokenDistributions")

-     prompt_token_samples = [result.prompt for benchmark in report.benchmarks for result in benchmark.results]
-     output_token_samples = [result.output for benchmark in report.benchmarks for result in benchmark.results]
+     prompt_token_samples = [result.prompt for benchmark in benchmarks for result in benchmark.results]
+     output_token_samples = [result.output for benchmark in benchmarks for result in benchmark.results]

    num_samples = min(5, len(prompt_token_samples), len(output_token_samples))
    sample_indices = random.sample(range(len(prompt_token_samples)), num_samples)

    sample_prompts = [prompt_token_samples[i] for i in sample_indices]
178+ """
179+ Need a wholistic approach to parsing out characters in the prompt that don't covert well into the format we need
180+ """
    sample_prompts = list(map(lambda prompt: prompt.replace("\n", " ").replace("\"", "'"), sample_prompts))

    sample_outputs = [output_token_samples[i] for i in sample_indices]
    sample_outputs = list(map(lambda output: output.replace("\n", " ").replace("\"", "'"), sample_outputs))

-     request_over_time_results = generate_request_over_time_data(report.benchmarks)
+     request_over_time_results = generate_request_over_time_data(benchmarks)

    return {
        "prompts": {
@@ -184,35 +203,92 @@ def generate_workload_details(report: TextGenerationBenchmarkReport) -> Dict[str
def generate_benchmark_json(bm: TextGenerationBenchmark) -> Dict[str, Any]:
    ttft_dist_ms = Distribution(data=[val * 1000 for val in bm.ttft_distribution.data])
    ttft_data = generate_metric_report(ttft_dist_ms, 'ttft')
-     tpot_dist_ms = Distribution(data=[val * 1000 for val in bm.itl_distribution.data])
-     tpot_data = generate_metric_report(tpot_dist_ms, 'tpot')
-     throughput_dist_ms = Distribution(data=[val * 1000 for val in bm.output_token_throughput_distribution.data])
+     itl_dist_ms = Distribution(data=[val * 1000 for val in bm.itl_distribution.data])
+     itl_data = generate_metric_report(itl_dist_ms, 'tpot')
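+     # Output token throughput is already a rate (tokens/sec), so it is not scaled to milliseconds like the latency metrics.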
+     throughput_dist_ms = Distribution(data=bm.output_token_throughput_distribution.data)
    throughput_data = generate_metric_report(throughput_dist_ms, 'throughput')
    latency_dist_ms = Distribution(data=[val * 1000 for val in bm.request_latency_distribution.data])
-     time_per_request_data = generate_metric_report(latency_dist_ms, 'timePerRequest')
+     latency_data = generate_metric_report(latency_dist_ms, 'timePerRequest')
    return {
        "requestsPerSecond": bm.completed_request_rate,
+         **itl_data,
        **ttft_data,
-         **tpot_data,
        **throughput_data,
-         **time_per_request_data,
+         **latency_data,
    }

+ def generate_interpolated_benchmarks(benchmarks: List[TextGenerationBenchmark]):
+     """
+     Should we only use constant-rate benchmarks here, since synchronous and throughput runs might not be appropriate to lump in for interpolation across all RPS values?
+
+     Other edge case: what if the RPS values don't span more than one whole RPS, even with multiple benchmarks?
+     e.g. 1.1, 1.3, 1.5, 2.1, 2.5 can interpolate at 2 RPS,
+     or worse, 1.1, 1.4, 1.6 can't interpolate at all.
+     """
+     if len(benchmarks) <= 1:
+         return []
+
+     sorted_benchmarks = sorted(benchmarks, key=lambda bm: bm.completed_request_rate)
+     rps_values = [bm.completed_request_rate for bm in sorted_benchmarks]
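+     # Interpolate at every whole-number RPS between the lowest and highest measured completed request rates.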
+     rps_range = list(range(math.ceil(min(rps_values)), math.ceil(max(rps_values))))
+
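+     # Pair each benchmark's completed RPS with its raw samples; interpolate_data_points is expected to return (rps, samples) pairs at the target RPS values.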
+     ttft_data_by_rps = list(map(lambda bm: (bm.completed_request_rate, bm.ttft_distribution.data), sorted_benchmarks))
+     interpolated_ttft_data_by_rps = interpolate_data_points(ttft_data_by_rps, rps_range)
+
+     itl_data_by_rps = list(map(lambda bm: (bm.completed_request_rate, bm.itl_distribution.data), sorted_benchmarks))
+     interpolated_itl_data_by_rps = interpolate_data_points(itl_data_by_rps, rps_range)
+
+     throughput_data_by_rps = list(map(lambda bm: (bm.completed_request_rate, bm.output_token_throughput_distribution.data), sorted_benchmarks))
+     interpolated_throughput_data_by_rps = interpolate_data_points(throughput_data_by_rps, rps_range)
+
+     latency_data_by_rps = list(map(lambda bm: (bm.completed_request_rate, bm.request_latency_distribution.data), sorted_benchmarks))
+     interpolated_latency_data_by_rps = interpolate_data_points(latency_data_by_rps, rps_range)
+
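+     # Assemble one benchmark-style record per interpolated RPS, matching the shape produced by generate_benchmark_json.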
+     benchmark_json = []
+     for i in range(len(interpolated_ttft_data_by_rps)):
+         rps, interpolated_ttft_data = interpolated_ttft_data_by_rps[i]
+         ttft_dist_ms = Distribution(data=[val * 1000 for val in interpolated_ttft_data])
+         final_ttft_data = generate_metric_report(ttft_dist_ms, 'ttft')
+
+         _, interpolated_itl_data = interpolated_itl_data_by_rps[i]
+         itl_dist_ms = Distribution(data=[val * 1000 for val in interpolated_itl_data])
+         final_itl_data = generate_metric_report(itl_dist_ms, 'tpot')
+
+         _, interpolated_throughput_data = interpolated_throughput_data_by_rps[i]
+         throughput_dist_ms = Distribution(data=interpolated_throughput_data)
+         final_throughput_data = generate_metric_report(throughput_dist_ms, 'throughput')
+
+         _, interpolated_latency_data = interpolated_latency_data_by_rps[i]
+         latency_dist_ms = Distribution(data=[val * 1000 for val in interpolated_latency_data])
+         final_latency_data = generate_metric_report(latency_dist_ms, 'timePerRequest')
+
+         benchmark_json.append({
+             "requestsPerSecond": rps,
+             **final_itl_data,
+             **final_ttft_data,
+             **final_throughput_data,
+             **final_latency_data,
+         })
+     return benchmark_json
+
def generate_benchmarks_json(benchmarks: List[TextGenerationBenchmark]):
-     benchmark_report_json = []
+     raw_benchmark_json = []
    for benchmark in benchmarks:
        benchmarks_report = generate_benchmark_json(benchmark)
-         benchmark_report_json.append(benchmarks_report)
-     return benchmark_report_json
+         raw_benchmark_json.append(benchmarks_report)
+     interpolated_benchmark_json = generate_interpolated_benchmarks(benchmarks)
+
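+     # Return both the measured per-benchmark results and the series interpolated at whole-number RPS values.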
+     return {"raw": raw_benchmark_json, "interpolated_by_rps": interpolated_benchmark_json}

def generate_js_variable(variable_name: str, data: dict) -> str:
    json_data = json.dumps(data, indent=2)
    return f'`window.{variable_name} = {json_data};`'  # Wrap in quotes

def generate_ui_api_data(report: TextGenerationBenchmarkReport):
-     run_info_data = generate_run_info(report)
-     workload_details_data = generate_workload_details(report)
-     benchmarks_data = generate_benchmarks_json(report.benchmarks)
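+     # Drop benchmarks that completed no requests; they have no meaningful rate-based data to report.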
+     filtered_benchmarks = list(filter(lambda bm: bm.completed_request_rate > 0, report.benchmarks))
+     run_info_data = generate_run_info(report, filtered_benchmarks)
+     workload_details_data = generate_workload_details(report, filtered_benchmarks)
+     benchmarks_data = generate_benchmarks_json(filtered_benchmarks)
    run_info_script = generate_js_variable("run_info", run_info_data)
    workload_details_script = generate_js_variable("workload_details", workload_details_data)
    benchmarks_script = generate_js_variable("benchmarks", benchmarks_data)