Commit 76fc2b4

Add interpolation of benchmark metrics by RPS, remove interpolation of request-over-time data in favor of the raw values, and refactor and test the interpolation functionality
1 parent d59cada commit 76fc2b4

4 files changed, +277 -81 lines changed


src/guidellm/utils/__init__.py

Lines changed: 6 additions & 1 deletion
@@ -1,5 +1,6 @@
-from .injector import create_report, inject_data
 from .generate_ui_data import generate_ui_api_data
+from .interpolation import linear_interpolate, interpolate_measurements, interpolate_data_points, stretch_list
+from .injector import create_report, inject_data
 from .progress import BenchmarkReportProgress
 from .text import (
     clean_text,
@@ -27,9 +28,12 @@
     "filter_text",
     "generate_ui_api_data",
     "inject_data",
+    "interpolate_data_points",
+    "interpolate_measurements",
     "is_path",
     "is_path_like",
     "is_url",
+    "linear_interpolate",
     "load_text",
     "load_text_lines",
     "load_transformers_dataset",
@@ -39,4 +43,5 @@
     "resolve_transformers_dataset_split",
     "split_lines_by_punctuation",
     "split_text",
+    "stretch_list",
 ]
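The new src/guidellm/utils/interpolation.py module is one of the 4 changed files, but its diff is not included in this section. Purely as a hypothetical sketch, inferred only from how interpolate_data_points is called in generate_ui_data.py below (a sorted list of (rps, data) tuples plus a list of target rps values, returning (rps, interpolated_data) tuples), the exported helpers could look roughly like this; the real module may differ:

# Hypothetical sketch only -- the actual interpolation.py is not shown in this
# section, so names and behaviors here are assumptions based on the call sites.
from typing import List, Tuple


def linear_interpolate(target_x: float, x0: float, y0: float, x1: float, y1: float) -> float:
    # Linear interpolation between (x0, y0) and (x1, y1), evaluated at target_x.
    if x1 == x0:
        return y0
    fraction = (target_x - x0) / (x1 - x0)
    return y0 + fraction * (y1 - y0)


def stretch_list(values: List[float], target_length: int) -> List[float]:
    # Resample a list to target_length so two measurement lists of different
    # lengths can be combined element-wise (assumed behavior).
    if target_length <= 0 or not values:
        return []
    if len(values) == 1 or target_length == 1:
        return [values[0]] * target_length
    stretched = []
    for i in range(target_length):
        pos = i * (len(values) - 1) / (target_length - 1)
        lower = int(pos)
        upper = min(lower + 1, len(values) - 1)
        stretched.append(linear_interpolate(pos, lower, values[lower], upper, values[upper]))
    return stretched


def interpolate_measurements(
    lower: Tuple[float, List[float]],
    upper: Tuple[float, List[float]],
    target_x: float,
) -> List[float]:
    # Interpolate element-wise between two measurement lists taken at two rps
    # values, after stretching both lists to a common length.
    (x0, values0), (x1, values1) = lower, upper
    length = max(len(values0), len(values1))
    values0, values1 = stretch_list(values0, length), stretch_list(values1, length)
    return [linear_interpolate(target_x, x0, v0, x1, v1) for v0, v1 in zip(values0, values1)]


def interpolate_data_points(
    data_by_x: List[Tuple[float, List[float]]],
    target_xs: List[float],
) -> List[Tuple[float, List[float]]]:
    # For each target x (rps), find the surrounding measured points and build
    # an interpolated data list between them.
    results: List[Tuple[float, List[float]]] = []
    for target_x in target_xs:
        for low, high in zip(data_by_x, data_by_x[1:]):
            if low[0] <= target_x <= high[0]:
                results.append((target_x, interpolate_measurements(low, high, target_x)))
                break
    return results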

src/guidellm/utils/generate_ui_data.py

Lines changed: 156 additions & 80 deletions
@@ -5,6 +5,7 @@
 from typing import Any, Dict, List
 from guidellm.core.distribution import Distribution
 from guidellm.core import TextGenerationBenchmarkReport, TextGenerationBenchmark
+from guidellm.utils.interpolation import interpolate_data_points

 def generate_metric_report(dist: Distribution, metric_label: str, n_buckets: int = 18):
     total = dist.__len__()
@@ -61,8 +62,8 @@ def generate_metric_report(dist: Distribution, metric_label: str, n_buckets: int
         }
     }

-def generate_run_info(report: TextGenerationBenchmarkReport) -> Dict[str, Any]:
-    timestamp = max(map(lambda bm: bm.end_time, report.benchmarks))
+def generate_run_info(report: TextGenerationBenchmarkReport, benchmarks: List[TextGenerationBenchmark]) -> Dict[str, Any]:
+    timestamp = max(bm.start_time for bm in benchmarks if bm.start_time is not None)
     return {
         "model": {
             "name": report.args.get('model', 'N/A'),
@@ -80,91 +81,109 @@ def linearly_interpolate_value(target_input, lower_input, lower_output, upperInp
     return lower_output + fraction * (upper_output - lower_output)

 def generate_request_over_time_data(benchmarks: List[TextGenerationBenchmark]) -> List[Dict[str, Any]]:
+    filtered_benchmarks = filter(lambda bm: bm.start_time is not None, benchmarks)
+    sorted_benchmarks = list(sorted(filtered_benchmarks, key=lambda bm: bm.start_time))
+    min_start_time = sorted_benchmarks[0].start_time

-    request_over_time_results = []
-    for benchmark in benchmarks:
-        # compare benchmark start time to text generation result end time
-        all_result_end_times = [result.end_time for result in benchmark.results if result.end_time is not None]
-        request_over_time_values = list(map(lambda time: time - benchmark.start_time, all_result_end_times))
-        request_distribution = Distribution(data=request_over_time_values)
-        result = generate_metric_report(request_distribution, "requestsOverTime")
-        result["requestsPerSecond"] = benchmark.completed_request_rate
-        request_over_time_results.append(result)
+    all_request_times = [
+        result.start_time - min_start_time
+        for benchmark in sorted_benchmarks
+        for result in benchmark.results
+        if result.start_time is not None
+    ]

-    if len(benchmarks) == 1:
-        return request_over_time_results
+    request_distribution = Distribution(data=all_request_times)
+    final_result = generate_metric_report(request_distribution, "requestsOverTime")
+    return { "numBenchmarks": len(sorted_benchmarks), **final_result }
+
+# def generate_request_over_time_data_per_benchmark(benchmarks: List[TextGenerationBenchmark]) -> List[Dict[str, Any]]:
+
+#     request_over_time_results = []
+#     for benchmark in benchmarks:
+#         # compare benchmark start time to text generation result end time
+#         all_result_end_times = [result.end_time for result in benchmark.results if result.end_time is not None]
+#         request_over_time_values = list(map(lambda time: time - benchmark.start_time, all_result_end_times))
+#         request_distribution = Distribution(data=request_over_time_values)
+#         result = generate_metric_report(request_distribution, "requestsOverTime")
+#         result["requestsPerSecond"] = benchmark.completed_request_rate
+#         request_over_time_results.append(result)
+
+#     if len(benchmarks) == 1:
+#         return request_over_time_results

-    request_over_time_raw = []
-    sorted_bm = sorted(benchmarks, key=lambda bm: bm.completed_request_rate)
-    for benchmark in sorted_bm:
-        # compare benchmark start time to text generation result end time
-        all_result_end_times = [result.end_time for result in benchmark.results if result.end_time is not None]
-        request_over_time_values = list(map(lambda time: time - benchmark.start_time, all_result_end_times))
-        request_at_rps = { "rps": benchmark.completed_request_rate, "requests_over_time": request_over_time_values }
-        request_over_time_raw.append(request_at_rps)
-
-    rps_values = [request_obj["rps"] for request_obj in request_over_time_raw]
-    rps_range = list(range(math.ceil(min(rps_values)), math.ceil(max(rps_values))))
-    interpolated_request_values = []
-    lower_rps_index = 0
-    for rps in rps_range:
-        if rps > rps_values[lower_rps_index + 1]: lower_rps_index += 1
-        if rps == rps_values[lower_rps_index]:
-            interpolated_request_values.append({
-                "requests_per_second": rps,
-                "requests_over_time": request_over_time_raw[lower_rps_index]["requests_over_time"][:]
-            })
-            lower_rps_index += 1
-        elif rps < rps_values[lower_rps_index + 1]:
-            interpolated_requests_at_new_rps = []
-            for i in range(len(request_over_time_raw[lower_rps_index]["requests_over_time"])):
-                lower_request = request_over_time_raw[lower_rps_index]["requests_over_time"][i]
-                upper_request = request_over_time_raw[lower_rps_index + 1]["requests_over_time"][i]
-                new_value = linearly_interpolate_value(rps, rps_values[lower_rps_index], lower_request, rps_values[lower_rps_index + 1], upper_request)
-                interpolated_requests_at_new_rps.append(new_value)
-            interpolated_request_values.append({ "requests_per_second": rps, "requests_over_time": interpolated_requests_at_new_rps })
-        elif rps > rps_values[lower_rps_index + 1]:
-            while rps > rps_values[lower_rps_index + 1]:
-                lower_rps_index += 1
-            interpolated_requests_at_new_rps = []
-            for i in range(len(request_over_time_raw[lower_rps_index]["requests_over_time"])):
-                lower_request = request_over_time_raw[lower_rps_index]["requests_over_time"][i]
-                upper_request = request_over_time_raw[lower_rps_index + 1]["requests_over_time"][i]
-                new_value = linearly_interpolate_value(rps, rps_values[lower_rps_index], lower_request, rps_values[lower_rps_index + 1], upper_request)
-                interpolated_requests_at_new_rps.append(new_value)
-            interpolated_request_values.append({ "requests_per_second": rps, "requests_over_time": interpolated_requests_at_new_rps })
-    interpolated_request_over_time_results = []
-    for request_value in interpolated_request_values:
-        request_distribution = Distribution(data=request_value["requests_over_time"])
-        result = generate_metric_report(request_distribution, "requestsOverTime")
-        result["requestsPerSecond"] = request_value["requests_per_second"]
-        interpolated_request_over_time_results.append(result)
-
-    return interpolated_request_over_time_results
-
-
-def generate_workload_details(report: TextGenerationBenchmarkReport) -> Dict[str, Any]:
-    all_prompt_token_data = [data for benchmark in report.benchmarks for data in benchmark.prompt_token_distribution.data]
+#     request_over_time_raw = []
+#     sorted_bm = sorted(benchmarks, key=lambda bm: bm.completed_request_rate)
+#     for benchmark in sorted_bm:
+#         # compare benchmark start time to text generation result end time
+#         all_result_end_times = [result.end_time for result in benchmark.results if result.end_time is not None]
+#         request_over_time_values = list(map(lambda time: time - benchmark.start_time, all_result_end_times))
+#         request_at_rps = { "rps": benchmark.completed_request_rate, "requests_over_time": request_over_time_values }
+#         request_over_time_raw.append(request_at_rps)
+
+#     rps_values = [request_obj["rps"] for request_obj in request_over_time_raw]
+#     rps_range = list(range(math.ceil(min(rps_values)), math.ceil(max(rps_values))))
+#     interpolated_request_values = []
+#     lower_rps_index = 0
+#     for rps in rps_range:
+#         if rps > rps_values[lower_rps_index + 1]: lower_rps_index += 1
+#         if rps == rps_values[lower_rps_index]:
+#             interpolated_request_values.append({
+#                 "requests_per_second": rps,
+#                 "requests_over_time": request_over_time_raw[lower_rps_index]["requests_over_time"][:]
+#             })
+#             lower_rps_index += 1
+#         elif rps < rps_values[lower_rps_index + 1]:
+#             interpolated_requests_at_new_rps = []
+#             for i in range(len(request_over_time_raw[lower_rps_index]["requests_over_time"])):
+#                 lower_request = request_over_time_raw[lower_rps_index]["requests_over_time"][i]
+#                 upper_request = request_over_time_raw[lower_rps_index + 1]["requests_over_time"][i]
+#                 new_value = linearly_interpolate_value(rps, rps_values[lower_rps_index], lower_request, rps_values[lower_rps_index + 1], upper_request)
+#                 interpolated_requests_at_new_rps.append(new_value)
+#             interpolated_request_values.append({ "requests_per_second": rps, "requests_over_time": interpolated_requests_at_new_rps })
+#         elif rps > rps_values[lower_rps_index + 1]:
+#             while rps > rps_values[lower_rps_index + 1]:
+#                 lower_rps_index += 1
+#             interpolated_requests_at_new_rps = []
+#             for i in range(len(request_over_time_raw[lower_rps_index]["requests_over_time"])):
+#                 lower_request = request_over_time_raw[lower_rps_index]["requests_over_time"][i]
+#                 upper_request = request_over_time_raw[lower_rps_index + 1]["requests_over_time"][i]
+#                 new_value = linearly_interpolate_value(rps, rps_values[lower_rps_index], lower_request, rps_values[lower_rps_index + 1], upper_request)
+#                 interpolated_requests_at_new_rps.append(new_value)
+#             interpolated_request_values.append({ "requests_per_second": rps, "requests_over_time": interpolated_requests_at_new_rps })
+#     interpolated_request_over_time_results = []
+#     for request_value in interpolated_request_values:
+#         request_distribution = Distribution(data=request_value["requests_over_time"])
+#         result = generate_metric_report(request_distribution, "requestsOverTime")
+#         result["requestsPerSecond"] = request_value["requests_per_second"]
+#         interpolated_request_over_time_results.append(result)
+#     return { "rawData": request_over_time_results, "interpolatedData": interpolated_request_over_time_results }
+
+
+def generate_workload_details(report: TextGenerationBenchmarkReport, benchmarks: List[TextGenerationBenchmark]) -> Dict[str, Any]:
+    all_prompt_token_data = [data for benchmark in benchmarks for data in benchmark.prompt_token_distribution.data]
     all_prompt_token_distribution = Distribution(data=all_prompt_token_data)
-    all_output_token_data = [data for benchmark in report.benchmarks for data in benchmark.output_token_distribution.data]
+    all_output_token_data = [data for benchmark in benchmarks for data in benchmark.output_token_distribution.data]
     all_output_token_distribution = Distribution(data=all_output_token_data)

     prompt_token_data = generate_metric_report(all_prompt_token_distribution, "tokenDistributions")
     output_token_data = generate_metric_report(all_output_token_distribution, "tokenDistributions")

-    prompt_token_samples = [result.prompt for benchmark in report.benchmarks for result in benchmark.results]
-    output_token_samples = [result.output for benchmark in report.benchmarks for result in benchmark.results]
+    prompt_token_samples = [result.prompt for benchmark in benchmarks for result in benchmark.results]
+    output_token_samples = [result.output for benchmark in benchmarks for result in benchmark.results]

     num_samples = min(5, len(prompt_token_samples), len(output_token_samples))
     sample_indices = random.sample(range(len(prompt_token_samples)), num_samples)

     sample_prompts = [prompt_token_samples[i] for i in sample_indices]
+    """
+    Need a holistic approach to parsing out characters in the prompt that don't convert well into the format we need.
+    """
     sample_prompts = list(map(lambda prompt: prompt.replace("\n", " ").replace("\"", "'"), sample_prompts))

     sample_outputs = [output_token_samples[i] for i in sample_indices]
     sample_outputs = list(map(lambda output: output.replace("\n", " ").replace("\"", "'"), sample_outputs))

-    request_over_time_results = generate_request_over_time_data(report.benchmarks)
+    request_over_time_results = generate_request_over_time_data(benchmarks)

     return {
         "prompts": {
@@ -184,35 +203,92 @@ def generate_workload_details(report: TextGenerationBenchmarkReport) -> Dict[str
 def generate_benchmark_json(bm: TextGenerationBenchmark) -> Dict[str, Any]:
     ttft_dist_ms = Distribution(data=[val * 1000 for val in bm.ttft_distribution.data])
     ttft_data = generate_metric_report(ttft_dist_ms, 'ttft')
-    tpot_dist_ms = Distribution(data=[val * 1000 for val in bm.itl_distribution.data])
-    tpot_data = generate_metric_report(tpot_dist_ms, 'tpot')
-    throughput_dist_ms = Distribution(data=[val * 1000 for val in bm.output_token_throughput_distribution.data])
+    itl_dist_ms = Distribution(data=[val * 1000 for val in bm.itl_distribution.data])
+    itl_data = generate_metric_report(itl_dist_ms, 'tpot')
+    throughput_dist_ms = Distribution(data=bm.output_token_throughput_distribution.data)
     throughput_data = generate_metric_report(throughput_dist_ms, 'throughput')
     latency_dist_ms = Distribution(data=[val * 1000 for val in bm.request_latency_distribution.data])
-    time_per_request_data = generate_metric_report(latency_dist_ms, 'timePerRequest')
+    latency_data = generate_metric_report(latency_dist_ms, 'timePerRequest')
     return {
         "requestsPerSecond": bm.completed_request_rate,
+        **itl_data,
         **ttft_data,
-        **tpot_data,
         **throughput_data,
-        **time_per_request_data,
+        **latency_data,
     }

+def generate_interpolated_benchmarks(benchmarks: List[TextGenerationBenchmark]):
+    """
+    Should we only use constant-rate benchmarks here, since synchronous and throughput runs might not be appropriate to lump in for interpolation across all rps?
+
+    Other edge case: what if the rps values don't span a whole rps even with multiple benchmarks?
+    ex: 1.1, 1.3, 1.5, 2.1, 2.5 can interpolate at 2 rps
+    or worse, 1.1, 1.4, 1.6 can't interpolate at all
+    """
+    if len(benchmarks) == 1:
+        return []
+
+    sorted_benchmarks = sorted(benchmarks[:], key=lambda bm: bm.completed_request_rate)
+    rps_values = [bm.completed_request_rate for bm in sorted_benchmarks]
+    rps_range = list(range(math.ceil(min(rps_values)), math.ceil(max(rps_values))))
+
+    ttft_data_by_rps = list(map(lambda bm: (bm.completed_request_rate, bm.ttft_distribution.data), sorted_benchmarks))
+    interpolated_ttft_data_by_rps = interpolate_data_points(ttft_data_by_rps, rps_range)
+
+    itl_data_by_rps = list(map(lambda bm: (bm.completed_request_rate, bm.itl_distribution.data), sorted_benchmarks))
+    interpolated_itl_data_by_rps = interpolate_data_points(itl_data_by_rps, rps_range)
+
+    throughput_data_by_rps = list(map(lambda bm: (bm.completed_request_rate, bm.output_token_throughput_distribution.data), sorted_benchmarks))
+    interpolated_throughput_data_by_rps = interpolate_data_points(throughput_data_by_rps, rps_range)
+
+    latency_data_by_rps = list(map(lambda bm: (bm.completed_request_rate, bm.request_latency_distribution.data), sorted_benchmarks))
+    interpolated_latency_data_by_rps = interpolate_data_points(latency_data_by_rps, rps_range)
+
+    benchmark_json = []
+    for i in range(len(interpolated_ttft_data_by_rps)):
+        rps, interpolated_ttft_data = interpolated_ttft_data_by_rps[i]
+        ttft_dist_ms = Distribution(data=[val * 1000 for val in interpolated_ttft_data])
+        final_ttft_data = generate_metric_report(ttft_dist_ms, 'ttft')
+
+        _, interpolated_itl_data = interpolated_itl_data_by_rps[i]
+        itl_dist_ms = Distribution(data=[val * 1000 for val in interpolated_itl_data])
+        final_itl_data = generate_metric_report(itl_dist_ms, 'tpot')
+
+        _, interpolated_throughput_data = interpolated_throughput_data_by_rps[i]
+        throughput_dist_ms = Distribution(data=interpolated_throughput_data)
+        final_throughput_data = generate_metric_report(throughput_dist_ms, 'throughput')
+
+        _, interpolated_latency_data = interpolated_latency_data_by_rps[i]
+        latency_dist_ms = Distribution(data=[val * 1000 for val in interpolated_latency_data])
+        final_latency_data = generate_metric_report(latency_dist_ms, 'timePerRequest')
+
+        benchmark_json.append({
+            "requestsPerSecond": rps,
+            **final_itl_data,
+            **final_ttft_data,
+            **final_throughput_data,
+            **final_latency_data,
+        })
+    return benchmark_json
+
 def generate_benchmarks_json(benchmarks: List[TextGenerationBenchmark]):
-    benchmark_report_json = []
+    raw_benchmark_json = []
     for benchmark in benchmarks:
         benchmarks_report = generate_benchmark_json(benchmark)
-        benchmark_report_json.append(benchmarks_report)
-    return benchmark_report_json
+        raw_benchmark_json.append(benchmarks_report)
+    interpolated_benchmark_json = generate_interpolated_benchmarks(benchmarks)
+
+    return { "raw": raw_benchmark_json, "interpolated_by_rps": interpolated_benchmark_json }

 def generate_js_variable(variable_name: str, data: dict) -> str:
     json_data = json.dumps(data, indent=2)
     return f'`window.{variable_name} = {json_data};`' # Wrap in quotes

 def generate_ui_api_data(report: TextGenerationBenchmarkReport):
-    run_info_data = generate_run_info(report)
-    workload_details_data = generate_workload_details(report)
-    benchmarks_data = generate_benchmarks_json(report.benchmarks)
+    filtered_benchmarks = list(filter(lambda bm: bm.completed_request_rate > 0, report.benchmarks))
+    run_info_data = generate_run_info(report, filtered_benchmarks)
+    workload_details_data = generate_workload_details(report, filtered_benchmarks)
+    benchmarks_data = generate_benchmarks_json(filtered_benchmarks)
     run_info_script = generate_js_variable("run_info", run_info_data)
     workload_details_script = generate_js_variable("workload_details", workload_details_data)
     benchmarks_script = generate_js_variable("benchmarks", benchmarks_data)
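Note on the rps grid: generate_interpolated_benchmarks only interpolates at whole rps values that fall inside the measured span, which is exactly the edge case the docstring raises, and generate_benchmarks_json now exposes both series under the "raw" and "interpolated_by_rps" keys. A small worked example of the grid computation, using the docstring's own numbers:

import math

# Worked example of the integer rps grid used by generate_interpolated_benchmarks.
rps_values = [1.1, 1.3, 1.5, 2.1, 2.5]
print(list(range(math.ceil(min(rps_values)), math.ceil(max(rps_values)))))  # [2] -> metrics can be interpolated at 2 rps

rps_values = [1.1, 1.4, 1.6]
print(list(range(math.ceil(min(rps_values)), math.ceil(max(rps_values)))))  # [] -> no whole rps in the span, nothing to interpolate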
