 from collections import defaultdict
 from math import ceil
-from pydantic import BaseModel
+from pydantic import computed_field, BaseModel
 import random
 from typing import List, Optional, Tuple

 from guidellm.benchmark.benchmark import GenerativeBenchmark
 from guidellm.objects.statistics import DistributionSummary

-__all__ = ["Bucket", "Model", "Dataset", "RunInfo", "TokenDistribution", "TokenDetails", "Server", "WorkloadDetails", "BenchmarkDatum"]
+__all__ = ["Bucket", "Model", "Dataset", "RunInfo", "Distribution", "TokenDetails", "Server", "WorkloadDetails", "BenchmarkDatum"]

 class Bucket(BaseModel):
     value: float
@@ -69,22 +69,22 @@ def from_benchmarks(cls, benchmarks: list[GenerativeBenchmark]):
             dataset=Dataset(name="N/A")
         )

-class TokenDistribution(BaseModel):
+class Distribution(BaseModel):
     statistics: Optional[DistributionSummary] = None
     buckets: list[Bucket]
     bucket_width: float


 class TokenDetails(BaseModel):
     samples: list[str]
-    token_distributions: TokenDistribution
+    token_distributions: Distribution

 class Server(BaseModel):
     target: str

 class RequestOverTime(BaseModel):
     num_benchmarks: int
-    requests_over_time: TokenDistribution
+    requests_over_time: Distribution

 class WorkloadDetails(BaseModel):
     prompts: TokenDetails
@@ -109,8 +109,8 @@ def from_benchmarks(cls, benchmarks: list[GenerativeBenchmark]):

         prompt_token_stats = DistributionSummary.from_values(prompt_tokens)
         output_token_stats = DistributionSummary.from_values(output_tokens)
-        prompt_token_distributions = TokenDistribution(statistics=prompt_token_stats, buckets=prompt_token_buckets, bucket_width=1)
-        output_token_distributions = TokenDistribution(statistics=output_token_stats, buckets=output_token_buckets, bucket_width=1)
+        prompt_token_distributions = Distribution(statistics=prompt_token_stats, buckets=prompt_token_buckets, bucket_width=1)
+        output_token_distributions = Distribution(statistics=output_token_stats, buckets=output_token_buckets, bucket_width=1)

         min_start_time = benchmarks[0].run_stats.start_time

@@ -122,7 +122,7 @@ def from_benchmarks(cls, benchmarks: list[GenerativeBenchmark]):
         ]
         number_of_buckets = len(benchmarks)
         request_over_time_buckets, bucket_width = Bucket.from_data(all_req_times, None, number_of_buckets)
-        request_over_time_distribution = TokenDistribution(buckets=request_over_time_buckets, bucket_width=bucket_width)
+        request_over_time_distribution = Distribution(buckets=request_over_time_buckets, bucket_width=bucket_width)
         return cls(
             prompts=TokenDetails(samples=sample_prompts, token_distributions=prompt_token_distributions),
             generations=TokenDetails(samples=sample_outputs, token_distributions=output_token_distributions),
@@ -131,19 +131,39 @@ def from_benchmarks(cls, benchmarks: list[GenerativeBenchmark]):
             server=Server(target=target)
         )

+class TabularDistributionSummary(DistributionSummary):
+    """
+    Same fields as `DistributionSummary`, but adds a ready-to-serialize/iterate
+    `percentile_rows` helper.
+    """
+
+    @computed_field
+    @property
+    def percentile_rows(self) -> list[dict[str, float]]:
+        return [
+            {"percentile": name, "value": value}
+            for name, value in self.percentiles.model_dump().items()
+        ]
+
+    @classmethod
+    def from_distribution_summary(
+        cls, distribution: DistributionSummary
+    ) -> "TabularDistributionSummary":
+        return cls(**distribution.model_dump())
+
 class BenchmarkDatum(BaseModel):
     requests_per_second: float
-    tpot: DistributionSummary
-    ttft: DistributionSummary
-    throughput: DistributionSummary
-    time_per_request: DistributionSummary
+    tpot: TabularDistributionSummary
+    ttft: TabularDistributionSummary
+    throughput: TabularDistributionSummary
+    time_per_request: TabularDistributionSummary

     @classmethod
     def from_benchmark(cls, bm: GenerativeBenchmark):
         return cls(
             requests_per_second=bm.metrics.requests_per_second.successful.mean,
-            tpot=bm.metrics.inter_token_latency_ms.successful,
-            ttft=bm.metrics.time_to_first_token_ms.successful,
-            throughput=bm.metrics.output_tokens_per_second.successful,
-            time_per_request=bm.metrics.request_latency.successful,
+            tpot=TabularDistributionSummary.from_distribution_summary(bm.metrics.inter_token_latency_ms.successful),
+            ttft=TabularDistributionSummary.from_distribution_summary(bm.metrics.time_to_first_token_ms.successful),
+            throughput=TabularDistributionSummary.from_distribution_summary(bm.metrics.output_tokens_per_second.successful),
+            time_per_request=TabularDistributionSummary.from_distribution_summary(bm.metrics.request_latency.successful),
         )
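
The snippet below is a minimal, self-contained sketch of the pydantic v2 pattern the new `TabularDistributionSummary` relies on: a `@computed_field` property so the derived percentile rows are included in `model_dump()`/JSON output, and building the subclass from a parent instance by round-tripping through `model_dump()`. The `Percentiles` and `Summary` models here are hypothetical stand-ins for guidellm's actual `DistributionSummary`, kept only to make the example runnable on its own.

from pydantic import BaseModel, computed_field


class Percentiles(BaseModel):
    # Hypothetical stand-in for DistributionSummary.percentiles
    p50: float
    p90: float
    p99: float


class Summary(BaseModel):
    # Hypothetical stand-in for guidellm's DistributionSummary
    mean: float
    percentiles: Percentiles


class TabularSummary(Summary):
    # @computed_field makes the derived rows part of model_dump()/JSON,
    # so a UI can iterate the percentiles as table rows.
    @computed_field
    @property
    def percentile_rows(self) -> list[dict[str, float | str]]:
        return [
            {"percentile": name, "value": value}
            for name, value in self.percentiles.model_dump().items()
        ]

    @classmethod
    def from_summary(cls, summary: Summary) -> "TabularSummary":
        # Upcast by round-tripping through model_dump(), mirroring
        # TabularDistributionSummary.from_distribution_summary above.
        return cls(**summary.model_dump())


base = Summary(mean=12.3, percentiles=Percentiles(p50=10.0, p90=20.0, p99=30.0))
tabular = TabularSummary.from_summary(base)
print(tabular.model_dump()["percentile_rows"])
# -> [{'percentile': 'p50', 'value': 10.0}, {'percentile': 'p90', 'value': 20.0}, {'percentile': 'p99', 'value': 30.0}]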