Commit 3b821c8

Move metrics to subobject in output
1 parent f8161ed commit 3b821c8

1 file changed: +147 -118 lines changed

src/guidellm/benchmark/benchmark.py

Lines changed: 147 additions & 118 deletions
```diff
@@ -37,8 +37,10 @@
     "BenchmarkArgs",
     "BenchmarkRunStats",
     "Benchmark",
+    "BenchmarkMetrics",
     "GenerativeTextResponseStats",
     "GenerativeTextErrorStats",
+    "GenerativeMetrics",
     "GenerativeBenchmark",
 ]
```

```diff
@@ -234,6 +236,19 @@ def total(self) -> int:
         return self.total_successful + self.total_incomplete + self.total_errored


+class BenchmarkMetrics(StandardBaseModel):
+    """
+    A serializable model representing the metrics for a benchmark run.
+    """
+
+    request_per_second: StatusDistributionSummary = Field(
+        description="The distribution of requests per second for the benchmark.",
+    )
+    request_concurrency: StatusDistributionSummary = Field(
+        description="The distribution of requests concurrency for the benchmark.",
+    )
+
+
 class Benchmark(StandardBaseModel):
     """
     The base serializable model representing a benchmark run and its results.
```
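The two fields on the new subobject are built from the same per-request (start, end) intervals but summarize different things: requests completed per unit time (distribution_type="rate") versus requests in flight at an instant (distribution_type="concurrency"). A toy sketch of the distinction, not guidellm's StatusDistributionSummary implementation (which also splits results by request status):

```python
# Toy illustration of "rate" vs "concurrency" over request intervals.
requests = [(0.0, 2.0), (0.5, 1.5), (1.0, 3.0)]  # (start, end) in seconds

# rate: completed requests per unit time across the benchmark window
window = max(end for _, end in requests) - min(start for start, _ in requests)
rate = len(requests) / window  # 3 requests / 3.0 s = 1.0 req/s

# concurrency: number of requests in flight at a sampled instant
def concurrency_at(t: float) -> int:
    return sum(1 for start, end in requests if start <= t < end)

print(rate, concurrency_at(1.25))  # 1.0 3
```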
```diff
@@ -291,11 +306,11 @@ class Benchmark(StandardBaseModel):
         )
     )

-    requests_per_second: StatusDistributionSummary = Field(
-        description="The distribution of requests per second for the benchmark.",
-    )
-    requests_concurrency: StatusDistributionSummary = Field(
-        description="The distribution of requests concurrency for the benchmark.",
+    metrics: BenchmarkMetrics = Field(
+        description=(
+            "The metrics for the benchmark run represented as a distribution of "
+            "various per-request statistics."
+        ),
     )
```
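For anything consuming the serialized output, this changes the shape in two ways: the distributions move under a "metrics" key, and the base field names become singular (requests_per_second becomes request_per_second, requests_concurrency becomes request_concurrency). A hedged before/after sketch, where the empty dicts stand in for full StatusDistributionSummary payloads and the surrounding report structure is assumed:

```python
# Hypothetical output fragments; only the keys relevant to this commit.
old_benchmark = {"requests_per_second": {}, "requests_concurrency": {}}
new_benchmark = {"metrics": {"request_per_second": {}, "request_concurrency": {}}}

# Before this commit, consumers read the distributions at the top level:
rps_old = old_benchmark["requests_per_second"]

# After it, they live one level down, under "metrics":
rps_new = new_benchmark["metrics"]["request_per_second"]
```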

```diff
@@ -506,6 +521,59 @@ def output_tokens_per_second(self) -> Optional[float]: # type: ignore[override]
         return super().output_tokens_per_second


+class GenerativeMetrics(BenchmarkMetrics):
+    """
+    A serializable model representing the metrics for a generative benchmark run.
+    """
+
+    request_latency: StatusDistributionSummary = Field(
+        description="The distribution of latencies for the completed requests.",
+    )
+    prompt_token_count: StatusDistributionSummary = Field(
+        description=(
+            "The distribution of token counts in the prompts for completed, "
+            "errored, and all requests."
+        )
+    )
+    output_token_count: StatusDistributionSummary = Field(
+        description=(
+            "The distribution of token counts in the outputs for completed, "
+            "errored, and all requests."
+        )
+    )
+    time_to_first_token_ms: StatusDistributionSummary = Field(
+        description=(
+            "The distribution of latencies to receiving the first token in "
+            "milliseconds for completed, errored, and all requests."
+        ),
+    )
+    time_per_output_token_ms: StatusDistributionSummary = Field(
+        description=(
+            "The distribution of latencies per output token in milliseconds for "
+            "completed, errored, and all requests. "
+            "This includes the time to generate the first token and all other tokens."
+        ),
+    )
+    inter_token_latency_ms: StatusDistributionSummary = Field(
+        description=(
+            "The distribution of latencies between tokens in milliseconds for "
+            "completed, errored, and all requests."
+        ),
+    )
+    output_tokens_per_second: StatusDistributionSummary = Field(
+        description=(
+            "The distribution of output tokens per second for completed, "
+            "errored, and all requests."
+        ),
+    )
+    tokens_per_second: StatusDistributionSummary = Field(
+        description=(
+            "The distribution of tokens per second, including prompt and output tokens "
+            "for completed, errored, and all requests."
+        ),
+    )
+
+
 class GenerativeBenchmark(Benchmark):
     """
     A serializable model representing a benchmark run and its results for generative
```
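The per-request values behind these distributions are related to one another. A worked example with hypothetical numbers, assuming definitions consistent with the field descriptions above (in particular, that time_per_output_token_ms averages over all output tokens while inter_token_latency_ms averages only the gaps between them):

```python
# One request's raw measurements (hypothetical numbers):
request_latency_ms = 1100.0     # full request duration
time_to_first_token_ms = 120.0  # prompt processing + first decode step
output_tokens = 50

# Time per output token spreads the whole latency over every output token,
# first token included (per the description above):
time_per_output_token_ms = request_latency_ms / output_tokens  # 22.0 ms

# Inter-token latency covers only the gaps between tokens; a request with
# n output tokens has n - 1 gaps:
inter_token_latency_ms = (request_latency_ms - time_to_first_token_ms) / (
    output_tokens - 1
)  # 980 / 49 = 20.0 ms

output_tokens_per_second = output_tokens / (request_latency_ms / 1000)  # ~45.5
```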
```diff
@@ -568,51 +636,10 @@ class GenerativeBenchmark(Benchmark):
     end_time: float = Field(
         description="The end time of the last request for the benchmark.",
     )
-
-    request_latency: StatusDistributionSummary = Field(
-        description="The distribution of latencies for the completed requests.",
-    )
-    prompt_token_count: StatusDistributionSummary = Field(
-        description=(
-            "The distribution of token counts in the prompts for completed, "
-            "errored, and all requests."
-        )
-    )
-    output_token_count: StatusDistributionSummary = Field(
-        description=(
-            "The distribution of token counts in the outputs for completed, "
-            "errored, and all requests."
-        )
-    )
-    time_to_first_token_ms: StatusDistributionSummary = Field(
+    metrics: GenerativeMetrics = Field(
         description=(
-            "The distribution of latencies to receiving the first token in "
-            "milliseconds for completed, errored, and all requests."
-        ),
-    )
-    time_per_output_token_ms: StatusDistributionSummary = Field(
-        description=(
-            "The distribution of latencies per output token in milliseconds for "
-            "completed, errored, and all requests. "
-            "This includes the time to generate the first token and all other tokens."
-        ),
-    )
-    inter_token_latency_ms: StatusDistributionSummary = Field(
-        description=(
-            "The distribution of latencies between tokens in milliseconds for "
-            "completed, errored, and all requests."
-        ),
-    )
-    output_tokens_per_second: StatusDistributionSummary = Field(
-        description=(
-            "The distribution of output tokens per second for completed, "
-            "errored, and all requests."
-        ),
-    )
-    tokens_per_second: StatusDistributionSummary = Field(
-        description=(
-            "The distribution of tokens per second, including prompt and output tokens "
-            "for completed, errored, and all requests."
+            "The metrics for the benchmark run represented as a distribution of "
+            "various per-request statistics."
         ),
     )
```
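The same move applies to attribute access on GenerativeBenchmark instances, not just to the serialized JSON. A sketch, assuming a populated instance is passed in:

```python
def summarize(benchmark: "GenerativeBenchmark") -> None:
    # Before this commit: ttft = benchmark.time_to_first_token_ms
    # After it, the distributions hang off the metrics subobject:
    ttft = benchmark.metrics.time_to_first_token_ms
    itl = benchmark.metrics.inter_token_latency_ms
    print(ttft, itl)
```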

```diff
@@ -793,74 +820,76 @@ def from_stats(
             errored_requests=errored,
             start_time=start_time,
             end_time=end_time,
-            requests_per_second=StatusDistributionSummary.from_request_times(
-                request_types=total_types,
-                requests=[(req.start_time, req.end_time) for req in total],
-                distribution_type="rate",
-            ),
-            requests_concurrency=StatusDistributionSummary.from_request_times(
-                request_types=total_types,
-                requests=[(req.start_time, req.end_time) for req in total],
-                distribution_type="concurrency",
-            ),
-            request_latency=StatusDistributionSummary.from_values(
-                value_types=total_types,
-                values=[req.request_latency for req in total],
-            ),
-            prompt_token_count=StatusDistributionSummary.from_values(
-                value_types=list(total_types_with_prompt),
-                values=[req.prompt_tokens for req in total_with_prompt],
-            ),
-            output_token_count=StatusDistributionSummary.from_values(
-                value_types=list(total_types_with_output_first),
-                values=[req.output_tokens for req in total_with_output_first],
-            ),
-            time_to_first_token_ms=StatusDistributionSummary.from_values(
-                value_types=list(total_types_with_output_first),
-                values=[
-                    req.time_to_first_token_ms or 0 for req in total_with_output_first
-                ],
-            ),
-            time_per_output_token_ms=StatusDistributionSummary.from_values(
-                value_types=list(total_types_with_output_first),
-                values=[
-                    req.time_per_output_token_ms or 0 for req in total_with_output_first
-                ],
-                weights=[req.output_tokens for req in total_with_output_first],
-            ),
-            inter_token_latency_ms=StatusDistributionSummary.from_values(
-                value_types=list(total_types_with_output_multi),
-                values=[
-                    req.inter_token_latency_ms or 0 for req in total_with_output_multi
-                ],
-                weights=[req.output_tokens - 1 for req in total_with_output_multi],
-            ),
-            output_tokens_per_second=StatusDistributionSummary.from_iterable_request_times(
-                request_types=list(total_types_with_output_first),
-                requests=[
-                    (req.start_time, req.end_time) for req in total_with_output_first
-                ],
-                first_iter_times=[
-                    req.first_token_time or req.start_time
-                    for req in total_with_output_first
-                ],
-                iter_counts=[req.output_tokens for req in total_with_output_first],
-            ),
-            tokens_per_second=StatusDistributionSummary.from_iterable_request_times(
-                request_types=list(total_types_with_output_first),
-                requests=[
-                    (req.start_time, req.end_time) for req in total_with_output_first
-                ],
-                first_iter_times=[
-                    req.first_token_time or req.start_time
-                    for req in total_with_output_first
-                ],
-                iter_counts=[
-                    req.prompt_tokens + req.output_tokens
-                    for req in total_with_output_first
-                ],
-                first_iter_counts=[
-                    req.prompt_tokens for req in total_with_output_first
-                ],
+            metrics=GenerativeMetrics(
+                request_per_second=StatusDistributionSummary.from_request_times(
+                    request_types=total_types,
+                    requests=[(req.start_time, req.end_time) for req in total],
+                    distribution_type="rate",
+                ),
+                request_concurrency=StatusDistributionSummary.from_request_times(
+                    request_types=total_types,
+                    requests=[(req.start_time, req.end_time) for req in total],
+                    distribution_type="concurrency",
+                ),
+                request_latency=StatusDistributionSummary.from_values(
+                    value_types=total_types,
+                    values=[req.request_latency for req in total],
+                ),
+                prompt_token_count=StatusDistributionSummary.from_values(
+                    value_types=list(total_types_with_prompt),
+                    values=[req.prompt_tokens for req in total_with_prompt],
+                ),
+                output_token_count=StatusDistributionSummary.from_values(
+                    value_types=list(total_types_with_output_first),
+                    values=[req.output_tokens for req in total_with_output_first],
+                ),
+                time_to_first_token_ms=StatusDistributionSummary.from_values(
+                    value_types=list(total_types_with_output_first),
+                    values=[
+                        req.time_to_first_token_ms or 0 for req in total_with_output_first
+                    ],
+                ),
+                time_per_output_token_ms=StatusDistributionSummary.from_values(
+                    value_types=list(total_types_with_output_first),
+                    values=[
+                        req.time_per_output_token_ms or 0 for req in total_with_output_first
+                    ],
+                    weights=[req.output_tokens for req in total_with_output_first],
+                ),
+                inter_token_latency_ms=StatusDistributionSummary.from_values(
+                    value_types=list(total_types_with_output_multi),
+                    values=[
+                        req.inter_token_latency_ms or 0 for req in total_with_output_multi
+                    ],
+                    weights=[req.output_tokens - 1 for req in total_with_output_multi],
+                ),
+                output_tokens_per_second=StatusDistributionSummary.from_iterable_request_times(
+                    request_types=list(total_types_with_output_first),
+                    requests=[
+                        (req.start_time, req.end_time) for req in total_with_output_first
+                    ],
+                    first_iter_times=[
+                        req.first_token_time or req.start_time
+                        for req in total_with_output_first
+                    ],
+                    iter_counts=[req.output_tokens for req in total_with_output_first],
+                ),
+                tokens_per_second=StatusDistributionSummary.from_iterable_request_times(
+                    request_types=list(total_types_with_output_first),
+                    requests=[
+                        (req.start_time, req.end_time) for req in total_with_output_first
+                    ],
+                    first_iter_times=[
+                        req.first_token_time or req.start_time
+                        for req in total_with_output_first
+                    ],
+                    iter_counts=[
+                        req.prompt_tokens + req.output_tokens
+                        for req in total_with_output_first
+                    ],
+                    first_iter_counts=[
+                        req.prompt_tokens for req in total_with_output_first
+                    ],
+                ),
             ),
         )
```
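Two constructor details above are worth calling out: inter_token_latency_ms passes weights=[req.output_tokens - 1, ...] because each request's reported ITL is already a mean over its own token gaps, so pooling across requests must weight by gap count; and tokens_per_second passes first_iter_counts=[req.prompt_tokens, ...] so all prompt tokens are credited to the first iteration. A toy sketch of that weighted pooling, assuming StatusDistributionSummary.from_values applies the weights as a weighted mean:

```python
# Why inter_token_latency_ms is weighted by (output_tokens - 1):
per_request_itl_ms = [20.0, 40.0]  # each request's mean inter-token latency
output_tokens = [101, 11]          # i.e. 100 gaps and 10 gaps respectively

weights = [n - 1 for n in output_tokens]
pooled = sum(v * w for v, w in zip(per_request_itl_ms, weights)) / sum(weights)
print(pooled)  # ~21.8 ms per gap, not the naive (20 + 40) / 2 = 30 ms
```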
