Commit 31adea4

Define sampling sizes and counts as StatusBreakdowns
1 parent 82a381f commit 31adea4

File tree: 1 file changed (+38, -58 lines)

src/guidellm/benchmark/benchmark.py

Lines changed: 38 additions & 58 deletions
@@ -47,9 +47,9 @@
 
 
 SuccessfulT = TypeVar("SuccessfulT")
-IncompleteT = TypeVar("IncompleteT")
-ErroredT = TypeVar("ErroredT")
-class StatusBreakdown(StandardBaseModel, Generic[SuccessfulT, IncompleteT, ErroredT]):
+ErroredT = TypeVar("ErroredT", default=SuccessfulT)
+IncompleteT = TypeVar("IncompleteT", default=ErroredT)
+class StatusBreakdown(StandardBaseModel, Generic[SuccessfulT, ErroredT, IncompleteT]):
     """
     A serializable model representing the breakdown of statistics for a benchmark run
     split into successful, incomplete, and errored.
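With ErroredT and IncompleteT now defaulting to the preceding type parameter (PEP 696 style), a single type argument fans out to all three statuses, which the new StatusBreakdown[int] annotations later in this diff rely on. A minimal standalone sketch, substituting plain pydantic.BaseModel for StandardBaseModel and inferring the field names from how the model is constructed and read elsewhere in this diff:

from typing import Generic

from pydantic import BaseModel
from typing_extensions import TypeVar  # backports the PEP 696 `default=` argument

SuccessfulT = TypeVar("SuccessfulT")
ErroredT = TypeVar("ErroredT", default=SuccessfulT)
IncompleteT = TypeVar("IncompleteT", default=ErroredT)


class StatusBreakdown(BaseModel, Generic[SuccessfulT, ErroredT, IncompleteT]):
    # Field names are inferred from the usage in this diff; the real
    # declarations sit outside the changed lines.
    successful: SuccessfulT
    errored: ErroredT
    incomplete: IncompleteT


# One type argument now covers all three statuses, so StatusBreakdown[int]
# replaces StatusBreakdown[int, int, int]; runtime acceptance of the short
# form needs a recent pydantic/typing_extensions.
counts = StatusBreakdown[int](successful=10, errored=2, incomplete=1)
print(counts.model_dump())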
@@ -603,42 +603,16 @@ class GenerativeBenchmark(Benchmark):
     """
 
     type_: Literal["generative_benchmark"] = "generative_benchmark" # type: ignore[assignment]
-    successful_total: int = Field(
+    total_count: StatusBreakdown[int] = Field(
         description=(
-            "The total number of completed requests in the benchmark, "
+            "The total number of requests in the benchmark, "
             "excluding warmup and cooldown."
         )
     )
-    successful_sampled_size: Optional[int] = Field(
+    sampled_size: Optional[StatusBreakdown[int]] = Field(
         default=None,
         description=(
-            "The number of completed requests that were randomly sampled for "
-            "the benchmark. None if no sampling was applied."
-        ),
-    )
-    incomplete_total: int = Field(
-        description=(
-            "The total number of incomplete requests in the benchmark, "
-            "excluding warmup and cooldown."
-        )
-    )
-    incomplete_sampled_size: Optional[int] = Field(
-        default=None,
-        description=(
-            "The number of incomplete requests that were randomly sampled for "
-            "the benchmark. None if no sampling was applied."
-        ),
-    )
-    errored_total: int = Field(
-        description=(
-            "The total number of errored requests in the benchmark, "
-            "excluding warmup and cooldown."
-        )
-    )
-    errored_sampled_size: Optional[int] = Field(
-        default=None,
-        description=(
-            "The number of errored requests that were randomly sampled for "
+            "The number of requests that were randomly sampled for "
             "the benchmark. None if no sampling was applied."
         ),
     )
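Readers of the benchmark switch from three parallel scalars to attribute access on the breakdown. An illustrative helper (the function name is mine; the attribute paths come from this diff, and the import path simply mirrors the file shown here):

from guidellm.benchmark.benchmark import GenerativeBenchmark


def report_counts(benchmark: GenerativeBenchmark) -> None:
    # One StatusBreakdown[int] now carries the per-status request counts.
    print("successful:", benchmark.total_count.successful)
    print("incomplete:", benchmark.total_count.incomplete)
    print("errored:", benchmark.total_count.errored)

    # sampled_size stays Optional: None means no sampling was applied.
    if benchmark.sampled_size is not None:
        print("sampled successful:", benchmark.sampled_size.successful)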
@@ -648,6 +622,15 @@ class GenerativeBenchmark(Benchmark):
     end_time: float = Field(
         description="The end time of the last request for the benchmark.",
     )
+    @computed_field # type: ignore[misc]
+    @property
+    def duration(self) -> float:
+        """
+        :return: The duration of the benchmark in seconds from the start of the
+            first request to the end of the last request.
+        """
+        return self.end_time - self.start_time
+
     metrics: GenerativeMetrics = Field(
         description=(
             "The metrics for the benchmark run represented as a distribution of "
@@ -658,23 +641,13 @@ class GenerativeBenchmark(Benchmark):
     requests: StatusBreakdown[
         List[GenerativeTextResponseStats],
         List[GenerativeTextErrorStats],
-        List[GenerativeTextErrorStats]
     ] = Field(
         description=(
             "The breakdown of requests for the benchmark run including completed, "
             "incomplete, and errored requests."
         ),
     )
 
-    @computed_field # type: ignore[misc]
-    @property
-    def duration(self) -> float:
-        """
-        :return: The duration of the benchmark in seconds from the start of the
-            first request to the end of the last request.
-        """
-        return self.end_time - self.start_time
-
     def create_sampled(
         self, sample_size: int, error_sample_size: Optional[int] = None
     ) -> "GenerativeBenchmark":
@@ -703,30 +676,30 @@ def create_sampled(
             )
 
         if (
-            self.successful_sampled_size is not None
-            and sample_size > self.successful_sampled_size
+            self.sampled_size is not None
+            and sample_size > self.sampled_size.successful
         ):
             raise ValueError(
                 "The benchmark's completed response have already been sampled with "
-                f"size {self.successful_sampled_size} and cannot be resampled with "
+                f"size {self.sampled_size.successful} and cannot be resampled with "
                 f"a larger size, given: {sample_size}"
             )
         if (
-            self.incomplete_sampled_size is not None
-            and sample_size > self.incomplete_sampled_size
+            self.sampled_size is not None
+            and sample_size > self.sampled_size.incomplete
         ):
             raise ValueError(
                 "The benchmark's incomplete response have already been sampled with "
-                f"size {self.incomplete_sampled_size} and cannot be resampled with "
+                f"size {self.sampled_size.incomplete} and cannot be resampled with "
                 f"a larger size, given: {sample_size}"
            )
         if (
-            self.errored_sampled_size is not None
-            and error_sample_size > self.errored_sampled_size
+            self.sampled_size is not None
+            and error_sample_size > self.sampled_size.errored
         ):
             raise ValueError(
                 "The benchmark's errored response have already been sampled with "
-                f"size {self.errored_sampled_size} and cannot be resampled with "
+                f"size {self.sampled_size.errored} and cannot be resampled with "
                 f"a larger size, given: {error_sample_size}"
             )
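The guards now read from the single breakdown instead of three separate attributes. A hedged sketch of the behaviour they enforce (the function name and sizes are hypothetical, and the import path mirrors the file shown in this diff; assumes a populated benchmark):

from guidellm.benchmark.benchmark import GenerativeBenchmark


def demonstrate_resample_guard(benchmark: GenerativeBenchmark) -> None:
    # The sampled copy carries its sizes in sampled_size, so asking that copy
    # for a larger sample than it retained trips the ValueError guards above.
    sampled = benchmark.create_sampled(sample_size=50, error_sample_size=10)
    try:
        sampled.create_sampled(sample_size=500, error_sample_size=10)
    except ValueError as err:
        print(f"rejected: {err}")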

@@ -735,15 +708,20 @@ def create_sampled(
         error_sample_size = min(error_sample_size, len(self.requests.errored))
 
         sampled_instance = self.model_copy()
-        sampled_instance.successful_sampled_size = sample_size
+        sampled_instance.sampled_size = StatusBreakdown(
+            successful=0,
+            incomplete=0,
+            errored=0,
+        )
+        sampled_instance.sampled_size.successful = sample_size
         sampled_instance.requests.successful = random.sample(
             self.requests.successful, sample_size
         )
-        sampled_instance.incomplete_sampled_size = incomplete_sample_size
+        sampled_instance.sampled_size.incomplete = incomplete_sample_size
         sampled_instance.requests.incomplete = random.sample(
             self.requests.incomplete, incomplete_sample_size
         )
-        sampled_instance.errored_sampled_size = error_sample_size
+        sampled_instance.sampled_size.errored = error_sample_size
         sampled_instance.requests.errored = random.sample(
             self.requests.errored, error_sample_size
         )
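Callers keep using create_sampled the same way; only the bookkeeping attribute changes. An illustrative sketch, assuming a fully populated GenerativeBenchmark (the function name and sizes are hypothetical, the import path mirrors this file):

from guidellm.benchmark.benchmark import GenerativeBenchmark


def sample_for_report(benchmark: GenerativeBenchmark) -> GenerativeBenchmark:
    # The returned copy now records how many requests of each status it kept
    # on a single StatusBreakdown[int] instead of three *_sampled_size fields.
    sampled = benchmark.create_sampled(sample_size=20, error_sample_size=5)
    assert sampled.sampled_size is not None
    print(
        sampled.sampled_size.successful,
        sampled.sampled_size.incomplete,
        sampled.sampled_size.errored,
    )
    return sampled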
@@ -835,9 +813,11 @@ def from_stats(
             worker=worker,
             request_loader=requests_loader,
             extras=extras or {},
-            successful_total=len(successful),
-            incomplete_total=len(incomplete),
-            errored_total=len(errored),
+            total_count=StatusBreakdown(
+                successful=len(successful),
+                incomplete=len(incomplete),
+                errored=len(errored),
+            ),
             start_time=start_time,
             end_time=end_time,
             metrics=GenerativeMetrics(
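The same keyword construction works anywhere a count breakdown is needed, not just inside from_stats. A short hedged example (the request lists are made up; the import path mirrors the file shown in this diff):

from guidellm.benchmark.benchmark import StatusBreakdown

successful_requests = ["req-1", "req-2", "req-3"]
incomplete_requests = ["req-4"]
errored_requests: list[str] = []

# Mirrors the from_stats call above: per-status counts gathered in one model.
total_count = StatusBreakdown(
    successful=len(successful_requests),
    incomplete=len(incomplete_requests),
    errored=len(errored_requests),
)
print(total_count.model_dump())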
