4747
4848
4949SuccessfulT = TypeVar ("SuccessfulT" )
50- IncompleteT = TypeVar ("IncompleteT" )
51- ErroredT = TypeVar ("ErroredT" )
52- class StatusBreakdown (StandardBaseModel , Generic [SuccessfulT , IncompleteT , ErroredT ]):
50+ ErroredT = TypeVar ("ErroredT" , default = SuccessfulT )  # falls back to SuccessfulT when not supplied; NOTE(review): TypeVar(default=...) is PEP 696 - confirm the imported TypeVar supports it
51+ IncompleteT = TypeVar ("IncompleteT" , default = ErroredT )  # falls back to ErroredT when not supplied, so a single type argument can cover all three statuses
52+ class StatusBreakdown (StandardBaseModel , Generic [SuccessfulT , ErroredT , IncompleteT ]):
5353 """
5454 A serializable model representing the breakdown of statistics for a benchmark run
5555 split into successful, incomplete, and errored.
@@ -603,42 +603,16 @@ class GenerativeBenchmark(Benchmark):
603603 """
604604
605605 type_ : Literal ["generative_benchmark" ] = "generative_benchmark" # type: ignore[assignment]
606- successful_total : int = Field (
606+ total_count : StatusBreakdown [ int ] = Field (
607607 description = (
608- "The total number of completed requests in the benchmark, "
608+ "The total number of requests in the benchmark, "
609609 "excluding warmup and cooldown."
610610 )
611611 )
612- successful_sampled_size : Optional [int ] = Field (
612+ sampled_size : Optional [StatusBreakdown [ int ] ] = Field (
613613 default = None ,
614614 description = (
615- "The number of completed requests that were randomly sampled for "
616- "the benchmark. None if no sampling was applied."
617- ),
618- )
619- incomplete_total : int = Field (
620- description = (
621- "The total number of incomplete requests in the benchmark, "
622- "excluding warmup and cooldown."
623- )
624- )
625- incomplete_sampled_size : Optional [int ] = Field (
626- default = None ,
627- description = (
628- "The number of incomplete requests that were randomly sampled for "
629- "the benchmark. None if no sampling was applied."
630- ),
631- )
632- errored_total : int = Field (
633- description = (
634- "The total number of errored requests in the benchmark, "
635- "excluding warmup and cooldown."
636- )
637- )
638- errored_sampled_size : Optional [int ] = Field (
639- default = None ,
640- description = (
641- "The number of errored requests that were randomly sampled for "
615+ "The number of requests that were randomly sampled for "
642616 "the benchmark. None if no sampling was applied."
643617 ),
644618 )
@@ -648,6 +622,15 @@ class GenerativeBenchmark(Benchmark):
648622 end_time : float = Field (
649623 description = "The end time of the last request for the benchmark." ,
650624 )
625+ @computed_field # type: ignore[misc]
626+ @property  # read-only derived value; NOTE(review): computed_field presumably adds it to the serialized model - confirm against pydantic docs
627+ def duration (self ) -> float :
628+ """
629+ :return: The duration of the benchmark in seconds from the start of the
630+ first request to the end of the last request.
631+ """
632+ return self .end_time - self .start_time  # end_time minus start_time; nothing is cached, so it is recomputed on each access
633+
651634 metrics : GenerativeMetrics = Field (
652635 description = (
653636 "The metrics for the benchmark run represented as a distribution of "
@@ -658,23 +641,13 @@ class GenerativeBenchmark(Benchmark):
658641 requests : StatusBreakdown [
659642 List [GenerativeTextResponseStats ],
660643 List [GenerativeTextErrorStats ],
661- List [GenerativeTextErrorStats ]
662644 ] = Field (
663645 description = (
664646 "The breakdown of requests for the benchmark run including completed, "
665647 "incomplete, and errored requests."
666648 ),
667649 )
668650
669- @computed_field # type: ignore[misc]
670- @property
671- def duration (self ) -> float :
672- """
673- :return: The duration of the benchmark in seconds from the start of the
674- first request to the end of the last request.
675- """
676- return self .end_time - self .start_time
677-
678651 def create_sampled (
679652 self , sample_size : int , error_sample_size : Optional [int ] = None
680653 ) -> "GenerativeBenchmark" :
@@ -703,30 +676,30 @@ def create_sampled(
703676 )
704677
705678 if (
706- self .successful_sampled_size is not None
707- and sample_size > self .successful_sampled_size
679+ self .sampled_size is not None
680+ and sample_size > self .sampled_size . successful
708681 ):
709682 raise ValueError (
710683 "The benchmark's completed response have already been sampled with "
711- f"size { self .successful_sampled_size } and cannot be resampled with "
684+ f"size { self .sampled_size . successful } and cannot be resampled with "
712685 f"a larger size, given: { sample_size } "
713686 )
714687 if (
715- self .incomplete_sampled_size is not None
716- and sample_size > self .incomplete_sampled_size
688+ self .sampled_size is not None
689+ and sample_size > self .sampled_size . incomplete
717690 ):
718691 raise ValueError (
719692 "The benchmark's incomplete response have already been sampled with "
720- f"size { self .incomplete_sampled_size } and cannot be resampled with "
693+ f"size { self .sampled_size . incomplete } and cannot be resampled with "
721694 f"a larger size, given: { sample_size } "
722695 )
723696 if (
724- self .errored_sampled_size is not None
725- and error_sample_size > self .errored_sampled_size
697+ self .sampled_size is not None
698+ and error_sample_size > self .sampled_size . errored
726699 ):
727700 raise ValueError (
728701 "The benchmark's errored response have already been sampled with "
729- f"size { self .errored_sampled_size } and cannot be resampled with "
702+ f"size { self .sampled_size . errored } and cannot be resampled with "
730703 f"a larger size, given: { error_sample_size } "
731704 )
732705
@@ -735,15 +708,20 @@ def create_sampled(
735708 error_sample_size = min (error_sample_size , len (self .requests .errored ))
736709
737710 sampled_instance = self .model_copy ()
738- sampled_instance .successful_sampled_size = sample_size
711+ sampled_instance .sampled_size = StatusBreakdown (
712+ successful = 0 ,
713+ incomplete = 0 ,
714+ errored = 0 ,
715+ )
716+ sampled_instance .sampled_size .successful = sample_size
739717 sampled_instance .requests .successful = random .sample (
740718 self .requests .successful , sample_size
741719 )
742- sampled_instance .incomplete_sampled_size = incomplete_sample_size
720+ sampled_instance .sampled_size . incomplete = incomplete_sample_size
743721 sampled_instance .requests .incomplete = random .sample (
744722 self .requests .incomplete , incomplete_sample_size
745723 )
746- sampled_instance .errored_sampled_size = error_sample_size
724+ sampled_instance .sampled_size . errored = error_sample_size
747725 sampled_instance .requests .errored = random .sample (
748726 self .requests .errored , error_sample_size
749727 )
@@ -835,9 +813,11 @@ def from_stats(
835813 worker = worker ,
836814 request_loader = requests_loader ,
837815 extras = extras or {},
838- successful_total = len (successful ),
839- incomplete_total = len (incomplete ),
840- errored_total = len (errored ),
816+ total_count = StatusBreakdown (
817+ successful = len (successful ),
818+ incomplete = len (incomplete ),
819+ errored = len (errored ),
820+ ),
841821 start_time = start_time ,
842822 end_time = end_time ,
843823 metrics = GenerativeMetrics (
0 commit comments