4141 "GenerativeTextResponseStats" ,
4242 "GenerativeTextErrorStats" ,
4343 "GenerativeMetrics" ,
44+ "GenerativeRequestsBreakdown" ,
4445 "GenerativeBenchmark" ,
4546]
4647
@@ -574,6 +575,23 @@ class GenerativeMetrics(BenchmarkMetrics):
574575 )
575576
576577
class GenerativeRequestsBreakdown(StandardBaseModel):
    """
    A serializable model representing the breakdown of requests for a generative
    benchmark run.
    """

    # Field declaration order is kept as successful -> incomplete -> errored.
    # None of the fields declares a default, so (assuming `Field` is pydantic's)
    # all three lists have to be supplied when the model is constructed.

    # Requests that completed; each entry is a GenerativeTextResponseStats.
    successful: List[GenerativeTextResponseStats] = Field(
        description="The list of completed requests.",
    )

    # Requests that did not complete; entries share the error-stats type
    # used for errored requests below.
    incomplete: List[GenerativeTextErrorStats] = Field(
        description="The list of incomplete requests.",
    )

    # Requests that errored; each entry is a GenerativeTextErrorStats.
    errored: List[GenerativeTextErrorStats] = Field(
        description="The list of errored requests.",
    )
593+
594+
577595class GenerativeBenchmark (Benchmark ):
578596 """
579597 A serializable model representing a benchmark run and its results for generative
@@ -595,9 +613,6 @@ class GenerativeBenchmark(Benchmark):
595613 "the benchmark. None if no sampling was applied."
596614 ),
597615 )
598- successful_requests : List [GenerativeTextResponseStats ] = Field (
599- description = "The list of completed requests." ,
600- )
601616 incomplete_total : int = Field (
602617 description = (
603618 "The total number of incomplete requests in the benchmark, "
@@ -611,9 +626,6 @@ class GenerativeBenchmark(Benchmark):
611626 "the benchmark. None if no sampling was applied."
612627 ),
613628 )
614- incomplete_requests : List [GenerativeTextErrorStats ] = Field (
615- description = "The list of incomplete requests." ,
616- )
617629 errored_total : int = Field (
618630 description = (
619631 "The total number of errored requests in the benchmark, "
@@ -627,9 +639,6 @@ class GenerativeBenchmark(Benchmark):
627639 "the benchmark. None if no sampling was applied."
628640 ),
629641 )
630- errored_requests : List [GenerativeTextErrorStats ] = Field (
631- description = "The list of errored requests." ,
632- )
633642 start_time : float = Field (
634643 description = "The start time of the first request for the benchmark." ,
635644 )
@@ -642,6 +651,13 @@ class GenerativeBenchmark(Benchmark):
642651 "various per-request statistics."
643652 ),
644653 )
654+ # Serialized output follows field declaration order, so keep this field last.
655+ requests : GenerativeRequestsBreakdown = Field (
656+ description = (
657+ "The breakdown of requests for the benchmark run including completed, "
658+ "incomplete, and errored requests."
659+ ),
660+ )
645661
646662 @computed_field # type: ignore[misc]
647663 @property
@@ -707,22 +723,22 @@ def create_sampled(
707723 f"a larger size, given: { error_sample_size } "
708724 )
709725
710- sample_size = min (sample_size , len (self .successful_requests ))
711- incomplete_sample_size = min (sample_size , len (self .incomplete_requests ))
712- error_sample_size = min (error_sample_size , len (self .errored_requests ))
726+ sample_size = min (sample_size , len (self .requests . successful ))
727+ incomplete_sample_size = min (sample_size , len (self .requests . incomplete ))
728+ error_sample_size = min (error_sample_size , len (self .requests . errored ))
713729
714730 sampled_instance = self .model_copy ()
715731 sampled_instance .successful_sampled_size = sample_size
716- sampled_instance .successful_requests = random .sample (
717- self .successful_requests , sample_size
732+ sampled_instance .requests . successful = random .sample (
733+ self .requests . successful , sample_size
718734 )
719735 sampled_instance .incomplete_sampled_size = incomplete_sample_size
720- sampled_instance .incomplete_requests = random .sample (
721- self .incomplete_requests , incomplete_sample_size
736+ sampled_instance .requests . incomplete = random .sample (
737+ self .requests . incomplete , incomplete_sample_size
722738 )
723739 sampled_instance .errored_sampled_size = error_sample_size
724- sampled_instance .errored_requests = random .sample (
725- self .errored_requests , error_sample_size
740+ sampled_instance .requests . errored = random .sample (
741+ self .requests . errored , error_sample_size
726742 )
727743
728744 return sampled_instance
@@ -813,11 +829,8 @@ def from_stats(
813829 request_loader = requests_loader ,
814830 extras = extras or {},
815831 successful_total = len (successful ),
816- successful_requests = successful ,
817832 incomplete_total = len (incomplete ),
818- incomplete_requests = incomplete ,
819833 errored_total = len (errored ),
820- errored_requests = errored ,
821834 start_time = start_time ,
822835 end_time = end_time ,
823836 metrics = GenerativeMetrics (
@@ -892,4 +905,9 @@ def from_stats(
892905 ],
893906 ),
894907 ),
908+ requests = GenerativeRequestsBreakdown (
909+ successful = successful ,
910+ incomplete = incomplete ,
911+ errored = errored ,
912+ ),
895913 )
0 commit comments