|
1 | 1 | import random |
2 | 2 | import uuid |
3 | | -from typing import Any, Dict, List, Literal, Optional, TypeVar, Union |
| 3 | +from typing import Any, Dict, Generic, List, Literal, Optional, TypeVar, Union |
4 | 4 |
|
5 | 5 | from pydantic import Field, computed_field |
6 | 6 |
|
|
34 | 34 |
|
35 | 35 | __all__ = [ |
36 | 36 | "BENCH", |
| 37 | + "StatusBreakdown", |
37 | 38 | "BenchmarkArgs", |
38 | 39 | "BenchmarkRunStats", |
39 | 40 | "Benchmark", |
40 | 41 | "BenchmarkMetrics", |
41 | 42 | "GenerativeTextResponseStats", |
42 | 43 | "GenerativeTextErrorStats", |
43 | 44 | "GenerativeMetrics", |
44 | | - "GenerativeRequestsBreakdown", |
45 | 45 | "GenerativeBenchmark", |
46 | 46 | ] |
47 | 47 |
|
48 | 48 |
|
| 49 | +SuccessfulT = TypeVar("SuccessfulT") |
| 50 | +IncompleteT = TypeVar("IncompleteT") |
| 51 | +ErroredT = TypeVar("ErroredT") |
| 52 | +class StatusBreakdown(StandardBaseModel, Generic[SuccessfulT, IncompleteT, ErroredT]): |
| 53 | + """ |
| 54 | + Generic, serializable container that splits a benchmark run's values into |
| 55 | + successful, incomplete, and errored groups, each with its own payload type. |
| 56 | + """ |
| 57 | + |
| 58 | + successful: SuccessfulT = Field( |
| 59 | + description="Successful", |
| 60 | + ) |
| 61 | + incomplete: IncompleteT = Field( |
| 62 | + description="Incomplete", |
| 63 | + ) |
| 64 | + errored: ErroredT = Field( |
| 65 | + description="Errored", |
| 66 | + ) |
| 67 | + |
| 68 | + |
49 | 69 | class BenchmarkArgs(StandardBaseModel): |
50 | 70 | """ |
51 | 71 | A serializable model representing the arguments used to specify a benchmark run |
@@ -575,23 +595,6 @@ class GenerativeMetrics(BenchmarkMetrics): |
575 | 595 | ) |
576 | 596 |
|
577 | 597 |
|
578 | | -class GenerativeRequestsBreakdown(StandardBaseModel): |
579 | | - """ |
580 | | - A serializable model representing the breakdown of requests for a generative |
581 | | - benchmark run. |
582 | | - """ |
583 | | - |
584 | | - successful: List[GenerativeTextResponseStats] = Field( |
585 | | - description="The list of completed requests.", |
586 | | - ) |
587 | | - incomplete: List[GenerativeTextErrorStats] = Field( |
588 | | - description="The list of incomplete requests.", |
589 | | - ) |
590 | | - errored: List[GenerativeTextErrorStats] = Field( |
591 | | - description="The list of errored requests.", |
592 | | - ) |
593 | | - |
594 | | - |
595 | 598 | class GenerativeBenchmark(Benchmark): |
596 | 599 | """ |
597 | 600 | A serializable model representing a benchmark run and its results for generative |
@@ -652,7 +655,11 @@ class GenerativeBenchmark(Benchmark): |
652 | 655 | ), |
653 | 656 | ) |
654 | 657 | # Output is ordered so keep this at the end |
655 | | - requests: GenerativeRequestsBreakdown = Field( |
| 658 | + requests: StatusBreakdown[ |
| 659 | + List[GenerativeTextResponseStats], |
| 660 | + List[GenerativeTextErrorStats], |
| 661 | + List[GenerativeTextErrorStats] |
| 662 | + ] = Field( |
656 | 663 | description=( |
657 | 664 | "The breakdown of requests for the benchmark run including completed, " |
658 | 665 | "incomplete, and errored requests." |
@@ -905,7 +912,7 @@ def from_stats( |
905 | 912 | ], |
906 | 913 | ), |
907 | 914 | ), |
908 | | - requests=GenerativeRequestsBreakdown( |
| 915 | + requests=StatusBreakdown( |
909 | 916 | successful=successful, |
910 | 917 | incomplete=incomplete, |
911 | 918 | errored=errored, |
|
0 commit comments