3
3
import numpy as np
4
4
from loguru import logger
5
5
6
+ from guidellm .core .serializable import Serializable
7
+
6
8
__all__ = ["Distribution" ]
7
9
8
10
9
- class Distribution :
11
+ class Distribution ( Serializable ) :
10
12
"""
11
- A class to represent a statistical distribution and perform various statistical
12
- analyses.
13
-
14
- :param data: List of numerical data points (int or float) to initialize the
15
- distribution.
16
- :type data: List[Union[int, float]], optional
13
+ A class to represent a statistical distribution and perform various
14
+ statistical analyses.
17
15
"""
18
16
19
- def __init__ (self , data : Optional [Union [List [int ], List [float ]]] = None ):
20
- """
21
- Initialize the Distribution with optional data.
22
-
23
- :param data: List of numerical data points to initialize the distribution,
24
- defaults to None.
25
- :type data: List[Union[int, float]], optional
26
- """
27
- self ._data = list (data ) if data else []
28
- logger .debug (f"Initialized Distribution with data: { self ._data } " )
17
def __init__(self, **data):
    """
    Initialize the Distribution from keyword arguments.

    All keyword arguments are forwarded unchanged to the ``Serializable``
    base initializer; the resulting ``data`` attribute is then logged at
    debug level.

    :param data: Keyword arguments passed through to the base initializer.
    """
    super().__init__(**data)
    # NOTE(review): assumes the base initializer populates ``self.data``.
    init_message = f"Initialized Distribution with data: {self.data}"
    logger.debug(init_message)
29
20
30
21
def __str__(self) -> str:
    """
    Return a short, human-readable summary of the Distribution.

    The summary lists the mean and median (two decimal places), the
    minimum, the maximum and the number of data points.

    :return: Summary string of the form ``Distribution(mean=..., ...)``.
    """
    # Properties are read in the same order as they appear in the output,
    # so any logging they perform happens in that order too.
    mean_part = f"mean={self.mean:.2f}"
    median_part = f"median={self.median:.2f}"
    min_part = f"min={self.min}"
    max_part = f"max={self.max}"
    count_part = f"count={len(self.data)}"
    summary = ", ".join((mean_part, median_part, min_part, max_part, count_part))
    return f"Distribution({summary})"
41
29
42
30
def __repr__(self) -> str:
    """
    Return an unambiguous representation of the Distribution for debugging.

    :return: String of the form ``Distribution(data=[...])`` showing the
        raw data points.
    """
    representation = f"Distribution(data={self.data})"
    return representation
60
35
61
36
@property
def mean(self) -> float:
    """
    Arithmetic mean of the data points.

    Logs a warning and returns ``0.0`` when there are no data points.

    :return: The mean of the distribution, or ``0.0`` if it is empty.
    """
    samples = self.data
    if samples:
        # ``.item()`` converts the NumPy scalar into a native Python number.
        result = np.mean(samples).item()
        logger.debug(f"Calculated mean: {result}")
        return result

    logger.warning("No data points available to calculate mean.")
    return 0.0
76
49
77
50
@property
def median(self) -> float:
    """
    Median of the data points.

    Logs a warning and returns ``0.0`` when there are no data points.

    :return: The median of the distribution, or ``0.0`` if it is empty.
    """
    samples = self.data
    if samples:
        # ``.item()`` converts the NumPy scalar into a native Python number.
        result = np.median(samples).item()
        logger.debug(f"Calculated median: {result}")
        return result

    logger.warning("No data points available to calculate median.")
    return 0.0
92
63
93
64
@property
def variance(self) -> float:
    """
    Variance of the data points, as computed by ``np.var`` with its
    default arguments.

    Logs a warning and returns ``0.0`` when there are no data points.

    :return: The variance of the distribution, or ``0.0`` if it is empty.
    """
    samples = self.data
    if samples:
        # ``.item()`` converts the NumPy scalar into a native Python number.
        result = np.var(samples).item()
        logger.debug(f"Calculated variance: {result}")
        return result

    logger.warning("No data points available to calculate variance.")
    return 0.0
108
77
109
78
@property
def std_deviation(self) -> float:
    """
    Standard deviation of the data points, as computed by ``np.std`` with
    its default arguments.

    Logs a warning and returns ``0.0`` when there are no data points.

    :return: The standard deviation of the distribution, or ``0.0`` if it
        is empty.
    """
    samples = self.data
    if samples:
        # ``.item()`` converts the NumPy scalar into a native Python number.
        result = np.std(samples).item()
        logger.debug(f"Calculated standard deviation: {result}")
        return result

    logger.warning("No data points available to calculate standard deviation.")
    return 0.0
124
91
125
92
def percentile (self , percentile : float ) -> float :
126
93
"""
127
94
Calculate and return the specified percentile of the distribution.
128
-
129
95
:param percentile: The desired percentile to calculate (0-100).
130
- :type percentile: float
131
96
:return: The specified percentile of the distribution.
132
- :rtype: float
133
97
"""
134
- if not self ._data :
98
+ if not self .data :
135
99
logger .warning ("No data points available to calculate percentile." )
136
100
return 0.0
137
101
@@ -142,61 +106,52 @@ def percentile(self, percentile: float) -> float:
142
106
def percentiles(self, percentiles: List[float]) -> List[float]:
    """
    Compute several percentiles of the data points in one call.

    Logs a warning and returns a list of ``0.0`` values (one per requested
    percentile) when there are no data points.

    :param percentiles: A list of desired percentiles to calculate (0-100).
    :return: The computed percentile values, in the same order as requested.
    """
    samples = self.data
    if samples:
        # ``np.percentile`` returns an array here; ``tolist`` yields plain
        # Python numbers.
        results = np.percentile(samples, percentiles).tolist()
        logger.debug(f"Calculated percentiles {percentiles}: {results}")
        return results

    logger.warning("No data points available to calculate percentiles.")
    return [0.0] * len(percentiles)
158
119
159
120
@property
def min(self) -> float:
    """
    Return the minimum value of the distribution.

    Logs a warning and returns ``0.0`` when there are no data points.

    :return: The minimum value of the distribution as a native Python
        number, or ``0.0`` if it is empty.
    """
    if not self.data:
        logger.warning("No data points available to calculate minimum.")
        return 0.0

    # ``np.min`` yields a NumPy scalar (e.g. ``np.int64``); ``.item()``
    # converts it to a native Python number so the result matches the other
    # statistics on this class and stays JSON-serializable.
    min_value = np.min(self.data).item()
    logger.debug(f"Calculated min: {min_value}")
    return min_value
174
133
175
134
@property
def max(self) -> float:
    """
    Return the maximum value of the distribution.

    Logs a warning and returns ``0.0`` when there are no data points.

    :return: The maximum value of the distribution as a native Python
        number, or ``0.0`` if it is empty.
    """
    if not self.data:
        logger.warning("No data points available to calculate maximum.")
        return 0.0

    # ``np.max`` yields a NumPy scalar (e.g. ``np.int64``); ``.item()``
    # converts it to a native Python number so the result matches the other
    # statistics on this class and stays JSON-serializable.
    max_value = np.max(self.data).item()
    logger.debug(f"Calculated max: {max_value}")
    return max_value
190
147
191
148
@property
192
149
def range (self ) -> float :
193
150
"""
194
151
Calculate and return the range of the distribution (max - min).
195
-
196
152
:return: The range of the distribution.
197
- :rtype: float
198
153
"""
199
- if not self ._data :
154
+ if not self .data :
200
155
logger .warning ("No data points available to calculate range." )
201
156
return 0.0
202
157
@@ -207,9 +162,7 @@ def range(self) -> float:
207
162
def describe (self ) -> dict :
208
163
"""
209
164
Return a dictionary describing various statistics of the distribution.
210
-
211
165
:return: A dictionary with statistical summaries of the distribution.
212
- :rtype: dict
213
166
"""
214
167
description = {
215
168
"mean" : self .mean ,
@@ -230,19 +183,15 @@ def describe(self) -> dict:
230
183
def add_data(self, new_data: Union[List[int], List[float]]):
    """
    Append new data points to the distribution.

    The existing data list is extended in place, then the added values are
    logged at debug level.

    :param new_data: A list of new numerical data points to add.
    """
    samples = self.data
    # Extend in place (rather than rebinding ``self.data``) so the same
    # list object is kept.
    samples.extend(new_data)
    logger.debug(f"Added new data: {new_data}")
239
190
240
191
def remove_data(self, remove_data: Union[List[int], List[float]]):
    """
    Remove specified data points from the distribution.

    Every occurrence of each value in ``remove_data`` is dropped; the
    remaining points keep their original order. ``self.data`` is rebound to
    a new list.

    :param remove_data: A list of numerical data points to remove.
    """
    # Build the lookup once so each membership test is O(1) instead of
    # re-scanning the removal list for every data point.
    excluded = set(remove_data)
    self.data = [item for item in self.data if item not in excluded]
    logger.debug(f"Removed data: {remove_data}")
0 commit comments