"""Base class for all objectives."""
from abc import ABC, abstractmethod

import numpy as np
import pandas as pd

from checkmates.problem_types import handle_problem_types
from checkmates.utils import classproperty


class ObjectiveBase(ABC):
    """Base class for all objectives."""

    problem_types = None

    @property
    @classmethod
    @abstractmethod
    def name(cls):
        """Returns a name describing the objective."""

    @property
    @classmethod
    @abstractmethod
    def greater_is_better(cls):
        """Returns a boolean determining if a greater score indicates better model performance."""

    @property
    @classmethod
    @abstractmethod
    def score_needs_proba(cls):
        """Returns a boolean determining if the score() method needs probability estimates.

        This should be true for objectives which work with predicted
        probabilities, like log loss or AUC, and false for objectives
        which compare predicted class labels to the actual labels, like
        F1 or correlation.
        """

    @property
    @classmethod
    @abstractmethod
    def perfect_score(cls):
        """Returns the score obtained by evaluating this objective on a perfect model."""

    @property
    @classmethod
    @abstractmethod
    def is_bounded_like_percentage(cls):
        """Returns whether this objective is bounded between 0 and 1, inclusive."""

    @property
    @classmethod
    @abstractmethod
    def expected_range(cls):
        """Returns the expected range of the objective, which is not necessarily the possible range.

        For example, our expected R2 range is [-1, 1], although the
        actual range is (-inf, 1].
        """

    @classmethod
    @abstractmethod
    def objective_function(
        cls,
        y_true,
        y_predicted,
        y_train=None,
        X=None,
        sample_weight=None,
    ):
        """Computes the relative value of the provided predictions compared to the actual labels, according to a specified metric.

        Args:
            y_predicted (pd.Series): Predicted values of length [n_samples]
            y_true (pd.Series): Actual class labels of length [n_samples]
            y_train (pd.Series): Observed training values of length [n_samples]
            X (pd.DataFrame or np.ndarray): Extra data of shape [n_samples, n_features] necessary to calculate score
            sample_weight (pd.DataFrame or np.ndarray): Sample weights used in computing objective value result

        Returns:
            Numerical value used to calculate score
        """

    @classproperty
    def positive_only(cls):
        """If True, this objective is only valid for positive data. Defaults to False."""
        return False

    def score(self, y_true, y_predicted, y_train=None, X=None, sample_weight=None):
        """Returns a numerical score indicating performance based on the differences between the predicted and actual values.

        Args:
            y_predicted (pd.Series): Predicted values of length [n_samples]
            y_true (pd.Series): Actual class labels of length [n_samples]
            y_train (pd.Series): Observed training values of length [n_samples]
            X (pd.DataFrame or np.ndarray): Extra data of shape [n_samples, n_features] necessary to calculate score
            sample_weight (pd.DataFrame or np.ndarray): Sample weights used in computing objective value result

        Returns:
            score
        """
        if X is not None:
            X = self._standardize_input_type(X)
        if y_train is not None:
            y_train = self._standardize_input_type(y_train)
        y_true = self._standardize_input_type(y_true)
        y_predicted = self._standardize_input_type(y_predicted)
        self.validate_inputs(y_true, y_predicted)
        return self.objective_function(
            y_true,
            y_predicted,
            y_train=y_train,
            X=X,
            sample_weight=sample_weight,
        )
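
    # See the usage sketch at the bottom of this module for an end-to-end
    # example of implementing an objective and calling score().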

    @staticmethod
    def _standardize_input_type(input_data):
        """Standardize input to pandas for scoring.

        Args:
            input_data (list, pd.DataFrame, pd.Series, or np.ndarray): A matrix of predictions or predicted probabilities

        Returns:
            pd.Series or pd.DataFrame: a pd.Series, or a pd.DataFrame if predicted probabilities were provided.
        """
        if isinstance(input_data, (pd.Series, pd.DataFrame)):
            return input_data
        if isinstance(input_data, list):
            if isinstance(input_data[0], list):
                return pd.DataFrame(input_data)
            return pd.Series(input_data)
        if isinstance(input_data, np.ndarray):
            if len(input_data.shape) == 1:
                return pd.Series(input_data)
            return pd.DataFrame(input_data)
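
    # Illustrative conversions (hypothetical inputs, for example only):
    #   _standardize_input_type([1, 0, 1])         -> pd.Series
    #   _standardize_input_type([[0.2, 0.8]])      -> pd.DataFrame
    #   _standardize_input_type(np.zeros(3))       -> pd.Series
    #   _standardize_input_type(np.zeros((3, 2)))  -> pd.DataFrame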

    def validate_inputs(self, y_true, y_predicted):
        """Validates the input based on a few simple checks.

        Args:
            y_predicted (pd.Series or pd.DataFrame): Predicted values of length [n_samples].
            y_true (pd.Series): Actual class labels of length [n_samples].

        Raises:
            ValueError: If the inputs are malformed.
        """
        if y_predicted.shape[0] != y_true.shape[0]:
            raise ValueError(
                "Inputs have mismatched dimensions: y_predicted has length {}, y_true has length {}".format(
                    len(y_predicted),
                    len(y_true),
                ),
            )
        if len(y_true) == 0:
            raise ValueError("Length of inputs is 0")

        if isinstance(y_true, pd.DataFrame):
            y_true = y_true.to_numpy().flatten()
        if np.isnan(y_true).any() or np.isinf(y_true).any():
            raise ValueError("y_true contains NaN or infinity")

        if isinstance(y_predicted, pd.DataFrame):
            y_predicted = y_predicted.to_numpy().flatten()
        if np.isnan(y_predicted).any() or np.isinf(y_predicted).any():
            raise ValueError("y_predicted contains NaN or infinity")
        if self.score_needs_proba and np.any((y_predicted < 0) | (y_predicted > 1)):
            raise ValueError(
                "y_predicted contains probability estimates not within [0, 1]",
            )
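
    # For example (hypothetical inputs), validate_inputs raises ValueError for
    # mismatched lengths such as y_true=pd.Series([1, 0]) vs.
    # y_predicted=pd.Series([1]), and, when score_needs_proba is True, for
    # probability estimates outside [0, 1] such as pd.Series([1.2, -0.1]).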

    @classmethod
    def calculate_percent_difference(cls, score, baseline_score):
        """Calculate the percent difference between scores.

        Args:
            score (float): A score. Output of the score method of this objective.
            baseline_score (float): A score. Output of the score method of this objective. In practice,
                this is the score achieved on this objective with a baseline estimator.

        Returns:
            float: The percent difference between the scores. Note that for objectives that can be interpreted
                as percentages, this will be the difference between the reference score and score. For all other
                objectives, the difference will be normalized by the reference score.
        """
        if pd.isna(score) or pd.isna(baseline_score):
            return np.nan

        if np.isclose(baseline_score - score, 0, atol=1e-10):
            return 0

        # Return inf when dividing by 0
        if (
            np.isclose(baseline_score, 0, atol=1e-10)
            and not cls.is_bounded_like_percentage
        ):
            return np.inf

        decrease = False
        if (baseline_score > score and cls.greater_is_better) or (
            baseline_score < score and not cls.greater_is_better
        ):
            decrease = True

        difference = baseline_score - score
        change = (
            difference
            if cls.is_bounded_like_percentage
            else difference / baseline_score
        )
        return 100 * (-1) ** decrease * np.abs(change)
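
    # Worked example (hypothetical scores): for an unbounded objective with
    # greater_is_better=True, baseline_score=2.0 and score=1.5 give
    # difference=0.5, change=0.5/2.0=0.25, and decrease=True, so the result
    # is -25.0. For a bounded-like-percentage objective, baseline_score=0.8
    # and score=0.9 give change=-0.1 and decrease=False, so the result is
    # +10.0.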

    @classmethod
    def is_defined_for_problem_type(cls, problem_type):
        """Returns whether or not an objective is defined for a problem type."""
        return handle_problem_types(problem_type) in cls.problem_types
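

# A minimal usage sketch, illustrative only and not part of the library: a
# hypothetical mean-absolute-error objective showing how the abstract
# interface above is typically satisfied. Plain class attributes override the
# abstract properties, and problem_types would normally list the supported
# problem-type members (elided here).
if __name__ == "__main__":

    class ExampleMAE(ObjectiveBase):
        """Hypothetical mean absolute error objective (example only)."""

        name = "Example MAE"
        greater_is_better = False
        score_needs_proba = False
        perfect_score = 0.0
        is_bounded_like_percentage = False
        expected_range = [0, float("inf")]
        problem_types = []

        @classmethod
        def objective_function(
            cls,
            y_true,
            y_predicted,
            y_train=None,
            X=None,
            sample_weight=None,
        ):
            # Mean absolute error, optionally weighted per sample.
            errors = np.abs(np.asarray(y_true) - np.asarray(y_predicted))
            if sample_weight is not None:
                return float(np.average(errors, weights=np.asarray(sample_weight)))
            return float(errors.mean())

    objective = ExampleMAE()
    # score() standardizes the inputs, validates them, and delegates to
    # objective_function; prints 0.5 for these values.
    print(objective.score(y_true=[1.0, 2.0, 3.0], y_predicted=[1.5, 2.0, 2.0]))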