12
12
class DensityRatioEstimator:
    """
    Template class for density ratio estimation.

    Every method defined here raises ``NotImplementedError``; concrete
    estimators are expected to subclass this template and override all of
    them.

    Notes
    -----
    ``__init__`` raises as well, so the template itself cannot be
    instantiated and a subclass constructor must not delegate to
    ``super().__init__()``.
    """

    def __init__(self) -> None:
        """
        Placeholder constructor; must be overridden.

        Raises
        ------
        NotImplementedError
            Always.
        """
        raise NotImplementedError

    def fit(self) -> None:
        """
        Placeholder for fitting the estimator; must be overridden.

        Raises
        ------
        NotImplementedError
            Always.
        """
        raise NotImplementedError

    def predict(self) -> None:
        """
        Placeholder for prediction; must be overridden.

        Raises
        ------
        NotImplementedError
            Always.
        """
        raise NotImplementedError

    def check_is_fitted(self) -> None:
        """
        Placeholder for the fitted-state check; must be overridden.

        Raises
        ------
        NotImplementedError
            Always.
        """
        raise NotImplementedError
26
26
27
27
28
28
class ProbClassificationDRE (DensityRatioEstimator ):
@@ -37,8 +37,8 @@ class ProbClassificationDRE(DensityRatioEstimator):
37
37
Parameters
38
38
----------
39
39
estimator: Optional[ClassifierMixin]
40
- Any classifier with scikit-learn API
41
- (i.e. with fit, predict, and predict_proba methods), by default ``None``.
40
+ Any classifier with scikit-learn API (i.e. with fit, predict, and
41
+ predict_proba methods), by default ``None``.
42
42
If ``None``, estimator defaults to a ``LogisticRegression`` instance.
43
43
44
44
clip_min: Optional[float]
@@ -56,11 +56,11 @@ class ProbClassificationDRE(DensityRatioEstimator):
56
56
Attributes
57
57
----------
58
58
source_prob: float
59
- The marginal probability of getting a datapoint from the source
59
+ The marginal probability of getting a datapoint from the source
60
60
distribution.
61
61
62
62
target_prob: float
63
- The marginal probability of getting a datapoint from the target
63
+ The marginal probability of getting a datapoint from the target
64
64
distribution.
65
65
66
66
References
@@ -80,14 +80,14 @@ def __init__(
80
80
81
81
self .estimator = self ._check_estimator (estimator )
82
82
83
- if self . clip_max is None :
83
+ if clip_max is None :
84
84
self .clip_max = 1
85
85
elif all ((clip_max >= 0 , clip_max <= 1 )):
86
86
self .clip_max = clip_max
87
87
else :
88
88
raise ValueError ("Expected `clip_max` to be between 0 and 1." )
89
89
90
- if self . clip_min is None :
90
+ if clip_min is None :
91
91
self .clip_min = 0
92
92
elif all ((clip_min >= 0 , clip_min <= clip_max )):
93
93
self .clip_min = clip_min
@@ -160,19 +160,19 @@ def fit(
160
160
161
161
source_prob: Optional[float]
162
162
The marginal probability of getting a datapoint from the source
163
- distribution. If ``None``, the proportion of source examples in
163
+ distribution. If ``None``, the proportion of source examples in
164
164
the training dataset is used.
165
165
166
166
By default ``None``.
167
167
168
168
target_prob: Optional[float]
169
169
The marginal probability of getting a datapoint from the target
170
- distribution. If ``None``, the proportion of target examples in
170
+ distribution. If ``None``, the proportion of target examples in
171
171
the training dataset is used.
172
172
173
173
By default ``None``.
174
174
175
- sample_weight : Optional[ArrayLike] of shape (n_source_samples + n_target_samples ,)
175
+ sample_weight : Optional[ArrayLike] of shape (n_source + n_target ,)
176
176
Sample weights for fitting the out-of-fold models.
177
177
If ``None``, then samples are equally weighted.
178
178
If some weights are null,
@@ -192,15 +192,18 @@ def fit(
192
192
n_target = X_target .shape [0 ]
193
193
194
194
if source_prob is None :
195
- source_prob = self . n_source / (self . n_source + self . n_target )
195
+ source_prob = n_source / (n_source + n_target )
196
196
197
197
if target_prob is None :
198
- target_prob = self . n_target / (self . n_source + self . n_target )
198
+ target_prob = n_target / (n_source + n_target )
199
199
200
200
if source_prob + target_prob != 1 :
201
201
raise ValueError (
202
202
"``source_prob`` and ``target_prob`` do not add up to 1." )
203
203
204
+ self .source_prob = source_prob
205
+ self .target_prob = target_prob
206
+
204
207
# Estimate the conditional probability of source/target given X.
205
208
X = np .concatenate ((X_source , X_target ), axis = 0 )
206
209
y = np .concatenate ((np .zeros (n_source ), np .ones (n_target )), axis = 0 )
@@ -243,9 +246,10 @@ def predict(
243
246
log_probs = np .clip (log_probs , a_min = np .log (
244
247
self .clip_min ), a_max = np .log (self .clip_max ))
245
248
246
- return np .exp (log_probs [:, 1 ] - log_probs [:, 0 ] + np .log (self .source_prob ) - np .log (self .target_prob ))
249
+ return np .exp (log_probs [:, 1 ] - log_probs [:, 0 ] +
250
+ np .log (self .source_prob ) - np .log (self .target_prob ))
247
251
248
- def check_is_fitted (self ):
252
+ def check_is_fitted (self ) -> None :
249
253
if isinstance (self .estimator , Pipeline ):
250
254
check_is_fitted (self .estimator [- 1 ])
251
255
else :
@@ -254,7 +258,7 @@ def check_is_fitted(self):
254
258
255
259
def calculate_ess (weights : ArrayLike ) -> float :
256
260
"""
257
- Calculates the effective sample size given importance weights for the
261
+ Calculates the effective sample size given importance weights for the
258
262
source distribution.
259
263
260
264
Parameters
0 commit comments