scikit-learn-contrib
Lines changed: 545 additions & 0 deletions
diff --git a/examples/regression/4-covariate-shift/paper_replication.ipynb b/examples/regression/4-covariate-shift/paper_replication.ipynb
Lines changed: 26 additions & 22 deletions
diff --git a/mapie/dre.py b/mapie/dre.py
old mode 100755
new mode 100644
@@ -12,17 +12,17 @@
 class DensityRatioEstimator():
     """ Template class for density ratio estimation. """
 
-    def __init__(self):
-        pass
+    def __init__(self) -> None:
+        raise NotImplementedError
 
-    def fit(self):
-        pass
+    def fit(self) -> None:
+        raise NotImplementedError
 
-    def predict(self):
-        pass
+    def predict(self) -> None:
+        raise NotImplementedError
 
-    def check_is_fitted(self):
-        pass
+    def check_is_fitted(self) -> None:
+        raise NotImplementedError
 
 
 class ProbClassificationDRE(DensityRatioEstimator):
@@ -37,8 +37,8 @@ class ProbClassificationDRE(DensityRatioEstimator):
     Parameters
     ----------
     estimator: Optional[ClassifierMixin]
-        Any classifier with scikit-learn API
-        (i.e. with fit, predict, and predict_proba methods), by default ``None``.
+        Any classifier with scikit-learn API (i.e. with fit, predict, and
+        predict_proba methods), by default ``None``.
         If ``None``, estimator defaults to a ``LogisticRegression`` instance.
 
     clip_min: Optional[float]
@@ -56,11 +56,11 @@ class ProbClassificationDRE(DensityRatioEstimator):
     Attributes
     ----------
     source_prob: float
-        The marginal probability of getting a datapoint from the source 
+        The marginal probability of getting a datapoint from the source
         distribution.
 
     target_prob: float
-        The marginal probability of getting a datapoint from the target 
+        The marginal probability of getting a datapoint from the target
         distribution.
 
     References
@@ -80,14 +80,14 @@ def __init__(
 
         self.estimator = self._check_estimator(estimator)
 
-        if self.clip_max is None:
+        if clip_max is None:
             self.clip_max = 1
         elif all((clip_max >= 0, clip_max <= 1)):
             self.clip_max = clip_max
         else:
             raise ValueError("Expected `clip_max` to be between 0 and 1.")
 
-        if self.clip_min is None:
+        if clip_min is None:
             self.clip_min = 0
         elif all((clip_min >= 0, clip_min <= clip_max)):
             self.clip_min = clip_min
@@ -160,19 +160,19 @@ def fit(
 
         source_prob: Optional[float]
             The marginal probability of getting a datapoint from the source
-            distribution. If ``None``, the proportion of source examples in 
+            distribution. If ``None``, the proportion of source examples in
             the training dataset is used.
 
             By default ``None``.
 
         target_prob: Optional[float]
             The marginal probability of getting a datapoint from the target
-            distribution. If ``None``, the proportion of target examples in 
+            distribution. If ``None``, the proportion of target examples in
             the training dataset is used.
 
             By default ``None``.
 
-        sample_weight : Optional[ArrayLike] of shape (n_source_samples + n_target_samples,)
+        sample_weight : Optional[ArrayLike] of shape (n_source + n_target,)
             Sample weights for fitting the out-of-fold models.
             If ``None``, then samples are equally weighted.
             If some weights are null,
@@ -192,15 +192,18 @@ def fit(
         n_target = X_target.shape[0]
 
         if source_prob is None:
-            source_prob = self.n_source/(self.n_source + self.n_target)
+            source_prob = n_source/(n_source + n_target)
 
         if target_prob is None:
-            target_prob = self.n_target/(self.n_source + self.n_target)
+            target_prob = n_target/(n_source + n_target)
 
         if source_prob + target_prob != 1:
             raise ValueError(
                 "``source_prob`` and ``target_prob`` do not add up to 1.")
 
+        self.source_prob = source_prob
+        self.target_prob = target_prob
+
         # Estimate the conditional probability of source/target given X.
         X = np.concatenate((X_source, X_target), axis=0)
         y = np.concatenate((np.zeros(n_source), np.ones(n_target)), axis=0)
@@ -243,9 +246,10 @@ def predict(
         log_probs = np.clip(log_probs, a_min=np.log(
             self.clip_min), a_max=np.log(self.clip_max))
 
-        return np.exp(log_probs[:, 1] - log_probs[:, 0] + np.log(self.source_prob) - np.log(self.target_prob))
+        return np.exp(log_probs[:, 1] - log_probs[:, 0] +
+                      np.log(self.source_prob) - np.log(self.target_prob))
 
-    def check_is_fitted(self):
+    def check_is_fitted(self) -> None:
         if isinstance(self.estimator, Pipeline):
             check_is_fitted(self.estimator[-1])
         else:
@@ -254,7 +258,7 @@ def check_is_fitted(self):
 
 def calculate_ess(weights: ArrayLike) -> float:
     """
-    Calculates the effective sample size given importance weights for the 
+    Calculates the effective sample size given importance weights for the
     source distribution.
 
     Parameters