from __future__ import annotations
from typing import Optional, Union, Iterable, Tuple, List
+from inspect import signature

import numpy as np
from joblib import Parallel, delayed
from sklearn.utils import check_X_y, check_array
-from sklearn.utils.validation import check_is_fitted
+from sklearn.utils.validation import check_is_fitted, _check_sample_weight
from sklearn.base import clone
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.linear_model import LinearRegression
@@ -276,14 +277,91 @@ def _check_alpha(self, alpha: Union[float, Iterable[float]]) -> np.ndarray:
            raise ValueError("Invalid alpha. Allowed values are between 0 and 1.")
        return alpha_np

+    def _check_null_weight(
+        self,
+        sample_weight: ArrayLike,
+        X: ArrayLike,
+        y: ArrayLike
+    ) -> Tuple[ArrayLike, ArrayLike, ArrayLike]:
+        """
+        Check sample weights and remove samples with null sample weights.
+
+        Parameters
+        ----------
+        sample_weight : ArrayLike
+            Sample weights.
+        X : ArrayLike
+            Training samples.
+        y : ArrayLike
+            Training labels.
+
+        Returns
+        -------
+        sample_weight : ArrayLike
+            Non-null sample weights.
+        X : ArrayLike
+            Training samples with non-null weights.
+        y : ArrayLike
+            Training labels with non-null weights.
+        """
+        if sample_weight is not None:
+            sample_weight = _check_sample_weight(sample_weight, X)
+            non_null_weight = sample_weight != 0
+            X, y = X[non_null_weight, :], y[non_null_weight]
+            sample_weight = sample_weight[non_null_weight]
+        return sample_weight, X, y
+
+    def _fit_estimator(
+        self,
+        estimator: RegressorMixin,
+        X: ArrayLike,
+        y: ArrayLike,
+        supports_sw: bool,
+        sample_weight: ArrayLike
+    ) -> RegressorMixin:
+        """
+        Fit an estimator on training data by distinguishing two cases:
+        - the estimator supports sample weights and sample weights
+          are provided.
+        - the estimator does not support sample weights or sample weights
+          are not provided.
+
+        Parameters
+        ----------
+        estimator : RegressorMixin
+            Estimator to train.
+
+        X : ArrayLike of shape (n_samples, n_features)
+            Input data.
+
+        y : ArrayLike of shape (n_samples,)
+            Input labels.
+
+        supports_sw : bool
+            Whether or not estimator supports sample weights.
+
+        sample_weight : ArrayLike of shape (n_samples,)
+            Sample weights. If None, then samples are equally weighted.
+            By default None.
+
+        Returns
+        -------
+        RegressorMixin
+            Fitted estimator.
+        """
+        if sample_weight is not None and supports_sw:
+            estimator.fit(X, y, sample_weight=sample_weight)
+        else:
+            estimator.fit(X, y)
+        return estimator
+
    def _fit_and_predict_oof_model(
        self,
        estimator: RegressorMixin,
        X: ArrayLike,
        y: ArrayLike,
        train_index: ArrayLike,
        val_index: ArrayLike,
-        k: int
+        k: int,
+        supports_sw: bool,
+        sample_weight: Optional[ArrayLike] = None
    ) -> Tuple[RegressorMixin, ArrayLike, ArrayLike, ArrayLike]:
        """
        Fit a single out-of-fold model on a given training set and
@@ -309,6 +387,12 @@ def _fit_and_predict_oof_model(
        k : int
            Split identification number.

+        supports_sw : bool
+            Whether or not estimator supports sample weights.
+
+        sample_weight : ArrayLike of shape (n_samples,)
+            Sample weights. If None, then samples are equally weighted.
+            By default None.
+
        Returns
        -------
        Tuple[RegressorMixin, ArrayLike, ArrayLike, ArrayLike]
@@ -319,12 +403,13 @@ def _fit_and_predict_oof_model(
            - [3]: Validation data indices, of shape (n_samples_val,)
        """
        X_train, y_train, X_val = X[train_index], y[train_index], X[val_index]
-        estimator.fit(X_train, y_train)
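+        # Restrict the sample weights to the training fold; validation samples
+        # are only used for prediction, so they need no weights.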
+        sample_weight_train = (
+            sample_weight[train_index] if sample_weight is not None else None
+        )
+        estimator = self._fit_estimator(
+            estimator, X_train, y_train, supports_sw, sample_weight_train
+        )
        y_pred = estimator.predict(X_val)
        val_id = np.full_like(y_pred, k)
        return estimator, y_pred, val_id, val_index

-    def fit(self, X: ArrayLike, y: ArrayLike) -> MapieRegressor:
+    def fit(
+        self,
+        X: ArrayLike,
+        y: ArrayLike,
+        sample_weight: Optional[ArrayLike] = None
+    ) -> MapieRegressor:
        """
        Fit estimator and compute residuals used for prediction intervals.
        Fit the base estimator under the ``single_estimator_`` attribute.
@@ -339,6 +424,12 @@ def fit(self, X: ArrayLike, y: ArrayLike) -> MapieRegressor:
        y : ArrayLike of shape (n_samples,)
            Training labels.

+        sample_weight : ArrayLike of shape (n_samples,), default=None
+            Sample weights for fitting the out-of-fold models.
+            If None, then samples are equally weighted.
+            If some weights are null, their corresponding observations are
+            removed before the fitting process and hence have no residuals.
+            If weights are non-uniform, residuals are still uniformly weighted.
+
        Returns
        -------
        MapieRegressor
@@ -348,17 +439,20 @@ def fit(self, X: ArrayLike, y: ArrayLike) -> MapieRegressor:
        cv = self._check_cv(self.cv)
        estimator = self._check_estimator(self.estimator)
        X, y = check_X_y(X, y, force_all_finite=False, dtype=["float64", "object"])
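+        # Inspect the signature of the estimator's ``fit`` method to find out
+        # whether it accepts a ``sample_weight`` argument.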
+        fit_parameters = signature(estimator.fit).parameters
+        supports_sw = "sample_weight" in fit_parameters
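+        # Zero-weight samples are removed up front so they contribute no residuals.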
+        sample_weight, X, y = self._check_null_weight(sample_weight, X, y)
        y_pred = np.empty_like(y, dtype=float)
        self.estimators_: List[RegressorMixin] = []
        self.n_features_in_ = X.shape[1]
        self.k_ = np.empty_like(y, dtype=int)
-        self.single_estimator_ = clone(estimator).fit(X, y)
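+        # Fit the single estimator on the full dataset, passing the weights
+        # only if the estimator supports them.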
+        self.single_estimator_ = self._fit_estimator(
+            clone(estimator), X, y, supports_sw, sample_weight
+        )
        if self.method == "naive":
            y_pred = self.single_estimator_.predict(X)
        else:
            cv_outputs = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(
                delayed(self._fit_and_predict_oof_model)(
-                    clone(estimator), X, y, train_index, val_index, k
+                    clone(estimator), X, y, train_index, val_index, k,
+                    supports_sw, sample_weight
                ) for k, (train_index, val_index) in enumerate(cv.split(X))
            )
            self.estimators_, predictions, val_ids, val_indices = map(list, zip(*cv_outputs))
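
A minimal usage sketch of the new ``sample_weight`` argument (the import path,
the default constructor, and the toy data below are illustrative assumptions,
not part of this commit):

    import numpy as np
    from mapie.estimators import MapieRegressor  # assumed import path

    rng = np.random.default_rng(0)
    X = rng.normal(size=(100, 2))
    y = X[:, 0] + rng.normal(scale=0.1, size=100)

    # Zero-weight samples are dropped before fitting and get no residuals.
    sample_weight = np.ones(100)
    sample_weight[:10] = 0.0

    # Weights are forwarded to the underlying estimator only if its ``fit``
    # accepts ``sample_weight``; otherwise the estimator is fit unweighted.
    mapie = MapieRegressor().fit(X, y, sample_weight=sample_weight)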