99from sklearn .linear_model import LinearRegression
1010from sklearn .model_selection import BaseCrossValidator
1111from sklearn .pipeline import Pipeline
12- from sklearn .utils import check_array , check_X_y
13- from sklearn .utils .validation import check_is_fitted
12+ from sklearn .utils import _safe_indexing
13+ from sklearn .utils .validation import (
14+ indexable ,
15+ check_is_fitted ,
16+ _num_samples ,
17+ _check_y ,
18+ )
1419
1520from ._typing import ArrayLike
1621from .aggregation_functions import aggregate_all , phi2D
@@ -149,7 +154,7 @@ class MapieRegressor(BaseEstimator, RegressorMixin): # type: ignore
149154 n_features_in_: int
150155 Number of features passed to the fit method.
151156
152- n_samples_val_ : List[int]
157+ n_samples_ : List[int]
153158 Number of samples passed to the fit method.
154159
155160 References
@@ -190,7 +195,7 @@ class MapieRegressor(BaseEstimator, RegressorMixin): # type: ignore
190195 "k_" ,
191196 "residuals_" ,
192197 "n_features_in_" ,
193- "n_samples_val_ " ,
198+ "n_samples_ " ,
194199 ]
195200
196201 def __init__ (
@@ -341,7 +346,7 @@ def _fit_and_predict_oof_model(
341346 val_index : ArrayLike ,
342347 k : int ,
343348 sample_weight : Optional [ArrayLike ] = None ,
344- ) -> Tuple [RegressorMixin , ArrayLike , ArrayLike , ArrayLike ]:
349+ ) -> Tuple [RegressorMixin , ArrayLike , ArrayLike ]:
345350 """
346351 Fit a single out-of-fold model on a given training set and
347352 perform predictions on a test set.
@@ -372,30 +377,30 @@ def _fit_and_predict_oof_model(
372377
373378 Returns
374379 -------
375- Tuple[RegressorMixin, ArrayLike, ArrayLike, ArrayLike ]
380+ Tuple[RegressorMixin, ArrayLike, ArrayLike]
376381
377382 - [0]: Fitted estimator
378383 - [1]: Estimator predictions on the validation fold,
379384 of shape (n_samples_val,)
380- - [2]: Identification number of the validation fold,
381- of shape (n_samples_val,)
382385 - [2]: Validation data indices,
383386 of shape (n_samples_val,).
384387
385388 """
386- X_train , y_train , X_val = X [train_index ], y [train_index ], X [val_index ]
389+ X_train = _safe_indexing (X , train_index )
390+ y_train = _safe_indexing (y , train_index )
391+ X_val = _safe_indexing (X , val_index )
387392 if sample_weight is None :
388393 estimator = fit_estimator (estimator , X_train , y_train )
389394 else :
395+ sample_weight_train = _safe_indexing (sample_weight , train_index )
390396 estimator = fit_estimator (
391- estimator , X_train , y_train , sample_weight [ train_index ]
397+ estimator , X_train , y_train , sample_weight_train
392398 )
393- if X_val . shape [ 0 ] > 0 :
399+ if _num_samples ( X_val ) > 0 :
394400 y_pred = estimator .predict (X_val )
395401 else :
396402 y_pred = np .array ([])
397- val_id = np .full_like (y_pred , k , dtype = int )
398- return estimator , y_pred , val_id , val_index
403+ return estimator , y_pred , val_index
399404
400405 def aggregate_with_mask (self , x : ArrayLike , k : ArrayLike ) -> ArrayLike :
401406 """
@@ -479,9 +484,8 @@ def fit(
479484 cv = check_cv (self .cv )
480485 estimator = self ._check_estimator (self .estimator )
481486 agg_function = self ._check_agg_function (self .agg_function )
482- X , y = check_X_y (
483- X , y , force_all_finite = False , dtype = ["float64" , "int" , "object" ]
484- )
487+ X , y = indexable (X , y )
488+ y = _check_y (y )
485489 self .n_features_in_ = check_n_features_in (X , cv , estimator )
486490 sample_weight , X , y = check_null_weight (sample_weight , X , y )
487491
@@ -492,7 +496,7 @@ def fit(
492496 if cv == "prefit" :
493497 self .single_estimator_ = estimator
494498 y_pred = self .single_estimator_ .predict (X )
495- self .n_samples_val_ = [X . shape [ 0 ] ]
499+ self .n_samples_ = [_num_samples ( X ) ]
496500 self .k_ = np .full (
497501 shape = (len (y ), 1 ), fill_value = np .nan , dtype = float
498502 )
@@ -514,7 +518,7 @@ def fit(
514518 )
515519 if self .method == "naive" :
516520 y_pred = self .single_estimator_ .predict (X )
517- self .n_samples_val_ = [X . shape [ 0 ] ]
521+ self .n_samples_ = [_num_samples ( X ) ]
518522 else :
519523 outputs = Parallel (n_jobs = self .n_jobs , verbose = self .verbose )(
520524 delayed (self ._fit_and_predict_oof_model )(
@@ -528,22 +532,22 @@ def fit(
528532 )
529533 for k , (train_index , val_index ) in enumerate (cv .split (X ))
530534 )
531- self .estimators_ , predictions , val_ids , val_indices = map (
535+ self .estimators_ , predictions , val_indices = map (
532536 list , zip (* outputs )
533537 )
534538
535- self .n_samples_val_ = [
539+ self .n_samples_ = [
536540 np .array (pred ).shape [0 ] for pred in predictions
537541 ]
538542
539543 for i , val_ind in enumerate (val_indices ):
540- pred_matrix [val_ind , i ] = predictions [i ]
544+ pred_matrix [val_ind , i ] = np . array ( predictions [i ]). ravel ()
541545 self .k_ [val_ind , i ] = 1
542546 check_nan_in_aposteriori_prediction (pred_matrix )
543547
544548 y_pred = aggregate_all (agg_function , pred_matrix )
545549
546- self .residuals_ = np .abs (y - y_pred )
550+ self .residuals_ = np .abs (np . ravel ( y ) - y_pred )
547551 return self
548552
549553 def predict (
@@ -605,7 +609,6 @@ def predict(
605609 check_is_fitted (self , self .fit_attributes )
606610 self ._check_ensemble (ensemble )
607611 alpha_ = check_alpha (alpha )
608- X = check_array (X , force_all_finite = False , dtype = ["float64" , "object" ])
609612 y_pred = self .single_estimator_ .predict (X )
610613
611614 if alpha is None :
@@ -658,7 +661,7 @@ def predict(
658661 )
659662 for _alpha in alpha_
660663 ]
661- )
664+ ). data
662665 y_pred_up = np .column_stack (
663666 [
664667 np .quantile (
@@ -669,7 +672,8 @@ def predict(
669672 )
670673 for _alpha in alpha_
671674 ]
672- )
675+ ). data
673676 if ensemble :
674677 y_pred = aggregate_all (self .agg_function , y_pred_multi )
678+ np .stack ([y_pred_low , y_pred_up ], axis = 1 )
675679 return y_pred , np .stack ([y_pred_low , y_pred_up ], axis = 1 )
0 commit comments