Skip to content

Commit c39946f

Browse files
jawadhussein462, qroa, Valentin-Laurent
authored
REFACTOR: restructure the MapieQuantileRegressor Fit - Split the fit into prefit_estimators, fit_estimators and conformalize (#566)
* REFACTOR: restructure the MapieQuantileRegressor Fit - Split the fit into prefit_estimators, fit_estimators and conformalize Co-authored-by: qroa <[email protected]> Co-authored-by: Valentin Laurent <[email protected]>
1 parent e47171c commit c39946f

File tree

2 files changed

+130
-73
lines changed

2 files changed

+130
-73
lines changed

HISTORY.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ History
1313
* Fix issue 548 to correct labels generated in tutorial
1414
* Fix issue 547 to fix wrong warning
1515
* Fix issue 480 (correct display of mathematical equations in generated notebooks)
16-
* Refactor MapieRegressor and EnsembleRegressor, deprecate EnsembleRegressor.fit
16+
* Refactor MapieRegressor, EnsembleRegressor, and MapieQuantileRegressor, to prepare for the release of v1.0.0
1717

1818
0.9.1 (2024-09-13)
1919
------------------

mapie/regression/quantile_regression.py

Lines changed: 129 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from __future__ import annotations
22

33
import warnings
4-
from typing import Iterable, List, Optional, Tuple, Union, cast
4+
from typing import Iterable, Dict, List, Optional, Tuple, Union, cast
55

66
import numpy as np
77
from sklearn.base import RegressorMixin, clone
@@ -546,93 +546,150 @@ def fit(
546546
MapieQuantileRegressor
547547
The model itself.
548548
"""
549-
self.cv = self._check_cv(cast(str, self.cv))
550549

551-
# Initialization
552-
self.estimators_: List[RegressorMixin] = []
553-
if self.cv == "prefit":
554-
estimator = cast(List, self.estimator)
555-
alpha = self._check_alpha(self.alpha)
556-
self._check_prefit_params(estimator)
557-
X_calib, y_calib = indexable(X, y)
550+
self.init_fit()
558551

559-
self.n_calib_samples = _num_samples(y_calib)
560-
y_calib_preds = np.full(
561-
shape=(3, self.n_calib_samples),
562-
fill_value=np.nan
563-
)
564-
for i, est in enumerate(estimator):
565-
self.estimators_.append(est)
566-
y_calib_preds[i] = est.predict(X_calib).ravel()
567-
self.single_estimator_ = self.estimators_[2]
552+
if self.cv == "prefit":
553+
X_calib, y_calib = self.prefit_estimators(X, y)
568554
else:
569-
# Checks
570-
self._check_parameters()
571-
checked_estimator = self._check_estimator(self.estimator)
572-
alpha = self._check_alpha(self.alpha)
573-
X, y = indexable(X, y)
574-
random_state = check_random_state(random_state)
575-
results = self._check_calib_set(
576-
X,
577-
y,
578-
sample_weight,
579-
X_calib,
580-
y_calib,
581-
calib_size,
582-
random_state,
583-
shuffle,
584-
stratify,
555+
X_calib, y_calib = self.fit_estimators(
556+
X=X,
557+
y=y,
558+
sample_weight=sample_weight,
559+
groups=groups,
560+
X_calib=X_calib,
561+
y_calib=y_calib,
562+
calib_size=calib_size,
563+
random_state=random_state,
564+
shuffle=shuffle,
565+
stratify=stratify,
566+
**fit_params,
585567
)
586-
X_train, y_train, X_calib, y_calib, sample_weight_train = results
587-
X_train, y_train = indexable(X_train, y_train)
588-
X_calib, y_calib = indexable(X_calib, y_calib)
589-
y_train, y_calib = _check_y(y_train), _check_y(y_calib)
590-
self.n_calib_samples = _num_samples(y_calib)
591-
check_alpha_and_n_samples(self.alpha, self.n_calib_samples)
592-
sample_weight_train, X_train, y_train = check_null_weight(
593-
sample_weight_train,
568+
569+
self.conformalize(X_calib, y_calib)
570+
571+
return self
572+
573+
def init_fit(self):
574+
575+
self.cv = self._check_cv(cast(str, self.cv))
576+
self.alpha_np = self._check_alpha(self.alpha)
577+
self.estimators_: List[RegressorMixin] = []
578+
579+
def prefit_estimators(
580+
self,
581+
X: ArrayLike,
582+
y: ArrayLike
583+
) -> Tuple[ArrayLike, ArrayLike]:
584+
585+
estimator = cast(List, self.estimator)
586+
self._check_prefit_params(estimator)
587+
self.estimators_ = list(estimator)
588+
self.single_estimator_ = self.estimators_[2]
589+
590+
X_calib, y_calib = indexable(X, y)
591+
return X_calib, y_calib
592+
593+
def fit_estimators(
594+
self,
595+
X: ArrayLike,
596+
y: ArrayLike,
597+
sample_weight: Optional[ArrayLike] = None,
598+
groups: Optional[ArrayLike] = None,
599+
X_calib: Optional[ArrayLike] = None,
600+
y_calib: Optional[ArrayLike] = None,
601+
calib_size: Optional[float] = 0.3,
602+
random_state: Optional[Union[int, np.random.RandomState]] = None,
603+
shuffle: Optional[bool] = True,
604+
stratify: Optional[ArrayLike] = None,
605+
**fit_params,
606+
) -> Tuple[ArrayLike, ArrayLike]:
607+
608+
self._check_parameters()
609+
checked_estimator = self._check_estimator(self.estimator)
610+
random_state = check_random_state(random_state)
611+
X, y = indexable(X, y)
612+
613+
results = self._check_calib_set(
614+
X,
615+
y,
616+
sample_weight,
617+
X_calib,
618+
y_calib,
619+
calib_size,
620+
random_state,
621+
shuffle,
622+
stratify,
623+
)
624+
625+
X_train, y_train, X_calib, y_calib, sample_weight_train = results
626+
X_train, y_train = indexable(X_train, y_train)
627+
X_calib, y_calib = indexable(X_calib, y_calib)
628+
y_train, y_calib = _check_y(y_train), _check_y(y_calib)
629+
self.n_calib_samples = _num_samples(y_calib)
630+
check_alpha_and_n_samples(self.alpha, self.n_calib_samples)
631+
sample_weight_train, X_train, y_train = check_null_weight(
632+
sample_weight_train,
633+
X_train,
634+
y_train
635+
)
636+
y_train = cast(NDArray, y_train)
637+
638+
if isinstance(checked_estimator, Pipeline):
639+
estimator = checked_estimator[-1]
640+
else:
641+
estimator = checked_estimator
642+
name_estimator = estimator.__class__.__name__
643+
alpha_name = self.quantile_estimator_params[
644+
name_estimator
645+
]["alpha_name"]
646+
for i, alpha_ in enumerate(self.alpha_np):
647+
cloned_estimator_ = clone(checked_estimator)
648+
params = {alpha_name: alpha_}
649+
if isinstance(checked_estimator, Pipeline):
650+
cloned_estimator_[-1].set_params(**params)
651+
else:
652+
cloned_estimator_.set_params(**params)
653+
self.estimators_.append(fit_estimator(
654+
cloned_estimator_,
594655
X_train,
595-
y_train
656+
y_train,
657+
sample_weight_train,
658+
**fit_params,
659+
)
596660
)
597-
y_train = cast(NDArray, y_train)
661+
self.single_estimator_ = self.estimators_[2]
598662

599-
y_calib_preds = np.full(
663+
X_calib = cast(ArrayLike, X_calib)
664+
y_calib = cast(ArrayLike, y_calib)
665+
666+
return X_calib, y_calib
667+
668+
def conformalize(
669+
self,
670+
X_conf: ArrayLike,
671+
y_conf: ArrayLike,
672+
sample_weight: Optional[ArrayLike] = None,
673+
predict_params: Dict = {},
674+
):
675+
676+
self.n_calib_samples = _num_samples(y_conf)
677+
678+
y_calib_preds = np.full(
600679
shape=(3, self.n_calib_samples),
601680
fill_value=np.nan
602681
)
603682

604-
if isinstance(checked_estimator, Pipeline):
605-
estimator = checked_estimator[-1]
606-
else:
607-
estimator = checked_estimator
608-
name_estimator = estimator.__class__.__name__
609-
alpha_name = self.quantile_estimator_params[
610-
name_estimator
611-
]["alpha_name"]
612-
for i, alpha_ in enumerate(alpha):
613-
cloned_estimator_ = clone(checked_estimator)
614-
params = {alpha_name: alpha_}
615-
if isinstance(checked_estimator, Pipeline):
616-
cloned_estimator_[-1].set_params(**params)
617-
else:
618-
cloned_estimator_.set_params(**params)
619-
self.estimators_.append(fit_estimator(
620-
cloned_estimator_,
621-
X_train,
622-
y_train,
623-
sample_weight_train,
624-
**fit_params,
625-
)
626-
)
627-
y_calib_preds[i] = self.estimators_[-1].predict(X_calib)
628-
self.single_estimator_ = self.estimators_[2]
683+
for i, est in enumerate(self.estimators_):
684+
y_calib_preds[i] = est.predict(X_conf, **predict_params).ravel()
629685

630686
self.conformity_scores_ = np.full(
631687
shape=(3, self.n_calib_samples),
632688
fill_value=np.nan
633689
)
634-
self.conformity_scores_[0] = y_calib_preds[0] - y_calib
635-
self.conformity_scores_[1] = y_calib - y_calib_preds[1]
690+
691+
self.conformity_scores_[0] = y_calib_preds[0] - y_conf
692+
self.conformity_scores_[1] = y_conf - y_calib_preds[1]
636693
self.conformity_scores_[2] = np.max(
637694
[
638695
self.conformity_scores_[0],

0 commit comments

Comments
 (0)