Skip to content

Commit e47171c

Browse files
REFACTO: split MapieRegressor.fit into .init_fit, .fit_estimator, and .conformalize, split EnsembleRegressor.fit into .fit_single_estimator and .fit_multi_estimators, remove EnsembleEstimator useless interface (#564)
REFACTO: split MapieRegressor.fit into .init_fit, .fit_estimator, and .conformalize, split EnsembleRegressor.fit into .fit_single_estimator and .fit_multi_estimators, remove EnsembleEstimator useless interface
1 parent 6512312 commit e47171c

File tree

7 files changed

+170
-85
lines changed

7 files changed

+170
-85
lines changed

HISTORY.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ History
1212
* Fix issue 528 to correct broken ENS image in the documentation
1313
* Fix issue 548 to correct labels generated in tutorial
1414
* Fix issue 547 to fix wrong warning
15+
* Fix issue 480 (correct display of mathematical equations in generated notebooks)
16+
* Refactor MapieRegressor and EnsembleRegressor, deprecate EnsembleRegressor.fit
1517

1618
0.9.1 (2024-09-13)
1719
------------------

mapie/estimator/__init__.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,7 @@
1-
from .interface import EnsembleEstimator
21
from .regressor import EnsembleRegressor
32
from .classifier import EnsembleClassifier
43

54
__all__ = [
6-
"EnsembleEstimator",
75
"EnsembleRegressor",
86
"EnsembleClassifier",
97
]

mapie/estimator/classifier.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,10 @@
1010
from sklearn.utils.validation import _num_samples, check_is_fitted
1111

1212
from mapie._typing import ArrayLike, NDArray
13-
from mapie.estimator.interface import EnsembleEstimator
1413
from mapie.utils import check_no_agg_cv, fit_estimator, fix_number_of_classes
1514

1615

17-
class EnsembleClassifier(EnsembleEstimator):
16+
class EnsembleClassifier:
1817
"""
1918
This class implements methods to handle the training and usage of the
2019
estimator. This estimator can be unique or composed by cross validated

mapie/estimator/interface.py

Lines changed: 0 additions & 40 deletions
This file was deleted.

mapie/estimator/regressor.py

Lines changed: 88 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -6,17 +6,16 @@
66
from joblib import Parallel, delayed
77
from sklearn.base import RegressorMixin, clone
88
from sklearn.model_selection import BaseCrossValidator
9-
from sklearn.utils import _safe_indexing
9+
from sklearn.utils import _safe_indexing, deprecated
1010
from sklearn.utils.validation import _num_samples, check_is_fitted
1111

1212
from mapie._typing import ArrayLike, NDArray
1313
from mapie.aggregation_functions import aggregate_all, phi2D
14-
from mapie.estimator.interface import EnsembleEstimator
1514
from mapie.utils import (check_nan_in_aposteriori_prediction, check_no_agg_cv,
1615
fit_estimator)
1716

1817

19-
class EnsembleRegressor(EnsembleEstimator):
18+
class EnsembleRegressor:
2019
"""
2120
This class implements methods to handle the training and usage of the
2221
estimator. This estimator can be unique or composed by cross validated
@@ -409,6 +408,11 @@ def predict_calib(
409408

410409
return y_pred
411410

411+
@deprecated(
412+
"WARNING: EnsembleRegressor.fit is deprecated. "
413+
"Instead use EnsembleRegressor.fit_single_estimator "
414+
"then EnsembleRegressor.fit_multi_estimators"
415+
)
412416
def fit(
413417
self,
414418
X: ArrayLike,
@@ -451,42 +455,60 @@ def fit(
451455
EnsembleRegressor
452456
The estimator fitted.
453457
"""
454-
# Initialization
455-
single_estimator_: RegressorMixin
456-
estimators_: List[RegressorMixin] = []
457-
full_indexes = np.arange(_num_samples(X))
458-
cv = self.cv
459-
self.use_split_method_ = check_no_agg_cv(X, self.cv, self.no_agg_cv_)
460-
estimator = self.estimator
458+
self.fit_single_estimator(
459+
X,
460+
y,
461+
sample_weight,
462+
groups,
463+
**fit_params
464+
)
465+
466+
self.fit_multi_estimators(
467+
X,
468+
y,
469+
sample_weight,
470+
groups,
471+
**fit_params
472+
)
473+
474+
return self
475+
476+
def fit_multi_estimators(
477+
self,
478+
X: ArrayLike,
479+
y: ArrayLike,
480+
sample_weight: Optional[ArrayLike] = None,
481+
groups: Optional[ArrayLike] = None,
482+
**fit_params
483+
) -> EnsembleRegressor:
484+
461485
n_samples = _num_samples(y)
486+
estimators: List[RegressorMixin] = []
462487

463-
# Computation
464-
if cv == "prefit":
465-
single_estimator_ = estimator
488+
if self.cv == "prefit":
489+
490+
# Create a placeholder attribute 'k_' filled with NaN values
491+
# This attribute is defined for consistency but
492+
# is not used in prefit mode
466493
self.k_ = np.full(
467494
shape=(n_samples, 1), fill_value=np.nan, dtype=float
468495
)
496+
469497
else:
470-
single_estimator_ = self._fit_oof_estimator(
471-
clone(estimator),
472-
X,
473-
y,
474-
full_indexes,
475-
sample_weight,
476-
**fit_params
477-
)
478-
cv = cast(BaseCrossValidator, cv)
498+
cv = cast(BaseCrossValidator, self.cv)
479499
self.k_ = np.full(
480500
shape=(n_samples, cv.get_n_splits(X, y, groups)),
481501
fill_value=np.nan,
482502
dtype=float,
483503
)
484-
if self.method == "naive":
485-
estimators_ = [single_estimator_]
486-
else:
487-
estimators_ = Parallel(self.n_jobs, verbose=self.verbose)(
504+
505+
if self.method != "naive":
506+
estimators = Parallel(
507+
self.n_jobs,
508+
verbose=self.verbose
509+
)(
488510
delayed(self._fit_oof_estimator)(
489-
clone(estimator),
511+
clone(self.estimator),
490512
X,
491513
y,
492514
train_index,
@@ -495,13 +517,47 @@ def fit(
495517
)
496518
for train_index, _ in cv.split(X, y, groups)
497519
)
498-
# In split-CP, we keep only the model fitted on train dataset
499-
if self.use_split_method_:
500-
single_estimator_ = estimators_[0]
501520

502-
self.single_estimator_ = single_estimator_
503-
self.estimators_ = estimators_
521+
self.estimators_ = estimators
522+
523+
return self
524+
525+
def fit_single_estimator(
    self,
    X: ArrayLike,
    y: ArrayLike,
    sample_weight: Optional[ArrayLike] = None,
    groups: Optional[ArrayLike] = None,
    **fit_params
) -> EnsembleRegressor:
    """
    Fit the single estimator and store it in ``single_estimator_``.

    Also sets ``use_split_method_`` from ``check_no_agg_cv``.

    - If ``cv == "prefit"``, ``self.estimator`` is stored as is, without
      cloning or refitting.
    - Else, if ``use_split_method_`` is true, a clone of ``self.estimator``
      is fitted on the training indexes of the first split yielded by
      ``cv.split(X, y, groups)``.
    - Otherwise, a clone of ``self.estimator`` is fitted on all samples.

    Parameters
    ----------
    X: ArrayLike
        Input data.

    y: ArrayLike
        Input labels.

    sample_weight: Optional[ArrayLike]
        Sample weights forwarded to ``_fit_oof_estimator``.

        By default ``None``.

    groups: Optional[ArrayLike]
        Groups forwarded to ``cv.split``; only used in the split branch.

        By default ``None``.

    **fit_params
        Additional keyword arguments forwarded to ``_fit_oof_estimator``.

    Returns
    -------
    EnsembleRegressor
        The estimator itself, with ``single_estimator_`` set.
    """
    self.use_split_method_ = check_no_agg_cv(X, self.cv, self.no_agg_cv_)

    if self.cv == "prefit":
        # Nothing to fit: keep the user-provided estimator untouched.
        self.single_estimator_ = self.estimator
        return self

    cv = cast(BaseCrossValidator, self.cv)
    if self.use_split_method_:
        # Only the first training fold is needed, so take it directly
        # instead of materializing every split into a list.
        indexes, _ = next(iter(cv.split(X, y, groups)))
    else:
        indexes = np.arange(_num_samples(X))

    self.single_estimator_ = self._fit_oof_estimator(
        clone(self.estimator),
        X,
        y,
        indexes,
        sample_weight,
        **fit_params
    )
    return self
506562

507563
def predict(

mapie/regression/regression.py

Lines changed: 61 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -513,12 +513,26 @@ def fit(
513513
MapieRegressor
514514
The model itself.
515515
"""
516-
fit_params = kwargs.pop('fit_params', {})
517-
predict_params = kwargs.pop('predict_params', {})
518-
if len(predict_params) > 0:
519-
self._predict_params = True
520-
else:
521-
self._predict_params = False
516+
517+
X, y, sample_weight, groups = self.init_fit(
518+
X, y, sample_weight, groups, **kwargs
519+
)
520+
521+
self.fit_estimator(X, y, sample_weight, groups)
522+
self.conformalize(X, y, sample_weight, groups, **kwargs)
523+
524+
return self
525+
526+
def init_fit(
527+
self,
528+
X: ArrayLike,
529+
y: ArrayLike,
530+
sample_weight: Optional[ArrayLike] = None,
531+
groups: Optional[ArrayLike] = None,
532+
**kwargs: Any
533+
):
534+
535+
self._fit_params = kwargs.pop('fit_params', {})
522536

523537
# Checks
524538
(estimator,
@@ -540,9 +554,47 @@ def fit(
540554
self.test_size,
541555
self.verbose
542556
)
543-
# Fit the prediction function
544-
self.estimator_ = self.estimator_.fit(
545-
X, y, sample_weight=sample_weight, groups=groups, **fit_params
557+
558+
return (
559+
X, y, sample_weight, groups
560+
)
561+
562+
def fit_estimator(
    self,
    X: ArrayLike,
    y: ArrayLike,
    sample_weight: Optional[ArrayLike] = None,
    groups: Optional[ArrayLike] = None,
) -> MapieRegressor:
    """
    Fit the single estimator held by ``self.estimator_``.

    Delegates to ``self.estimator_.fit_single_estimator`` and forwards
    the fit parameters stored in ``self._fit_params``.

    Parameters
    ----------
    X: ArrayLike
        Training data.

    y: ArrayLike
        Training labels.

    sample_weight: Optional[ArrayLike]
        Sample weights, passed by keyword to the underlying call.

        By default ``None``.

    groups: Optional[ArrayLike]
        Groups, passed by keyword to the underlying call.

        By default ``None``.

    Returns
    -------
    MapieRegressor
        The model itself.
    """
    ensemble = self.estimator_
    stored_fit_params = self._fit_params
    ensemble.fit_single_estimator(
        X, y, sample_weight=sample_weight, groups=groups, **stored_fit_params
    )
    return self
579+
580+
def conformalize(
581+
self,
582+
X: ArrayLike,
583+
y: ArrayLike,
584+
sample_weight: Optional[ArrayLike] = None,
585+
groups: Optional[ArrayLike] = None,
586+
**kwargs: Any
587+
) -> MapieRegressor:
588+
589+
predict_params = kwargs.pop('predict_params', {})
590+
self._predict_params = len(predict_params) > 0
591+
592+
self.estimator_.fit_multi_estimators(
593+
X,
594+
y,
595+
sample_weight,
596+
groups,
597+
**self._fit_params
546598
)
547599

548600
# Predict on calibration data

mapie/tests/test_regression.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1036,3 +1036,21 @@ def test_check_change_method_to_base(method: str, cv: str) -> None:
10361036
)
10371037
mapie_reg.fit(X_val, y_val)
10381038
assert mapie_reg.method == "base"
1039+
1040+
1041+
def test_deprecated_ensemble_regressor_fit_warning() -> None:
    """Check that calling EnsembleRegressor.fit raises a FutureWarning."""
    expected_message = r".WARNING: EnsembleRegressor.fit is deprecated.*"
    base_model = LinearRegression()
    shuffled_kfold = KFold(n_splits=5, random_state=None, shuffle=True)
    # Arguments are kept positional, in the same order as the original call.
    ens_reg = EnsembleRegressor(
        base_model,
        "plus",
        shuffled_kfold,
        "nonsense",
        None,
        random_state,
        0.20,
        False
    )
    with pytest.warns(FutureWarning, match=expected_message):
        ens_reg.fit(X, y)

0 commit comments

Comments
 (0)