|
20 | 20 | from ._typing import ArrayLike
|
21 | 21 | from .aggregation_functions import aggregate_all, phi2D
|
22 | 22 | from .subsample import Subsample
|
23 |
| -from .dre import DensityRatioEstimator, ProbClassificationDRE |
24 | 23 | from .utils import (
|
25 | 24 | check_cv,
|
26 | 25 | check_alpha,
|
|
30 | 29 | check_nan_in_aposteriori_prediction,
|
31 | 30 | check_null_weight,
|
32 | 31 | check_verbose,
|
33 |
| - fit_estimator, |
34 |
| - empirical_quantile |
| 32 | + fit_estimator |
35 | 33 | )
|
36 | 34 |
|
37 | 35 |
|
@@ -678,360 +676,3 @@ def predict(
|
678 | 676 | if ensemble:
|
679 | 677 | y_pred = aggregate_all(self.agg_function, y_pred_multi)
|
680 | 678 | return y_pred, np.stack([y_pred_low, y_pred_up], axis=1)
|
681 |
| - |
682 |
| - |
683 |
class MapieCovShiftRegressor(MapieRegressor):  # type: ignore
    """
    Prediction intervals from density-ratio-weighted residuals.

    The regressor is calibrated like a prefit ``MapieRegressor``: residuals
    are computed on the data passed to ``fit``. In addition, a density ratio
    estimator is evaluated on the calibration data and on every test point.
    At prediction time the interval half-width for a test point is a
    weighted empirical quantile of the calibration residuals, where each
    residual is weighted by its normalized density ratio and the test point
    carries its own weight on an infinite residual.
    NOTE(review): this matches the weighted conformal prediction scheme for
    covariate shift -- confirm against the intended reference.

    Parameters
    ----------
    estimator : Optional[RegressorMixin]
        Any regressor with scikit-learn API
        (i.e. with fit and predict methods), by default ``None``.
        If ``None``, estimator defaults to a ``LinearRegression`` instance.

    dr_estimator : Optional[DensityRatioEstimator]
        Any density ratio estimator with scikit-learn API
        (i.e. with fit and predict methods), by default ``None``.
        If ``None``, a ``ProbClassificationDRE`` instance is created at
        ``fit`` time. Since only ``cv="prefit"`` is supported, the density
        ratio estimator must already be fitted, so in practice a fitted
        estimator has to be provided.

    method: str, optional
        Method to choose for prediction interval estimates.
        Choose among:

        - "naive", based on training set residuals,
        - "base", based on validation sets residuals.

        By default "base".

    cv: Optional[Union[int, str, BaseCrossValidator]]
        The cross-validation strategy for computing residuals.
        Only ``"prefit"`` is currently supported; any other value
        (including the default ``None``) raises ``NotImplementedError``
        when the object is constructed.

        With ``"prefit"``, ``estimator`` is assumed to be fitted already.
        All data provided in the ``fit`` method is then used
        for computing residuals only.
        The user has to take care manually that data for model fitting and
        residual estimate are disjoint.

        By default ``None``.

    n_jobs: Optional[int]
        Number of jobs for parallel processing using joblib
        via the "loky" backend.
        If ``-1`` all CPUs are used.
        If ``1`` is given, no parallel computing code is used at all,
        which is useful for debugging.
        For n_jobs below ``-1``, ``(n_cpus + 1 + n_jobs)`` are used.
        None is a marker for `unset` that will be interpreted as ``n_jobs=1``
        (sequential execution).

        By default ``None``.

    agg_function : str
        Determines how to aggregate predictions from perturbed models, both at
        training and prediction time.
        Accepted values are ``None``, "median" and "mean".

        If cv is ``"prefit"``, ``agg_function`` is ignored.

        By default "mean".

    verbose : int, optional
        The verbosity level, used with joblib for multiprocessing.
        The frequency of the messages increases with the verbosity level.
        If it is more than ``10``, all iterations are reported.
        Above ``50``, the output is sent to stdout.

        By default ``0``.

    Attributes
    ----------
    valid_methods_: List[str]
        List of all valid methods.

    single_estimator_ : sklearn.RegressorMixin
        Estimator fitted on the whole training set.

    estimators_ : list
        List of out-of-folds estimators.

    residuals_ : ArrayLike of shape (n_samples_train,)
        Residuals between ``y_train`` and ``y_pred``.

    residuals_dre_ : ArrayLike
        Output of the density ratio estimator on the data passed to ``fit``.

    dr_estimator_ : DensityRatioEstimator
        Validated density ratio estimator used by ``fit`` and ``predict``.

    k_ : ArrayLike
        - Array of nans, of shape (len(y), 1) if cv is ``"prefit"``
          (defined but not used)
        - Dummy array of folds containing each training sample, otherwise.
          Of shape (n_samples_train, cv.get_n_splits(X_train, y_train)).

    n_features_in_: int
        Number of features passed to the fit method.

    n_samples_: List[int]
        Number of samples passed to the fit method.
    """
    valid_methods_ = ["naive", "base"]
    valid_agg_functions_ = [None, "median", "mean"]
    fit_attributes = [
        "single_estimator_",
        "estimators_",
        "k_",
        "residuals_",
        "residuals_dre_",
        "dr_estimator_",
        "n_features_in_",
        "n_samples_",
    ]

    def __init__(
        self,
        estimator: Optional[RegressorMixin] = None,
        dr_estimator: Optional[DensityRatioEstimator] = None,
        method: str = "base",
        cv: Optional[Union[int, str, BaseCrossValidator]] = None,
        n_jobs: Optional[int] = None,
        agg_function: Optional[str] = "mean",
        verbose: int = 0,
    ) -> None:
        self.dr_estimator = dr_estimator
        # Only the prefit workflow is implemented; reject everything else
        # up front (same exception type as before, now with a message).
        if cv != "prefit":
            raise NotImplementedError(
                "MapieCovShiftRegressor only supports cv='prefit'."
            )
        super().__init__(
            estimator=estimator,
            method=method,
            cv=cv,
            n_jobs=n_jobs,
            agg_function=agg_function,
            verbose=verbose,
        )

    def _check_dr_estimator(
        self,
        dr_estimator: Optional[DensityRatioEstimator] = None
    ) -> DensityRatioEstimator:
        """
        Validate the density ratio estimator.

        If ``dr_estimator`` is ``None``, a ``ProbClassificationDRE`` instance
        (``clip_min=0.01``, ``clip_max=0.99``) is used instead.
        If the ``cv`` attribute is ``"prefit"``, the estimator (default
        included) is asked to confirm that it is fitted.

        Parameters
        ----------
        dr_estimator : Optional[DensityRatioEstimator], optional
            Estimator to check, by default ``None``.

        Returns
        -------
        DensityRatioEstimator
            The estimator itself or a default ``ProbClassificationDRE``
            instance.

        Raises
        ------
        ValueError
            If the estimator is not ``None``
            and lacks a fit or a predict method.

        NotFittedError
            Presumably raised by ``dr_estimator.check_is_fitted()`` when
            ``cv`` is "prefit" and the estimator is not fitted
            (TODO confirm the exception type against the DRE module).
        """
        if dr_estimator is None:
            dr_estimator = ProbClassificationDRE(clip_min=0.01, clip_max=0.99)
        elif not (
            hasattr(dr_estimator, "fit") and hasattr(dr_estimator, "predict")
        ):
            raise ValueError(
                "Invalid estimator. "
                "Please provide a density ratio estimator with fit "
                "and predict methods."
            )
        # In prefit mode nothing in this class fits the density ratio
        # estimator, so an unfitted one (default included) must be
        # reported here rather than failing later inside ``predict``.
        if self.cv == "prefit":
            dr_estimator.check_is_fitted()

        return dr_estimator

    def fit(
        self,
        X: ArrayLike,
        y: ArrayLike,
        sample_weight: Optional[ArrayLike] = None,
    ) -> MapieRegressor:
        """
        Compute residuals and calibration density ratios.

        The density ratio estimator is validated first, then the parent
        ``fit`` is run with the same arguments, and finally the density
        ratio estimator is evaluated on ``X`` and the result is stored
        under the ``residuals_dre_`` attribute.

        Parameters
        ----------
        X : ArrayLike of shape (n_samples, n_features)
            Training data.

        y : ArrayLike of shape (n_samples,)
            Training labels.

        sample_weight : Optional[ArrayLike] of shape (n_samples,)
            Sample weights, forwarded unchanged to the parent ``fit``.

            By default ``None``.

        Returns
        -------
        MapieRegressor
            The model itself.
        """
        # Keep the constructor parameter untouched and store the validated
        # estimator under a trailing-underscore attribute.
        self.dr_estimator_ = self._check_dr_estimator(self.dr_estimator)
        super().fit(X=X, y=y, sample_weight=sample_weight)
        self.residuals_dre_ = self.dr_estimator_.predict(X)
        return self

    def predict(
        self,
        X: ArrayLike,
        ensemble: bool = False,
        alpha: Optional[Union[float, Iterable[float]]] = None,
    ) -> Union[ArrayLike, Tuple[ArrayLike, ArrayLike]]:
        """
        Predict target on new samples with confidence intervals.

        Point predictions come from ``single_estimator_``. For each test
        point, the interval half-width is the weighted empirical quantile
        (level ``1 - alpha``) of the calibration residuals extended with
        ``+inf``; the weights are the calibration density ratios and the
        test point's density ratio, normalized by their sum.

        Parameters
        ----------
        X : ArrayLike of shape (n_samples, n_features)
            Test data.

        ensemble: bool
            Checked with ``_check_ensemble`` and otherwise unused here:
            predictions are always those of ``single_estimator_``.

            By default ``False``.

        alpha: Optional[Union[float, Iterable[float]]]
            Can be a float, a list of floats, or a ``ArrayLike`` of floats.
            Between 0 and 1, represents the uncertainty of the confidence
            interval.
            Lower ``alpha`` produce larger (more conservative) prediction
            intervals.
            ``alpha`` is the complement of the target coverage level.

            By default ``None``.

        Returns
        -------
        Union[ArrayLike, Tuple[ArrayLike, ArrayLike]]

        - ArrayLike of shape (n_samples,) if alpha is None.

        - Tuple[ArrayLike, ArrayLike] of shapes
          (n_samples,) and (n_samples, 2, n_alpha) if alpha is not None.

            - [:, 0, :]: Lower bound of the prediction interval.
            - [:, 1, :]: Upper bound of the prediction interval.

        Raises
        ------
        NotImplementedError
            If ``method`` is neither "naive" nor "base" and ``cv`` is not
            ``"prefit"``.
        """
        # Checks
        check_is_fitted(self, self.fit_attributes)
        self._check_ensemble(ensemble)
        alpha_ = check_alpha(alpha)

        y_pred = self.single_estimator_.predict(X)
        if alpha is None:
            # Point predictions only: no need to evaluate density ratios.
            return np.array(y_pred)

        alpha_ = cast(ArrayLike, alpha_)
        check_alpha_and_n_samples(alpha_, self.residuals_.shape[0])
        if not (self.method in ["naive", "base"] or self.cv == "prefit"):
            raise NotImplementedError(
                "Only the 'naive' and 'base' methods (or cv='prefit') "
                "are supported."
            )

        dre_calib = self.residuals_dre_
        dre_pred = self.dr_estimator_.predict(X)

        # Normalizing constant of the weights; one value per test point
        # because each test point contributes its own density ratio.
        denom = dre_calib.sum() + dre_pred

        # Loop invariants: residuals extended with +inf (the slot that
        # receives the test point's weight) and the quantile levels.
        scores = np.hstack([self.residuals_, np.array([np.inf])])
        levels = 1 - alpha_

        y_pred_low = np.empty(
            (y_pred.shape[0], len(alpha_)), dtype=y_pred.dtype)
        y_pred_up = np.empty_like(y_pred_low, dtype=y_pred.dtype)
        for i in range(dre_pred.shape[0]):
            # Normalized weights: calibration points (array) followed by
            # the current test point (scalar).
            cal_weights = dre_calib / denom[i]
            test_weight = dre_pred[i] / denom[i]

            # Weighted quantile of the residuals gives the half-width.
            quantile = empirical_quantile(
                scores,
                alphas=levels,
                weights=np.hstack([cal_weights, np.array([test_weight])]),
            )

            y_pred_low[i, :] = y_pred[i] - quantile
            y_pred_up[i, :] = y_pred[i] + quantile

        return y_pred, np.stack([y_pred_low, y_pred_up], axis=1)
0 commit comments