diff --git a/Makefile b/Makefile index 14ba2e5af..8414450a7 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,7 @@ ### Checks that are run in GitHub CI ### lint: - flake8 examples mapie notebooks tests_v1 --max-line-length=88 + flake8 examples mapie notebooks --max-line-length=88 type-check: mypy mapie @@ -14,7 +14,7 @@ coverage: --cov-branch \ --cov=mapie \ --cov-report term-missing \ - --pyargs mapie tests_v1 \ + --pyargs mapie \ --cov-fail-under=100 \ --no-cov-on-fail \ --doctest-modules @@ -37,7 +37,6 @@ all-checks: tests: pytest -vs --doctest-modules mapie - python -m pytest -vs tests_v1 clean-doc: $(MAKE) clean -C doc diff --git a/mapie/tests/test_common.py b/mapie/tests/test_common.py index 39156694e..701086416 100644 --- a/mapie/tests/test_common.py +++ b/mapie/tests/test_common.py @@ -4,15 +4,221 @@ import numpy as np import pytest from sklearn.base import BaseEstimator +from sklearn.datasets import make_regression, make_classification +from sklearn.dummy import DummyRegressor, DummyClassifier from sklearn.exceptions import NotFittedError -from sklearn.linear_model import LinearRegression, LogisticRegression -from sklearn.model_selection import KFold +from sklearn.linear_model import LinearRegression, LogisticRegression, QuantileRegressor +from sklearn.model_selection import KFold, train_test_split from sklearn.pipeline import make_pipeline from sklearn.utils.validation import check_is_fitted -from mapie.classification import _MapieClassifier -from mapie.regression.regression import _MapieRegressor -from mapie.regression.quantile_regression import _MapieQuantileRegressor +from mapie.classification import _MapieClassifier, SplitConformalClassifier, \ + CrossConformalClassifier +from mapie.regression.regression import _MapieRegressor, SplitConformalRegressor, \ + CrossConformalRegressor, JackknifeAfterBootstrapRegressor +from mapie.regression.quantile_regression import _MapieQuantileRegressor, \ + ConformalizedQuantileRegressor + +RANDOM_STATE = 1 + + +@pytest.fixture(scope="module") +def dataset_regression(): + X, y = make_regression( + n_samples=500, n_features=2, noise=1.0, random_state=RANDOM_STATE + ) + X_train, X_conf_test, y_train, y_conf_test = train_test_split( + X, y, random_state=RANDOM_STATE + ) + X_conformalize, X_test, y_conformalize, y_test = train_test_split( + X_conf_test, y_conf_test, random_state=RANDOM_STATE + ) + return X_train, X_conformalize, X_test, y_train, y_conformalize, y_test + + +@pytest.fixture(scope="module") +def dataset_classification(): + X, y = make_classification( + n_samples=500, n_informative=5, n_classes=4, random_state=RANDOM_STATE, + ) + X_train, X_conf_test, y_train, y_conf_test = train_test_split( + X, y, random_state=RANDOM_STATE + ) + X_conformalize, X_test, y_conformalize, y_test = train_test_split( + X_conf_test, y_conf_test, random_state=RANDOM_STATE + ) + return X_train, X_conformalize, X_test, y_train, y_conformalize, y_test + + +def test_scr_same_predictions_prefit_not_prefit(dataset_regression) -> None: + X_train, X_conformalize, X_test, y_train, y_conformalize, y_test = ( + dataset_regression) + regressor = LinearRegression() + regressor.fit(X_train, y_train) + scr_prefit = SplitConformalRegressor(estimator=regressor, prefit=True) + scr_prefit.conformalize(X_conformalize, y_conformalize) + predictions_scr_prefit = scr_prefit.predict_interval(X_test) + + scr_not_prefit = SplitConformalRegressor(estimator=LinearRegression(), prefit=False) + scr_not_prefit.fit(X_train, y_train).conformalize(X_conformalize, y_conformalize) + 
predictions_scr_not_prefit = scr_not_prefit.predict_interval(X_test) + np.testing.assert_equal(predictions_scr_prefit, predictions_scr_not_prefit) + + +@pytest.mark.parametrize( + "split_technique,predict_method,dataset,estimator_class", + [ + ( + SplitConformalRegressor, + "predict_interval", + "dataset_regression", + DummyRegressor + ), + ( + ConformalizedQuantileRegressor, + "predict_interval", + "dataset_regression", + QuantileRegressor + ), + ( + SplitConformalClassifier, + "predict_set", + "dataset_classification", + DummyClassifier + ) + ] +) +class TestWrongMethodsOrderRaisesErrorForSplitTechniques: + def test_with_prefit_false( + self, + split_technique, + predict_method, + dataset, + estimator_class, + request + ): + dataset = request.getfixturevalue(dataset) + X_train, X_conformalize, X_test, y_train, y_conformalize, y_test = dataset + estimator = estimator_class() + technique = split_technique(estimator=estimator, prefit=False) + + with pytest.raises(ValueError, match=r"call fit before calling conformalize"): + technique.conformalize( + X_conformalize, + y_conformalize + ) + + technique.fit(X_train, y_train) + + with pytest.raises(ValueError, match=r"fit method already called"): + technique.fit(X_train, y_train) + with pytest.raises( + ValueError, + match=r"call conformalize before calling predict" + ): + technique.predict(X_test) + + with pytest.raises( + ValueError, + match=f"call conformalize before calling {predict_method}" + ): + getattr(technique, predict_method)(X_test) + + technique.conformalize(X_conformalize, y_conformalize) + + with pytest.raises(ValueError, match=r"conformalize method already called"): + technique.conformalize(X_conformalize, y_conformalize) + + def test_with_prefit_true( + self, + split_technique, + predict_method, + dataset, + estimator_class, + request + ): + dataset = request.getfixturevalue(dataset) + X_train, X_conformalize, X_test, y_train, y_conformalize, y_test = dataset + estimator = estimator_class() + estimator.fit(X_train, y_train) + + if split_technique == ConformalizedQuantileRegressor: + technique = split_technique(estimator=[estimator] * 3, prefit=True) + else: + technique = split_technique(estimator=estimator, prefit=True) + + with pytest.raises(ValueError, match=r"The fit method must be skipped"): + technique.fit(X_train, y_train) + with pytest.raises( + ValueError, + match=r"call conformalize before calling predict" + ): + technique.predict(X_test) + + with pytest.raises( + ValueError, + match=f"call conformalize before calling {predict_method}" + ): + getattr(technique, predict_method)(X_test) + + technique.conformalize(X_conformalize, y_conformalize) + + with pytest.raises(ValueError, match=r"conformalize method already called"): + technique.conformalize(X_conformalize, y_conformalize) + + +@pytest.mark.parametrize( + "cross_technique,predict_method,dataset,estimator_class", + [ + ( + CrossConformalRegressor, + "predict_interval", + "dataset_regression", + DummyRegressor + ), + ( + JackknifeAfterBootstrapRegressor, + "predict_interval", + "dataset_regression", + DummyRegressor + ), + ( + CrossConformalClassifier, + "predict_set", + "dataset_classification", + DummyClassifier + ), + ] +) +class TestWrongMethodsOrderRaisesErrorForCrossTechniques: + def test_wrong_methods_order( + self, + cross_technique, + predict_method, + dataset, + estimator_class, + request + ): + dataset = request.getfixturevalue(dataset) + X_train, X_conformalize, X_test, y_train, y_conformalize, y_test = dataset + technique = 
cross_technique(estimator=estimator_class())
+
+        with pytest.raises(
+            ValueError,
+            match=r"call fit_conformalize before calling predict"
+        ):
+            technique.predict(X_test)
+        with pytest.raises(
+            ValueError,
+            match=f"call fit_conformalize before calling {predict_method}"
+        ):
+            getattr(technique, predict_method)(X_test)
+
+        technique.fit_conformalize(X_conformalize, y_conformalize)
+
+        with pytest.raises(ValueError, match=r"fit_conformalize method already called"):
+            technique.fit_conformalize(X_conformalize, y_conformalize)
+
 
 X_toy = np.arange(18).reshape(-1, 1)
 y_toy = np.array(
diff --git a/mapie/tests/test_conformity_scores_utils.py b/mapie/tests/test_conformity_scores_utils.py
index 4636d2396..5a1a55790 100644
--- a/mapie/tests/test_conformity_scores_utils.py
+++ b/mapie/tests/test_conformity_scores_utils.py
@@ -3,9 +3,37 @@
 import numpy as np
 import pytest
 
+from mapie.conformity_scores import AbsoluteConformityScore, BaseRegressionScore, \
+    GammaConformityScore, LACConformityScore, BaseClassificationScore, \
+    TopKConformityScore
 from mapie.conformity_scores.sets.utils import get_true_label_position
 from numpy.typing import NDArray
 
+from mapie.conformity_scores.utils import check_and_select_conformity_score
+
+
+class TestCheckAndSelectConformityScore:
+
+    @pytest.mark.parametrize(
+        "score, score_type, expected_class", [
+            (AbsoluteConformityScore(), BaseRegressionScore, AbsoluteConformityScore),
+            ("gamma", BaseRegressionScore, GammaConformityScore),
+            (LACConformityScore(), BaseClassificationScore, LACConformityScore),
+            ("top_k", BaseClassificationScore, TopKConformityScore),
+        ]
+    )
+    def test_with_valid_inputs(self, score, score_type, expected_class):
+        result = check_and_select_conformity_score(score, score_type)
+        assert isinstance(result, expected_class)
+
+    @pytest.mark.parametrize(
+        "score_type", [BaseRegressionScore, BaseClassificationScore]
+    )
+    def test_with_invalid_input(self, score_type):
+        with pytest.raises(ValueError):
+            check_and_select_conformity_score("I'm not a valid input :(", score_type)
+
+
 Y_TRUE_PROBA_PLACE = [
     [
         np.array([2, 0]),
diff --git a/tests_v1/test_functional/test_non_regression_regression.py b/mapie/tests/test_non_regression_v0_to_v1.py
similarity index 53%
rename from tests_v1/test_functional/test_non_regression_regression.py
rename to mapie/tests/test_non_regression_v0_to_v1.py
index 2d25975b4..3c749c069 100644
--- a/tests_v1/test_functional/test_non_regression_regression.py
+++ b/mapie/tests/test_non_regression_v0_to_v1.py
@@ -1,29 +1,30 @@
-from __future__ import annotations
-from typing import Optional, Union, Dict, Type
+import inspect
+from typing import Type, Union, Dict, Optional, Callable, Any, Tuple
 
 import numpy as np
 import pytest
+from _pytest.fixtures import FixtureRequest
+from numpy.typing import ArrayLike, NDArray
 from numpy.random import RandomState
+from sklearn.base import BaseEstimator, ClassifierMixin
 from sklearn.compose import TransformedTargetRegressor
-from sklearn.datasets import make_regression
-from sklearn.linear_model import LinearRegression
-from sklearn.linear_model import QuantileRegressor
-from sklearn.ensemble import GradientBoostingRegressor
-from sklearn.model_selection import train_test_split
+from sklearn.datasets import make_classification, make_regression
+from sklearn.ensemble import RandomForestClassifier, GradientBoostingRegressor
+from sklearn.linear_model import LogisticRegression, LinearRegression, QuantileRegressor
+from sklearn.model_selection import LeaveOneOut, GroupKFold, train_test_split, 
\ + ShuffleSplit +from typing_extensions import Self -from mapie.subsample import Subsample -from numpy.typing import ArrayLike, NDArray -from mapie.conformity_scores import GammaConformityScore, \ - AbsoluteConformityScore, ResidualNormalisedScore -from mapie.regression import SplitConformalRegressor, \ - CrossConformalRegressor, \ - JackknifeAfterBootstrapRegressor, \ +from mapie.classification import _MapieClassifier, SplitConformalClassifier, \ + CrossConformalClassifier +from mapie.conformity_scores import LACConformityScore, TopKConformityScore, \ + APSConformityScore, RAPSConformityScore, AbsoluteConformityScore, \ + GammaConformityScore, ResidualNormalisedScore +from mapie.regression import CrossConformalRegressor, JackknifeAfterBootstrapRegressor +from mapie.regression.quantile_regression import _MapieQuantileRegressor, \ ConformalizedQuantileRegressor - -from mapie.regression.regression import _MapieRegressor -from mapie.regression.quantile_regression import _MapieQuantileRegressor -from tests_v1.test_functional.utils import filter_params, train_test_split_shuffle -from sklearn.model_selection import LeaveOneOut, GroupKFold +from mapie.regression.regression import _MapieRegressor, SplitConformalRegressor +from mapie.subsample import Subsample RANDOM_STATE = 1 K_FOLDS = 3 @@ -31,6 +32,408 @@ N_SAMPLES = 200 N_GROUPS = 5 + +@pytest.fixture(scope="module") +def dataset(): + X, y = make_classification( + n_samples=1000, + n_informative=5, + n_classes=4, + random_state=RANDOM_STATE + ) + sample_weight = RandomState(RANDOM_STATE).random(len(X)) + groups = np.array([i % 5 for i in range(len(X))]) + + ( + X_train, + X_conformalize, + y_train, + y_conformalize, + sample_weight_train, + sample_weight_conformalize, + ) = train_test_split_shuffle( + X, y, random_state=RANDOM_STATE, sample_weight=sample_weight + ) + + return { + "X": X, + "y": y, + "sample_weight": sample_weight, + "groups": groups, + "X_train": X_train, + "X_conformalize": X_conformalize, + "y_train": y_train, + "y_conformalize": y_conformalize, + "sample_weight_train": sample_weight_train, + "sample_weight_conformalize": sample_weight_conformalize, + } + + +@pytest.fixture() +def params_split_test_1(): + return { + "v1": { + "__init__": { + "estimator": LogisticRegression(), + }, + }, + "v0": { + "__init__": { + "estimator": LogisticRegression(), + "conformity_score": LACConformityScore(), + "cv": "prefit" + }, + "predict": { + "alpha": 0.1, + }}} + + +@pytest.fixture() +def params_split_test_2(): + return { + "v1": { + "__init__": { + "estimator": DummyClassifierWithFitAndPredictParams(), + "confidence_level": 0.8, + "prefit": False, + "conformity_score": "top_k", + "random_state": RANDOM_STATE, + }, + "fit": { + "fit_params": {"dummy_fit_param": True}, + }, + "conformalize": { + "predict_params": {"dummy_predict_param": True}, + }}, + "v0": { + "__init__": { + "estimator": DummyClassifierWithFitAndPredictParams(), + "conformity_score": TopKConformityScore(), + "cv": "split", + "random_state": RANDOM_STATE, + }, + "fit": { + "fit_params": {"dummy_fit_param": True}, + "predict_params": {"dummy_predict_param": True}, + }, + "predict": { + "alpha": 0.2, + "dummy_predict_param": True, + }}} + + +@pytest.fixture() +def params_split_test_3(dataset): + return { + "v1": { + "__init__": { + "estimator": RandomForestClassifier(random_state=RANDOM_STATE), + "confidence_level": [0.8, 0.9], + "prefit": False, + "conformity_score": "aps", + "random_state": RANDOM_STATE, + }, + "fit": { + "fit_params": {"sample_weight": 
dataset["sample_weight_train"]}, + }, + "predict_set": { + "conformity_score_params": {"include_last_label": False} + }}, + "v0": { + "__init__": { + "estimator": RandomForestClassifier(random_state=RANDOM_STATE), + "conformity_score": APSConformityScore(), + "cv": "split", + "random_state": RANDOM_STATE, + }, + "fit": { + "sample_weight": dataset["sample_weight"], + }, + "predict": { + "alpha": [0.2, 0.1], + "include_last_label": False, + }}} + + +@pytest.fixture() +def params_split_test_4(): + return { + "v1": { + "__init__": { + "estimator": LogisticRegression(), + "conformity_score": "raps", + "random_state": RANDOM_STATE, + }}, + "v0": { + "__init__": { + "estimator": LogisticRegression(), + "conformity_score": RAPSConformityScore(), + "cv": "prefit", + "random_state": RANDOM_STATE, + }, + "predict": { + "alpha": 0.1, + }}} + + +@pytest.fixture() +def params_split_test_5(): + return { + "v1": { + "__init__": { + "estimator": LogisticRegression(), + "conformity_score": RAPSConformityScore(size_raps=0.4), + "random_state": RANDOM_STATE, + }}, + "v0": { + "__init__": { + "estimator": LogisticRegression(), + "conformity_score": RAPSConformityScore(size_raps=0.4), + "cv": "prefit", + "random_state": RANDOM_STATE, + }, + "predict": { + "alpha": 0.1, + }}} + + +@pytest.mark.parametrize( + "params_", [ + "params_split_test_1", + "params_split_test_2", + "params_split_test_3", + "params_split_test_4", + "params_split_test_5", + ] +) +def test_split( + dataset: Dict[str, Any], + params_: str, + request: FixtureRequest +) -> None: + X, y, X_train, X_conformalize, y_train, y_conformalize = ( + dataset["X"], + dataset["y"], + dataset["X_train"], + dataset["X_conformalize"], + dataset["y_train"], + dataset["y_conformalize"], + ) + + params = extract_params(request.getfixturevalue(params_)) + + prefit = params["v1_init"].get("prefit", True) + + if prefit: + params["v0_init"]["estimator"].fit(X_train, y_train) + params["v1_init"]["estimator"].fit(X_train, y_train) + + v0 = _MapieClassifier(**params["v0_init"]) + v1 = SplitConformalClassifier(**params["v1_init"]) + + if prefit: + v0.fit(X_conformalize, y_conformalize, **params["v0_fit"]) + else: + v0.fit(X, y, **params["v0_fit"]) + v1.fit(X_train, y_train, **params["v1_fit"]) + v1.conformalize(X_conformalize, y_conformalize, **params["v1_conformalize"]) + + v0_preds, v0_pred_sets = v0.predict(X_conformalize, **params["v0_predict"]) + v1_preds, v1_pred_sets = v1.predict_set(X_conformalize, **params["v1_predict_set"]) + + v1_preds_using_predict: NDArray = v1.predict(X_conformalize) + + np.testing.assert_array_equal(v0_preds, v1_preds) + np.testing.assert_array_equal(v0_pred_sets, v1_pred_sets) + np.testing.assert_array_equal(v1_preds_using_predict, v1_preds) + + n_confidence_level = get_number_of_confidence_levels(params["v1_init"]) + + assert v1_pred_sets.shape == ( + len(X_conformalize), + len(np.unique(y)), + n_confidence_level, + ) + + +@pytest.fixture() +def params_cross_test_1(dataset): + return { + "v1": { + "__init__": { + "estimator": LogisticRegression(), + "confidence_level": 0.8, + "conformity_score": "lac", + "cv": 4, + "random_state": RANDOM_STATE, + }, + "fit_conformalize": { + "fit_params": {"sample_weight": dataset["sample_weight"]}, + }, + }, + "v0": { + "__init__": { + "estimator": LogisticRegression(), + "conformity_score": LACConformityScore(), + "cv": 4, + "random_state": RANDOM_STATE, + }, + "fit": { + "sample_weight": dataset["sample_weight"], + }, + "predict": { + "alpha": 0.2, + }}} + + +@pytest.fixture() +def 
params_cross_test_2(): + return { + "v1": { + "__init__": { + "estimator": DummyClassifierWithFitAndPredictParams(), + "confidence_level": [0.9, 0.8], + "conformity_score": "aps", + "cv": LeaveOneOut(), + "random_state": RANDOM_STATE, + }, + "fit_conformalize": { + "predict_params": {"dummy_predict_param": True}, + }, + "predict_set": { + "conformity_score_params": {"include_last_label": False} + }, + }, + "v0": { + "__init__": { + "estimator": DummyClassifierWithFitAndPredictParams(), + "conformity_score": APSConformityScore(), + "cv": LeaveOneOut(), + "random_state": RANDOM_STATE, + }, + "fit": { + "predict_params": {"dummy_predict_param": True}, + }, + "predict": { + "alpha": [0.1, 0.2], + "include_last_label": False, + "dummy_predict_param": True, + }}} + + +@pytest.fixture() +def params_cross_test_3(dataset): + return { + "v1": { + "__init__": { + "estimator": DummyClassifierWithFitAndPredictParams(), + "cv": GroupKFold(), + "random_state": RANDOM_STATE, + }, + "fit_conformalize": { + "groups": dataset["groups"], + "fit_params": {"dummy_fit_param": True}, + }, + "predict_set": { + "agg_scores": "crossval", + }, + }, + "v0": { + "__init__": { + "estimator": DummyClassifierWithFitAndPredictParams(), + "cv": GroupKFold(), + "random_state": RANDOM_STATE, + }, + "fit": { + "groups": dataset["groups"], + "fit_params": {"dummy_fit_param": True}, + }, + "predict": { + "alpha": 0.1, + "agg_scores": "crossval", + }}} + + +@pytest.fixture() +def params_cross_test_4(): + return { + "v1": { + "__init__": { + "estimator": RandomForestClassifier(random_state=RANDOM_STATE), + "confidence_level": 0.7, + "conformity_score": LACConformityScore(), + "random_state": RANDOM_STATE, + }, + }, + "v0": { + "__init__": { + "estimator": RandomForestClassifier(random_state=RANDOM_STATE), + "cv": 5, + "random_state": RANDOM_STATE, + }, + "predict": { + "alpha": 0.3, + }}} + + +@pytest.mark.parametrize( + "params_", [ + "params_cross_test_1", + "params_cross_test_2", + "params_cross_test_3", + "params_cross_test_4", + ] +) +def test_cross( + dataset: Dict[str, Any], + params_: str, + request: FixtureRequest +): + X, y = dataset["X"], dataset["y"] + + params = extract_params(request.getfixturevalue(params_)) + + v0 = _MapieClassifier(**params["v0_init"]) + v1 = CrossConformalClassifier(**params["v1_init"]) + + v0.fit(X, y, **params["v0_fit"]) + v1.fit_conformalize(X, y, **params["v1_fit_conformalize"]) + + v0_preds, v0_pred_sets = v0.predict(X, **params["v0_predict"]) + v1_preds, v1_pred_sets = v1.predict_set(X, **params["v1_predict_set"]) + + v1_preds_using_predict: NDArray = v1.predict(X) + + np.testing.assert_array_equal(v0_preds, v1_preds) + np.testing.assert_array_equal(v0_pred_sets, v1_pred_sets) + np.testing.assert_array_equal(v1_preds_using_predict, v1_preds) + + n_confidence_level = get_number_of_confidence_levels(params["v1_init"]) + assert v1_pred_sets.shape == ( + len(X), + len(np.unique(y)), + n_confidence_level, + ) + + +def extract_params(params): + return { + "v0_init": params["v0"].get("__init__", {}), + "v0_fit": params["v0"].get("fit", {}), + "v0_predict": params["v0"].get("predict", {}), + "v1_init": params["v1"].get("__init__", {}), + "v1_fit": params["v1"].get("fit", {}), + "v1_conformalize": params["v1"].get("conformalize", {}), + "v1_predict_set": params["v1"].get("predict_set", {}), + "v1_fit_conformalize": params["v1"].get("fit_conformalize", {}) + } + + +def get_number_of_confidence_levels(v1_init_params): + confidence_level = v1_init_params.get("confidence_level", 0.9) + return 1 if 
isinstance(confidence_level, float) else len(confidence_level) + + X, y_signed = make_regression( n_samples=N_SAMPLES, n_features=10, @@ -422,14 +825,15 @@ def test_cross_and_jackknife(params: dict) -> None: @pytest.mark.parametrize("params_split", params_test_cases_split) -def test_intervals_and_predictions_exact_equality_split(params_split: dict) -> None: +def test_intervals_and_predictions_exact_equality_split( + params_split: dict) -> None: v0_params = params_split["v0"] v1_params = params_split["v1"] test_size = v1_params.get("test_size", None) prefit = v1_params.get("prefit", False) - compare_model_predictions_and_intervals( + compare_model_predictions_and_intervals_split_and_quantile( model_v0=_MapieRegressor, model_v1=SplitConformalRegressor, X=X, @@ -546,7 +950,7 @@ def test_intervals_and_predictions_exact_equality_quantile( test_size = v1_params.get("test_size", None) prefit = v1_params.get("prefit", False) - compare_model_predictions_and_intervals( + compare_model_predictions_and_intervals_split_and_quantile( model_v0=_MapieQuantileRegressor, model_v1=ConformalizedQuantileRegressor, X=X, @@ -559,12 +963,10 @@ def test_intervals_and_predictions_exact_equality_quantile( ) -def compare_model_predictions_and_intervals( +def compare_model_predictions_and_intervals_split_and_quantile( model_v0: Type[_MapieRegressor], model_v1: Type[Union[ SplitConformalRegressor, - CrossConformalRegressor, - JackknifeAfterBootstrapRegressor, ConformalizedQuantileRegressor ]], X: NDArray, @@ -575,23 +977,24 @@ def compare_model_predictions_and_intervals( test_size: Optional[float] = None, random_state: int = RANDOM_STATE, ) -> None: - if v0_params.get("alpha"): - if isinstance(v0_params["alpha"], float): - n_alpha = 1 - else: - n_alpha = len(v0_params["alpha"]) - else: + if isinstance(v0_params["alpha"], float): n_alpha = 1 - - if test_size is not None: - X_train, X_conf, y_train, y_conf = train_test_split_shuffle( - X, - y, - test_size=test_size, - random_state=random_state, - ) else: - X_train, X_conf, y_train, y_conf = X, X, y, y + n_alpha = len(v0_params["alpha"]) + + ( + X_train, + X_conf, + y_train, + y_conf, + sample_weight_train, + sample_weight_conf, + ) = train_test_split_shuffle( + X, + y, + test_size=test_size, + random_state=random_state, + ) if prefit: estimator = v0_params["estimator"] @@ -643,3 +1046,54 @@ def compare_model_predictions_and_intervals( # condition to remove when optimize_beta works # keep assertion assert v1_pred_intervals.shape == (len(X_conf), 2, n_alpha) + + +def train_test_split_shuffle( + X: NDArray, + y: NDArray, + test_size: Optional[float] = None, + random_state: int = 42, + sample_weight: Optional[NDArray] = None, +) -> Tuple[Any, Any, Any, Any, Any, Any]: + splitter = ShuffleSplit( + n_splits=1, + test_size=test_size, + random_state=random_state + ) + train_idx, test_idx = next(splitter.split(X)) + + X_train, X_test = X[train_idx], X[test_idx] + y_train, y_test = y[train_idx], y[test_idx] + if sample_weight is not None: + sample_weight_train = sample_weight[train_idx] + sample_weight_test = sample_weight[test_idx] + else: + sample_weight_train = None + sample_weight_test = None + + return X_train, X_test, y_train, y_test, sample_weight_train, sample_weight_test + + +def filter_params( + function: Callable, + params: Dict[str, Any] +) -> Dict[str, Any]: + model_params = inspect.signature(function).parameters + return {k: v for k, v in params.items() if k in model_params} + + +class DummyClassifierWithFitAndPredictParams(BaseEstimator, ClassifierMixin): + def 
__init__(self):
+        self.classes_ = None
+        self._dummy_fit_param = None
+
+    def fit(self, X: NDArray, y: NDArray, dummy_fit_param: bool = False) -> Self:
+        self.classes_ = np.unique(y)
+        self._dummy_fit_param = dummy_fit_param
+        return self
+
+    def predict_proba(self, X: NDArray, dummy_predict_param: bool = False) -> NDArray:
+        # The returned probabilities depend on the dummy fit/predict params, so that
+        # correct propagation of these params is observable in the predictions.
+        probas = np.zeros((len(X), len(self.classes_)))
+        if self._dummy_fit_param and dummy_predict_param:
+            probas[:, 0] = 0.1
+            probas[:, 1] = 0.9
+        elif self._dummy_fit_param:
+            probas[:, 1] = 0.1
+            probas[:, 2] = 0.9
+        elif dummy_predict_param:
+            probas[:, 1] = 0.1
+            probas[:, 0] = 0.9
+        else:
+            probas[:, 2] = 0.1
+            probas[:, 0] = 0.9
+        return probas
+
+    def predict(self, X: NDArray, dummy_predict_param: bool = False) -> NDArray:
+        y_preds_proba = self.predict_proba(X, dummy_predict_param)
+        return np.amax(y_preds_proba, axis=0)
diff --git a/mapie/tests/test_regression.py b/mapie/tests/test_regression.py
index 4dbe52e4a..8a207afaf 100644
--- a/mapie/tests/test_regression.py
+++ b/mapie/tests/test_regression.py
@@ -33,9 +33,36 @@ from mapie.metrics.regression import (
     regression_coverage_score,
 )
-from mapie.regression.regression import _MapieRegressor
+from mapie.regression.regression import _MapieRegressor, \
+    JackknifeAfterBootstrapRegressor
 from mapie.subsample import Subsample
 
+
+class TestCheckAndConvertResamplingToCv:
+    def test_with_integer(self):
+        regressor = JackknifeAfterBootstrapRegressor()
+        cv = regressor._check_and_convert_resampling_to_cv(50)
+
+        assert isinstance(cv, Subsample)
+        assert cv.n_resamplings == 50
+
+    def test_with_subsample(self):
+        custom_subsample = Subsample(n_resamplings=25, random_state=42)
+        regressor = JackknifeAfterBootstrapRegressor()
+        cv = regressor._check_and_convert_resampling_to_cv(custom_subsample)
+
+        assert cv is custom_subsample
+
+    def test_with_invalid_input(self):
+        regressor = JackknifeAfterBootstrapRegressor()
+
+        with pytest.raises(
+            ValueError,
+            match="resampling must be an integer or a Subsample instance"
+        ):
+            regressor._check_and_convert_resampling_to_cv("invalid_input")
+
+
 X_toy = np.array([0, 1, 2, 3, 4, 5]).reshape(-1, 1)
 y_toy = np.array([5, 7, 9, 11, 13, 15])
 X, y = make_regression(
diff --git a/mapie/tests/test_utils.py b/mapie/tests/test_utils.py
index cb1da4161..b3c3b30f3 100644
--- a/mapie/tests/test_utils.py
+++ b/mapie/tests/test_utils.py
@@ -3,6 +3,7 @@ import logging
 import re
 from typing import Any, Optional, Tuple
+from unittest.mock import patch
 
 import numpy as np
 import pytest
@@ -14,6 +15,7 @@ from sklearn.utils.validation import check_is_fitted
 
 from numpy.typing import ArrayLike, NDArray
 
+
 from mapie.regression.quantile_regression import _MapieQuantileRegressor
 from mapie.utils import (_check_alpha, _check_alpha_and_n_samples,
                          _check_array_inf, _check_array_nan, _check_arrays_length,
@@ -22,7 +24,262 @@
                          _check_n_jobs, _check_n_samples, _check_no_agg_cv,
                          _check_null_weight, _check_number_bins,
                          _check_split_strategy, _check_verbose,
-                         _compute_quantiles, _fit_estimator, _get_binning_groups)
+                         _compute_quantiles, _fit_estimator, _get_binning_groups,
+                         train_conformalize_test_split,
+                         _transform_confidence_level_to_alpha,
+                         _transform_confidence_level_to_alpha_list,
+                         _check_if_param_in_allowed_values, _check_cv_not_string,
+                         _cast_point_predictions_to_ndarray,
+                         _cast_predictions_to_ndarray_tuple, _prepare_params,
+                         _prepare_fit_params_and_sample_weight,
+                         _raise_error_if_previous_method_not_called,
+                         _raise_error_if_method_already_called,
+                         _raise_error_if_fit_called_in_prefit_mode)
+
+
+@pytest.fixture(scope="module")
+def dataset():
+    X, y = make_regression(
+        n_samples=100, n_features=2, noise=1.0, random_state=random_state
+    )
+    return X, y
+
+
+class TestTrainConformalizeTestSplit:
+
+    def test_error_sum_int_is_not_dataset_size(self, dataset):
+        X, y = dataset
+        with pytest.raises(ValueError):
+            train_conformalize_test_split(
+                X, y, train_size=1, conformalize_size=1,
+                test_size=1, 
random_state=random_state + ) + + def test_error_sum_float_is_not_1(self, dataset): + X, y = dataset + with pytest.raises(ValueError): + train_conformalize_test_split( + X, y, train_size=0.5, conformalize_size=0.5, + test_size=0.5, random_state=random_state + ) + + def test_error_sizes_are_int_and_float(self, dataset): + X, y = dataset + with pytest.raises(TypeError): + train_conformalize_test_split( + X, y, train_size=5, conformalize_size=0.5, + test_size=0.5, random_state=random_state + ) + + def test_3_floats(self, dataset): + X, y = dataset + ( + X_train, X_conformalize, X_test, y_train, y_conformalize, y_test + ) = train_conformalize_test_split( + X, y, train_size=0.6, conformalize_size=0.2, + test_size=0.2, random_state=random_state + ) + assert len(X_train) == 60 + assert len(X_conformalize) == 20 + assert len(X_test) == 20 + + def test_3_ints(self, dataset): + X, y = dataset + ( + X_train, X_conformalize, X_test, y_train, y_conformalize, y_test + ) = train_conformalize_test_split( + X, y, train_size=60, conformalize_size=20, + test_size=20, random_state=random_state + ) + assert len(X_train) == 60 + assert len(X_conformalize) == 20 + assert len(X_test) == 20 + + def test_random_state(self, dataset): + X, y = dataset + ( + X_train_1, X_conformalize_1, X_test_1, y_train_1, y_conformalize_1, y_test_1 + ) = train_conformalize_test_split( + X, y, train_size=60, conformalize_size=20, + test_size=20, random_state=random_state + ) + ( + X_train_2, X_conformalize_2, X_test_2, y_train_2, y_conformalize_2, y_test_2 + ) = train_conformalize_test_split( + X, y, train_size=60, conformalize_size=20, + test_size=20, random_state=random_state + ) + assert np.array_equal(X_train_1, X_train_2) + assert np.array_equal(X_conformalize_1, X_conformalize_2) + assert np.array_equal(X_test_1, X_test_2) + assert np.array_equal(y_train_1, y_train_2) + assert np.array_equal(y_conformalize_1, y_conformalize_2) + assert np.array_equal(y_test_1, y_test_2) + + def test_different_random_state(self, dataset): + X, y = dataset + ( + X_train_1, X_conformalize_1, X_test_1, y_train_1, y_conformalize_1, y_test_1 + ) = train_conformalize_test_split( + X, y, train_size=60, conformalize_size=20, + test_size=20, random_state=random_state + ) + ( + X_train_2, X_conformalize_2, X_test_2, y_train_2, y_conformalize_2, y_test_2 + ) = train_conformalize_test_split( + X, y, train_size=60, conformalize_size=20, + test_size=20, random_state=random_state + 1 + ) + assert not np.array_equal(X_train_1, X_train_2) + assert not np.array_equal(X_conformalize_1, X_conformalize_2) + assert not np.array_equal(X_test_1, X_test_2) + assert not np.array_equal(y_train_1, y_train_2) + assert not np.array_equal(y_conformalize_1, y_conformalize_2) + assert not np.array_equal(y_test_1, y_test_2) + + def test_shuffle_false(self, dataset): + X, y = dataset + ( + X_train, X_conformalize, X_test, y_train, y_conformalize, y_test + ) = train_conformalize_test_split( + X, y, train_size=60, conformalize_size=20, + test_size=20, random_state=random_state, shuffle=False + ) + assert np.array_equal(np.concatenate((y_train, y_conformalize, y_test)), y) + + +@pytest.fixture +def point_predictions(): + return np.array([1, 2, 3]) + + +@pytest.fixture +def point_and_interval_predictions(): + return np.array([1, 2]), np.array([3, 4]) + + +@pytest.mark.parametrize( + "confidence_level, expected", + [ + (0.9, 0.1), + (0.7, 0.3), + (0.999, 0.001), + ] +) +def test_transform_confidence_level_to_alpha(confidence_level, expected): + result = 
_transform_confidence_level_to_alpha(confidence_level) + assert result == expected + assert str(result) == str(expected) # Ensure clean representation + + +class TestTransformConfidenceLevelToAlphaList: + def test_non_list_iterable(self): + confidence_level = (0.8, 0.7) # Testing a non-list iterable + assert _transform_confidence_level_to_alpha_list(confidence_level) == [0.2, 0.3] + + def test_transform_confidence_level_to_alpha_is_called(self): + with patch( + 'mapie.utils._transform_confidence_level_to_alpha' + ) as mock_transform_confidence_level_to_alpha: + _transform_confidence_level_to_alpha_list([0.2, 0.3]) + mock_transform_confidence_level_to_alpha.assert_called() + + +class TestCheckIfParamInAllowedValues: + def test_error(self): + with pytest.raises(ValueError): + _check_if_param_in_allowed_values("invalid_option", "", ["valid_option"]) + + def test_ok(self): + assert _check_if_param_in_allowed_values("valid", "", ["valid"]) is None + + +def test_check_cv_not_string(): + with pytest.raises(ValueError): + _check_cv_not_string("string") + + +class TestCastPointPredictionsToNdarray: + def test_error(self, point_and_interval_predictions): + with pytest.raises(TypeError): + _cast_point_predictions_to_ndarray(point_and_interval_predictions) + + def test_valid_ndarray(self, point_predictions): + point_predictions = np.array([1, 2, 3]) + result = _cast_point_predictions_to_ndarray(point_predictions) + assert result is point_predictions + assert isinstance(result, np.ndarray) + + +class TestCastPredictionsToNdarrayTuple: + def test_error(self, point_predictions): + with pytest.raises(TypeError): + _cast_predictions_to_ndarray_tuple(point_predictions) + + def test_valid_ndarray(self, point_and_interval_predictions): + result = _cast_predictions_to_ndarray_tuple(point_and_interval_predictions) + assert result is point_and_interval_predictions + assert isinstance(result, tuple) + assert isinstance(result[0], np.ndarray) + assert isinstance(result[1], np.ndarray) + + +@pytest.mark.parametrize( + "params, expected", [(None, {}), ({"a": 1, "b": 2}, {"a": 1, "b": 2})] +) +def test_prepare_params(params, expected): + assert _prepare_params(params) == expected + assert _prepare_params(params) is not params + + +class TestPrepareFitParamsAndSampleWeight: + def test_uses_prepare_params(self): + with patch('mapie.utils._prepare_params') as mock_prepare_params: + _prepare_fit_params_and_sample_weight({"param1": 1}) + mock_prepare_params.assert_called() + + def test_with_sample_weight(self): + fit_params = {"sample_weight": [0.1, 0.2, 0.3]} + assert _prepare_fit_params_and_sample_weight(fit_params) == ( + {}, + [0.1, 0.2, 0.3] + ) + + def test_without_sample_weight(self): + params = {"param1": 1} + assert _prepare_fit_params_and_sample_weight(params) == (params, None) + + +class TestRaiseErrorIfPreviousMethodNotCalled: + def test_raises_error_when_previous_method_not_called(self): + with pytest.raises(ValueError): + _raise_error_if_previous_method_not_called( + "current_method", "previous_method", False + ) + + def test_does_nothing_when_previous_method_called(self): + assert _raise_error_if_previous_method_not_called( + "current_method", "previous_method", True + ) is None + + +class TestRaiseErrorIfMethodAlreadyCalled: + def test_raises_error_when_method_already_called(self): + with pytest.raises(ValueError): + _raise_error_if_method_already_called("method", True) + + def test_does_nothing_when_method_not_called(self): + assert _raise_error_if_method_already_called("method", False) is None + + +class 
TestRaiseErrorIfFitCalledInPrefitMode: + def test_raises_error_in_prefit_mode(self): + with pytest.raises(ValueError): + _raise_error_if_fit_called_in_prefit_mode(True) + + def test_does_nothing_when_not_in_prefit_mode(self): + assert _raise_error_if_fit_called_in_prefit_mode(False) is None + X_toy = np.array([0, 1, 2, 3, 4, 5]).reshape(-1, 1) y_toy = np.array([5, 7, 9, 11, 13, 15]) diff --git a/tests_v1/test_functional/README.md b/tests_v1/test_functional/README.md deleted file mode 100644 index 43da65d73..000000000 --- a/tests_v1/test_functional/README.md +++ /dev/null @@ -1,10 +0,0 @@ -# Scope - -Folder for testing the main functionalities of the API as seen from a user point-of-view. - -# Philosophy - -- New tests here should be added wisely. -- Group tests in a class if more than one test is needed for a given functionality. -- Be careful of test time. Testing varied scenarios is more important than trying to test all scenarios. -- Write black-box tests if possible (no mocks): don't test implementation details. diff --git a/tests_v1/test_functional/test_functional.py b/tests_v1/test_functional/test_functional.py deleted file mode 100644 index 56daf3681..000000000 --- a/tests_v1/test_functional/test_functional.py +++ /dev/null @@ -1,212 +0,0 @@ -import numpy as np -import pytest -from sklearn.datasets import make_regression, make_classification -from sklearn.linear_model import LinearRegression, QuantileRegressor -from sklearn.dummy import DummyRegressor, DummyClassifier -from sklearn.model_selection import train_test_split -from mapie.regression import ( - SplitConformalRegressor, - CrossConformalRegressor, - ConformalizedQuantileRegressor, JackknifeAfterBootstrapRegressor, -) -from mapie.classification import SplitConformalClassifier, CrossConformalClassifier - -RANDOM_STATE = 1 - - -@pytest.fixture(scope="module") -def dataset_regression(): - X, y = make_regression( - n_samples=500, n_features=2, noise=1.0, random_state=RANDOM_STATE - ) - X_train, X_conf_test, y_train, y_conf_test = train_test_split( - X, y, random_state=RANDOM_STATE - ) - X_conformalize, X_test, y_conformalize, y_test = train_test_split( - X_conf_test, y_conf_test, random_state=RANDOM_STATE - ) - return X_train, X_conformalize, X_test, y_train, y_conformalize, y_test - - -@pytest.fixture(scope="module") -def dataset_classification(): - X, y = make_classification( - n_samples=500, n_informative=5, n_classes=4, random_state=RANDOM_STATE, - ) - X_train, X_conf_test, y_train, y_conf_test = train_test_split( - X, y, random_state=RANDOM_STATE - ) - X_conformalize, X_test, y_conformalize, y_test = train_test_split( - X_conf_test, y_conf_test, random_state=RANDOM_STATE - ) - return X_train, X_conformalize, X_test, y_train, y_conformalize, y_test - - -def test_scr_same_predictions_prefit_not_prefit(dataset_regression) -> None: - X_train, X_conformalize, X_test, y_train, y_conformalize, y_test = ( - dataset_regression) - regressor = LinearRegression() - regressor.fit(X_train, y_train) - scr_prefit = SplitConformalRegressor(estimator=regressor, prefit=True) - scr_prefit.conformalize(X_conformalize, y_conformalize) - predictions_scr_prefit = scr_prefit.predict_interval(X_test) - - scr_not_prefit = SplitConformalRegressor(estimator=LinearRegression(), prefit=False) - scr_not_prefit.fit(X_train, y_train).conformalize(X_conformalize, y_conformalize) - predictions_scr_not_prefit = scr_not_prefit.predict_interval(X_test) - np.testing.assert_equal(predictions_scr_prefit, predictions_scr_not_prefit) - - -@pytest.mark.parametrize( - 
"split_technique,predict_method,dataset,estimator_class", - [ - ( - SplitConformalRegressor, - "predict_interval", - "dataset_regression", - DummyRegressor - ), - ( - ConformalizedQuantileRegressor, - "predict_interval", - "dataset_regression", - QuantileRegressor - ), - ( - SplitConformalClassifier, - "predict_set", - "dataset_classification", - DummyClassifier - ) - ] -) -class TestWrongMethodsOrderRaisesErrorForSplitTechniques: - def test_with_prefit_false( - self, - split_technique, - predict_method, - dataset, - estimator_class, - request - ): - dataset = request.getfixturevalue(dataset) - X_train, X_conformalize, X_test, y_train, y_conformalize, y_test = dataset - estimator = estimator_class() - technique = split_technique(estimator=estimator, prefit=False) - - with pytest.raises(ValueError, match=r"call fit before calling conformalize"): - technique.conformalize( - X_conformalize, - y_conformalize - ) - - technique.fit(X_train, y_train) - - with pytest.raises(ValueError, match=r"fit method already called"): - technique.fit(X_train, y_train) - with pytest.raises( - ValueError, - match=r"call conformalize before calling predict" - ): - technique.predict(X_test) - - with pytest.raises( - ValueError, - match=f"call conformalize before calling {predict_method}" - ): - getattr(technique, predict_method)(X_test) - - technique.conformalize(X_conformalize, y_conformalize) - - with pytest.raises(ValueError, match=r"conformalize method already called"): - technique.conformalize(X_conformalize, y_conformalize) - - def test_with_prefit_true( - self, - split_technique, - predict_method, - dataset, - estimator_class, - request - ): - dataset = request.getfixturevalue(dataset) - X_train, X_conformalize, X_test, y_train, y_conformalize, y_test = dataset - estimator = estimator_class() - estimator.fit(X_train, y_train) - - if split_technique == ConformalizedQuantileRegressor: - technique = split_technique(estimator=[estimator] * 3, prefit=True) - else: - technique = split_technique(estimator=estimator, prefit=True) - - with pytest.raises(ValueError, match=r"The fit method must be skipped"): - technique.fit(X_train, y_train) - with pytest.raises( - ValueError, - match=r"call conformalize before calling predict" - ): - technique.predict(X_test) - - with pytest.raises( - ValueError, - match=f"call conformalize before calling {predict_method}" - ): - getattr(technique, predict_method)(X_test) - - technique.conformalize(X_conformalize, y_conformalize) - - with pytest.raises(ValueError, match=r"conformalize method already called"): - technique.conformalize(X_conformalize, y_conformalize) - - -@pytest.mark.parametrize( - "cross_technique,predict_method,dataset,estimator_class", - [ - ( - CrossConformalRegressor, - "predict_interval", - "dataset_regression", - DummyRegressor - ), - ( - JackknifeAfterBootstrapRegressor, - "predict_interval", - "dataset_regression", - DummyRegressor - ), - ( - CrossConformalClassifier, - "predict_set", - "dataset_classification", - DummyClassifier - ), - ] -) -class TestWrongMethodsOrderRaisesErrorForCrossTechniques: - def test_wrong_methods_order( - self, - cross_technique, - predict_method, - dataset, - estimator_class, - request - ): - dataset = request.getfixturevalue(dataset) - X_train, X_conformalize, X_test, y_train, y_conformalize, y_test = dataset - technique = cross_technique(estimator=estimator_class()) - - with pytest.raises( - ValueError, - match=r"call fit_conformalize before calling predict" - ): - technique.predict(X_test) - with pytest.raises( - ValueError, - 
match=f"call fit_conformalize before calling {predict_method}" - ): - getattr(technique, predict_method)(X_test) - - technique.fit_conformalize(X_conformalize, y_conformalize) - - with pytest.raises(ValueError, match=r"fit_conformalize method already called"): - technique.fit_conformalize(X_conformalize, y_conformalize) diff --git a/tests_v1/test_functional/test_non_regression_classification.py b/tests_v1/test_functional/test_non_regression_classification.py deleted file mode 100644 index 58be6b6f1..000000000 --- a/tests_v1/test_functional/test_non_regression_classification.py +++ /dev/null @@ -1,419 +0,0 @@ -import numpy as np -import pytest -from numpy.random import RandomState -from sklearn.datasets import make_classification -from sklearn.ensemble import RandomForestClassifier -from sklearn.linear_model import LogisticRegression -from sklearn.model_selection import LeaveOneOut, GroupKFold - -from mapie.classification import ( - _MapieClassifier, - SplitConformalClassifier, - CrossConformalClassifier, -) -from mapie.conformity_scores import ( - RAPSConformityScore, - APSConformityScore, - TopKConformityScore, - LACConformityScore, -) -from tests_v1.test_functional.utils import ( - DummyClassifierWithFitAndPredictParams, - train_test_split_shuffle, -) -from numpy.typing import ArrayLike - -RANDOM_STATE = 1 - - -@pytest.fixture(scope="module") -def dataset(): - X, y = make_classification( - n_samples=1000, - n_informative=5, - n_classes=4, - random_state=RANDOM_STATE - ) - sample_weight = RandomState(RANDOM_STATE).random(len(X)) - groups = np.array([i % 5 for i in range(len(X))]) - - ( - X_train, - X_conformalize, - y_train, - y_conformalize, - sample_weight_train, - sample_weight_conformalize, - ) = train_test_split_shuffle( - X, y, random_state=RANDOM_STATE, sample_weight=sample_weight - ) - - return { - "X": X, - "y": y, - "sample_weight": sample_weight, - "groups": groups, - "X_train": X_train, - "X_conformalize": X_conformalize, - "y_train": y_train, - "y_conformalize": y_conformalize, - "sample_weight_train": sample_weight_train, - "sample_weight_conformalize": sample_weight_conformalize, - } - - -@pytest.fixture() -def params_split_test_1(): - return { - "v1": { - "__init__": { - "estimator": LogisticRegression(), - }, - }, - "v0": { - "__init__": { - "estimator": LogisticRegression(), - "conformity_score": LACConformityScore(), - "cv": "prefit" - }, - "predict": { - "alpha": 0.1, - }}} - - -@pytest.fixture() -def params_split_test_2(): - return { - "v1": { - "__init__": { - "estimator": DummyClassifierWithFitAndPredictParams(), - "confidence_level": 0.8, - "prefit": False, - "conformity_score": "top_k", - "random_state": RANDOM_STATE, - }, - "fit": { - "fit_params": {"dummy_fit_param": True}, - }, - "conformalize": { - "predict_params": {"dummy_predict_param": True}, - }}, - "v0": { - "__init__": { - "estimator": DummyClassifierWithFitAndPredictParams(), - "conformity_score": TopKConformityScore(), - "cv": "split", - "random_state": RANDOM_STATE, - }, - "fit": { - "fit_params": {"dummy_fit_param": True}, - "predict_params": {"dummy_predict_param": True}, - }, - "predict": { - "alpha": 0.2, - "dummy_predict_param": True, - }}} - - -@pytest.fixture() -def params_split_test_3(dataset): - return { - "v1": { - "__init__": { - "estimator": RandomForestClassifier(random_state=RANDOM_STATE), - "confidence_level": [0.8, 0.9], - "prefit": False, - "conformity_score": "aps", - "random_state": RANDOM_STATE, - }, - "fit": { - "fit_params": {"sample_weight": dataset["sample_weight_train"]}, - 
}, - "predict_set": { - "conformity_score_params": {"include_last_label": False} - }}, - "v0": { - "__init__": { - "estimator": RandomForestClassifier(random_state=RANDOM_STATE), - "conformity_score": APSConformityScore(), - "cv": "split", - "random_state": RANDOM_STATE, - }, - "fit": { - "sample_weight": dataset["sample_weight"], - }, - "predict": { - "alpha": [0.2, 0.1], - "include_last_label": False, - }}} - - -@pytest.fixture() -def params_split_test_4(): - return { - "v1": { - "__init__": { - "estimator": LogisticRegression(), - "conformity_score": "raps", - "random_state": RANDOM_STATE, - }}, - "v0": { - "__init__": { - "estimator": LogisticRegression(), - "conformity_score": RAPSConformityScore(), - "cv": "prefit", - "random_state": RANDOM_STATE, - }, - "predict": { - "alpha": 0.1, - }}} - - -@pytest.fixture() -def params_split_test_5(): - return { - "v1": { - "__init__": { - "estimator": LogisticRegression(), - "conformity_score": RAPSConformityScore(size_raps=0.4), - "random_state": RANDOM_STATE, - }}, - "v0": { - "__init__": { - "estimator": LogisticRegression(), - "conformity_score": RAPSConformityScore(size_raps=0.4), - "cv": "prefit", - "random_state": RANDOM_STATE, - }, - "predict": { - "alpha": 0.1, - }}} - - -@pytest.mark.parametrize( - "params_", [ - "params_split_test_1", - "params_split_test_2", - "params_split_test_3", - "params_split_test_4", - "params_split_test_5", - ] -) -def test_split(dataset, params_, request): - X, y, X_train, X_conformalize, y_train, y_conformalize = ( - dataset["X"], - dataset["y"], - dataset["X_train"], - dataset["X_conformalize"], - dataset["y_train"], - dataset["y_conformalize"], - ) - - params = extract_params(request.getfixturevalue(params_)) - - prefit = params["v1_init"].get("prefit", True) - - if prefit: - params["v0_init"]["estimator"].fit(X_train, y_train) - params["v1_init"]["estimator"].fit(X_train, y_train) - - v0 = _MapieClassifier(**params["v0_init"]) - v1 = SplitConformalClassifier(**params["v1_init"]) - - if prefit: - v0.fit(X_conformalize, y_conformalize, **params["v0_fit"]) - else: - v0.fit(X, y, **params["v0_fit"]) - v1.fit(X_train, y_train, **params["v1_fit"]) - v1.conformalize(X_conformalize, y_conformalize, **params["v1_conformalize"]) - - v0_preds, v0_pred_sets = v0.predict(X_conformalize, **params["v0_predict"]) - v1_preds, v1_pred_sets = v1.predict_set(X_conformalize, **params["v1_predict_set"]) - - v1_preds_using_predict: ArrayLike = v1.predict(X_conformalize) - - np.testing.assert_array_equal(v0_preds, v1_preds) - np.testing.assert_array_equal(v0_pred_sets, v1_pred_sets) - np.testing.assert_array_equal(v1_preds_using_predict, v1_preds) - - n_confidence_level = get_number_of_confidence_levels(params["v1_init"]) - - assert v1_pred_sets.shape == ( - len(X_conformalize), - len(np.unique(y)), - n_confidence_level, - ) - - -@pytest.fixture() -def params_cross_test_1(dataset): - return { - "v1": { - "__init__": { - "estimator": LogisticRegression(), - "confidence_level": 0.8, - "conformity_score": "lac", - "cv": 4, - "random_state": RANDOM_STATE, - }, - "fit_conformalize": { - "fit_params": {"sample_weight": dataset["sample_weight"]}, - }, - }, - "v0": { - "__init__": { - "estimator": LogisticRegression(), - "conformity_score": LACConformityScore(), - "cv": 4, - "random_state": RANDOM_STATE, - }, - "fit": { - "sample_weight": dataset["sample_weight"], - }, - "predict": { - "alpha": 0.2, - }}} - - -@pytest.fixture() -def params_cross_test_2(): - return { - "v1": { - "__init__": { - "estimator": 
DummyClassifierWithFitAndPredictParams(), - "confidence_level": [0.9, 0.8], - "conformity_score": "aps", - "cv": LeaveOneOut(), - "random_state": RANDOM_STATE, - }, - "fit_conformalize": { - "predict_params": {"dummy_predict_param": True}, - }, - "predict_set": { - "conformity_score_params": {"include_last_label": False} - }, - }, - "v0": { - "__init__": { - "estimator": DummyClassifierWithFitAndPredictParams(), - "conformity_score": APSConformityScore(), - "cv": LeaveOneOut(), - "random_state": RANDOM_STATE, - }, - "fit": { - "predict_params": {"dummy_predict_param": True}, - }, - "predict": { - "alpha": [0.1, 0.2], - "include_last_label": False, - "dummy_predict_param": True, - }}} - - -@pytest.fixture() -def params_cross_test_3(dataset): - return { - "v1": { - "__init__": { - "estimator": DummyClassifierWithFitAndPredictParams(), - "cv": GroupKFold(), - "random_state": RANDOM_STATE, - }, - "fit_conformalize": { - "groups": dataset["groups"], - "fit_params": {"dummy_fit_param": True}, - }, - "predict_set": { - "agg_scores": "crossval", - }, - }, - "v0": { - "__init__": { - "estimator": DummyClassifierWithFitAndPredictParams(), - "cv": GroupKFold(), - "random_state": RANDOM_STATE, - }, - "fit": { - "groups": dataset["groups"], - "fit_params": {"dummy_fit_param": True}, - }, - "predict": { - "alpha": 0.1, - "agg_scores": "crossval", - }}} - - -@pytest.fixture() -def params_cross_test_4(): - return { - "v1": { - "__init__": { - "estimator": RandomForestClassifier(random_state=RANDOM_STATE), - "confidence_level": 0.7, - "conformity_score": LACConformityScore(), - "random_state": RANDOM_STATE, - }, - }, - "v0": { - "__init__": { - "estimator": RandomForestClassifier(random_state=RANDOM_STATE), - "cv": 5, - "random_state": RANDOM_STATE, - }, - "predict": { - "alpha": 0.3, - }}} - - -@pytest.mark.parametrize( - "params_", [ - "params_cross_test_1", - "params_cross_test_2", - "params_cross_test_3", - "params_cross_test_4", - ] -) -def test_cross(dataset, params_, request): - X, y = dataset["X"], dataset["y"] - - params = extract_params(request.getfixturevalue(params_)) - - v0 = _MapieClassifier(**params["v0_init"]) - v1 = CrossConformalClassifier(**params["v1_init"]) - - v0.fit(X, y, **params["v0_fit"]) - v1.fit_conformalize(X, y, **params["v1_fit_conformalize"]) - - v0_preds, v0_pred_sets = v0.predict(X, **params["v0_predict"]) - v1_preds, v1_pred_sets = v1.predict_set(X, **params["v1_predict_set"]) - - v1_preds_using_predict: ArrayLike = v1.predict(X) - - np.testing.assert_array_equal(v0_preds, v1_preds) - np.testing.assert_array_equal(v0_pred_sets, v1_pred_sets) - np.testing.assert_array_equal(v1_preds_using_predict, v1_preds) - - n_confidence_level = get_number_of_confidence_levels(params["v1_init"]) - assert v1_pred_sets.shape == ( - len(X), - len(np.unique(y)), - n_confidence_level, - ) - - -def extract_params(params): - return { - "v0_init": params["v0"].get("__init__", {}), - "v0_fit": params["v0"].get("fit", {}), - "v0_predict": params["v0"].get("predict", {}), - "v1_init": params["v1"].get("__init__", {}), - "v1_fit": params["v1"].get("fit", {}), - "v1_conformalize": params["v1"].get("conformalize", {}), - "v1_predict_set": params["v1"].get("predict_set", {}), - "v1_fit_conformalize": params["v1"].get("fit_conformalize", {}) - } - - -def get_number_of_confidence_levels(v1_init_params): - confidence_level = v1_init_params.get("confidence_level", 0.9) - return 1 if isinstance(confidence_level, float) else len(confidence_level) diff --git a/tests_v1/test_functional/utils.py 
b/tests_v1/test_functional/utils.py deleted file mode 100644 index 576f3054d..000000000 --- a/tests_v1/test_functional/utils.py +++ /dev/null @@ -1,77 +0,0 @@ -from typing import Callable, Dict, Any, Optional, Tuple, Union - -import numpy as np -from sklearn.base import BaseEstimator, ClassifierMixin -from typing_extensions import Self - -from numpy.typing import NDArray, ArrayLike -import inspect -from sklearn.model_selection import ShuffleSplit - - -def train_test_split_shuffle( - X: NDArray, - y: NDArray, - test_size: float = None, - random_state: int = 42, - sample_weight: Optional[NDArray] = None, -) -> Union[Tuple[Any, Any, Any, Any], Tuple[Any, Any, Any, Any, Any, Any]]: - splitter = ShuffleSplit( - n_splits=1, - test_size=test_size, - random_state=random_state - ) - train_idx, test_idx = next(splitter.split(X)) - - X_train, X_test = X[train_idx], X[test_idx] - y_train, y_test = y[train_idx], y[test_idx] - if sample_weight is not None: - sample_weight_train = sample_weight[train_idx] - sample_weight_test = sample_weight[test_idx] - return X_train, X_test, y_train, y_test, sample_weight_train, sample_weight_test - - return X_train, X_test, y_train, y_test - - -def filter_params( - function: Callable, - params: Optional[Dict[str, Any]] = None -) -> Dict[str, Any]: - if params is None: - return {} - - model_params = inspect.signature(function).parameters - return {k: v for k, v in params.items() if k in model_params} - - -class DummyClassifierWithFitAndPredictParams(BaseEstimator, ClassifierMixin): - def __init__(self): - self.classes_ = None - self._dummy_fit_param = None - - def fit(self, X: ArrayLike, y: ArrayLike, dummy_fit_param: bool = False) -> Self: - self.classes_ = np.unique(y) - if len(self.classes_) < 2: - raise ValueError("Dummy classifier needs at least 3 classes") - self._dummy_fit_param = dummy_fit_param - return self - - def predict_proba(self, X: ArrayLike, dummy_predict_param: bool = False) -> NDArray: - probas = np.zeros((len(X), len(self.classes_))) - if self._dummy_fit_param & dummy_predict_param: - probas[:, 0] = 0.1 - probas[:, 1] = 0.9 - elif self._dummy_fit_param: - probas[:, 1] = 0.1 - probas[:, 2] = 0.9 - elif dummy_predict_param: - probas[:, 1] = 0.1 - probas[:, 0] = 0.9 - else: - probas[:, 2] = 0.1 - probas[:, 0] = 0.9 - return probas - - def predict(self, X: ArrayLike, dummy_predict_param: bool = False) -> NDArray: - y_preds_proba = self.predict_proba(X, dummy_predict_param) - return np.amax(y_preds_proba, axis=0) diff --git a/tests_v1/test_unit/README.md b/tests_v1/test_unit/README.md deleted file mode 100644 index 51c84eeab..000000000 --- a/tests_v1/test_unit/README.md +++ /dev/null @@ -1,9 +0,0 @@ -# Scope - -Folder for testing small functions ("unit" = function). - -# Philosophy - -- Group tests in a class if more than one test is needed for a given function. -- Focus on the function goal to define the test cases (the function name is a good hint). -- Prefer black-box tests (no mocks) if possible, to avoid testing implementation details. 
diff --git a/tests_v1/test_unit/test_conformity_scores_utils.py b/tests_v1/test_unit/test_conformity_scores_utils.py deleted file mode 100644 index e27c11e60..000000000 --- a/tests_v1/test_unit/test_conformity_scores_utils.py +++ /dev/null @@ -1,37 +0,0 @@ -import pytest - -from mapie.conformity_scores.utils import ( - check_and_select_conformity_score, -) -from mapie.conformity_scores.regression import BaseRegressionScore -from mapie.conformity_scores.classification import BaseClassificationScore -from mapie.conformity_scores.bounds import ( - AbsoluteConformityScore, - GammaConformityScore, -) -from mapie.conformity_scores.sets import ( - LACConformityScore, - TopKConformityScore, -) - - -class TestCheckAndSelectConformityScore: - - @pytest.mark.parametrize( - "score, score_type, expected_class", [ - (AbsoluteConformityScore(), BaseRegressionScore, AbsoluteConformityScore), - ("gamma", BaseRegressionScore, GammaConformityScore), - (LACConformityScore(), BaseClassificationScore, LACConformityScore), - ("top_k", BaseClassificationScore, TopKConformityScore), - ] - ) - def test_with_valid_inputs(self, score, score_type, expected_class): - result = check_and_select_conformity_score(score, score_type) - assert isinstance(result, expected_class) - - @pytest.mark.parametrize( - "score_type", [BaseRegressionScore, BaseClassificationScore] - ) - def test_with_invalid_input(self, score_type): - with pytest.raises(ValueError): - check_and_select_conformity_score("I'm not a valid input :(", score_type) diff --git a/tests_v1/test_unit/test_regression.py b/tests_v1/test_unit/test_regression.py deleted file mode 100644 index 14a75c627..000000000 --- a/tests_v1/test_unit/test_regression.py +++ /dev/null @@ -1,28 +0,0 @@ -import pytest -from mapie.subsample import Subsample -from mapie.regression import JackknifeAfterBootstrapRegressor - - -class TestCheckAndConvertResamplingToCv: - def test_with_integer(self): - regressor = JackknifeAfterBootstrapRegressor() - cv = regressor._check_and_convert_resampling_to_cv(50) - - assert isinstance(cv, Subsample) - assert cv.n_resamplings == 50 - - def test_with_subsample(self): - custom_subsample = Subsample(n_resamplings=25, random_state=42) - regressor = JackknifeAfterBootstrapRegressor() - cv = regressor._check_and_convert_resampling_to_cv(custom_subsample) - - assert cv is custom_subsample - - def test_with_invalid_input(self): - regressor = JackknifeAfterBootstrapRegressor() - - with pytest.raises( - ValueError, - match="resampling must be an integer or a Subsample instance" - ): - regressor._check_and_convert_resampling_to_cv("invalid_input") diff --git a/tests_v1/test_unit/test_utils.py b/tests_v1/test_unit/test_utils.py deleted file mode 100644 index 4f31615cf..000000000 --- a/tests_v1/test_unit/test_utils.py +++ /dev/null @@ -1,266 +0,0 @@ -import numpy as np -import pytest -from sklearn.datasets import make_regression - -from mapie.utils import ( - _prepare_params, - _prepare_fit_params_and_sample_weight, - _transform_confidence_level_to_alpha_list, - _transform_confidence_level_to_alpha, - _check_if_param_in_allowed_values, - _check_cv_not_string, - _cast_point_predictions_to_ndarray, - _cast_predictions_to_ndarray_tuple, - _raise_error_if_previous_method_not_called, - _raise_error_if_method_already_called, - _raise_error_if_fit_called_in_prefit_mode, - train_conformalize_test_split -) -from unittest.mock import patch - - -RANDOM_STATE = 1 - - -@pytest.fixture(scope="module") -def dataset(): - X, y = make_regression( - n_samples=100, n_features=2, 
diff --git a/tests_v1/test_unit/test_utils.py b/tests_v1/test_unit/test_utils.py
deleted file mode 100644
index 4f31615cf..000000000
--- a/tests_v1/test_unit/test_utils.py
+++ /dev/null
@@ -1,266 +0,0 @@
-import numpy as np
-import pytest
-from sklearn.datasets import make_regression
-
-from mapie.utils import (
-    _prepare_params,
-    _prepare_fit_params_and_sample_weight,
-    _transform_confidence_level_to_alpha_list,
-    _transform_confidence_level_to_alpha,
-    _check_if_param_in_allowed_values,
-    _check_cv_not_string,
-    _cast_point_predictions_to_ndarray,
-    _cast_predictions_to_ndarray_tuple,
-    _raise_error_if_previous_method_not_called,
-    _raise_error_if_method_already_called,
-    _raise_error_if_fit_called_in_prefit_mode,
-    train_conformalize_test_split
-)
-from unittest.mock import patch
-
-
-RANDOM_STATE = 1
-
-
-@pytest.fixture(scope="module")
-def dataset():
-    X, y = make_regression(
-        n_samples=100, n_features=2, noise=1.0, random_state=RANDOM_STATE
-    )
-    return X, y
-
-
-class TestTrainConformalizeTestSplit:
-
-    def test_error_sum_int_is_not_dataset_size(self, dataset):
-        X, y = dataset
-        with pytest.raises(ValueError):
-            train_conformalize_test_split(
-                X, y, train_size=1, conformalize_size=1,
-                test_size=1, random_state=RANDOM_STATE
-            )
-
-    def test_error_sum_float_is_not_1(self, dataset):
-        X, y = dataset
-        with pytest.raises(ValueError):
-            train_conformalize_test_split(
-                X, y, train_size=0.5, conformalize_size=0.5,
-                test_size=0.5, random_state=RANDOM_STATE
-            )
-
-    def test_error_sizes_are_int_and_float(self, dataset):
-        X, y = dataset
-        with pytest.raises(TypeError):
-            train_conformalize_test_split(
-                X, y, train_size=5, conformalize_size=0.5,
-                test_size=0.5, random_state=RANDOM_STATE
-            )
-
-    def test_3_floats(self, dataset):
-        X, y = dataset
-        (
-            X_train, X_conformalize, X_test, y_train, y_conformalize, y_test
-        ) = train_conformalize_test_split(
-            X, y, train_size=0.6, conformalize_size=0.2,
-            test_size=0.2, random_state=RANDOM_STATE
-        )
-        assert len(X_train) == 60
-        assert len(X_conformalize) == 20
-        assert len(X_test) == 20
-
-    def test_3_ints(self, dataset):
-        X, y = dataset
-        (
-            X_train, X_conformalize, X_test, y_train, y_conformalize, y_test
-        ) = train_conformalize_test_split(
-            X, y, train_size=60, conformalize_size=20,
-            test_size=20, random_state=RANDOM_STATE
-        )
-        assert len(X_train) == 60
-        assert len(X_conformalize) == 20
-        assert len(X_test) == 20
-
-    def test_random_state(self, dataset):
-        X, y = dataset
-        (
-            X_train_1, X_conformalize_1, X_test_1, y_train_1, y_conformalize_1, y_test_1
-        ) = train_conformalize_test_split(
-            X, y, train_size=60, conformalize_size=20,
-            test_size=20, random_state=RANDOM_STATE
-        )
-        (
-            X_train_2, X_conformalize_2, X_test_2, y_train_2, y_conformalize_2, y_test_2
-        ) = train_conformalize_test_split(
-            X, y, train_size=60, conformalize_size=20,
-            test_size=20, random_state=RANDOM_STATE
-        )
-        assert np.array_equal(X_train_1, X_train_2)
-        assert np.array_equal(X_conformalize_1, X_conformalize_2)
-        assert np.array_equal(X_test_1, X_test_2)
-        assert np.array_equal(y_train_1, y_train_2)
-        assert np.array_equal(y_conformalize_1, y_conformalize_2)
-        assert np.array_equal(y_test_1, y_test_2)
-
-    def test_different_random_state(self, dataset):
-        X, y = dataset
-        (
-            X_train_1, X_conformalize_1, X_test_1, y_train_1, y_conformalize_1, y_test_1
-        ) = train_conformalize_test_split(
-            X, y, train_size=60, conformalize_size=20,
-            test_size=20, random_state=RANDOM_STATE
-        )
-        (
-            X_train_2, X_conformalize_2, X_test_2, y_train_2, y_conformalize_2, y_test_2
-        ) = train_conformalize_test_split(
-            X, y, train_size=60, conformalize_size=20,
-            test_size=20, random_state=RANDOM_STATE + 1
-        )
-        assert not np.array_equal(X_train_1, X_train_2)
-        assert not np.array_equal(X_conformalize_1, X_conformalize_2)
-        assert not np.array_equal(X_test_1, X_test_2)
-        assert not np.array_equal(y_train_1, y_train_2)
-        assert not np.array_equal(y_conformalize_1, y_conformalize_2)
-        assert not np.array_equal(y_test_1, y_test_2)
-
-    def test_shuffle_false(self, dataset):
-        X, y = dataset
-        (
-            X_train, X_conformalize, X_test, y_train, y_conformalize, y_test
-        ) = train_conformalize_test_split(
-            X, y, train_size=60, conformalize_size=20,
-            test_size=20, random_state=RANDOM_STATE, shuffle=False
-        )
-        assert np.array_equal(np.concatenate((y_train, y_conformalize, y_test)), y)
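As the tests above document, `train_conformalize_test_split` accepts either three floats summing to 1 or three integers summing to `len(X)` (mixing the two raises a `TypeError`), and `shuffle=False` preserves the original sample order. A typical call looks like this:

```python
from sklearn.datasets import make_regression

from mapie.utils import train_conformalize_test_split

X, y = make_regression(n_samples=100, n_features=2, random_state=1)
(X_train, X_conformalize, X_test,
 y_train, y_conformalize, y_test) = train_conformalize_test_split(
    X, y, train_size=0.6, conformalize_size=0.2, test_size=0.2, random_state=1
)
print(len(X_train), len(X_conformalize), len(X_test))  # 60 20 20
```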
-
-
-@pytest.fixture
-def point_predictions():
-    return np.array([1, 2, 3])
-
-
-@pytest.fixture
-def point_and_interval_predictions():
-    return np.array([1, 2]), np.array([3, 4])
-
-
-@pytest.mark.parametrize(
-    "confidence_level, expected",
-    [
-        (0.9, 0.1),
-        (0.7, 0.3),
-        (0.999, 0.001),
-    ]
-)
-def test_transform_confidence_level_to_alpha(confidence_level, expected):
-    result = _transform_confidence_level_to_alpha(confidence_level)
-    assert result == expected
-    assert str(result) == str(expected)  # Ensure clean representation
-
-
-class TestTransformConfidenceLevelToAlphaList:
-    def test_non_list_iterable(self):
-        confidence_level = (0.8, 0.7)  # Testing a non-list iterable
-        assert _transform_confidence_level_to_alpha_list(confidence_level) == [0.2, 0.3]
-
-    def test_transform_confidence_level_to_alpha_is_called(self):
-        with patch(
-            'mapie.utils._transform_confidence_level_to_alpha'
-        ) as mock_transform_confidence_level_to_alpha:
-            _transform_confidence_level_to_alpha_list([0.2, 0.3])
-            mock_transform_confidence_level_to_alpha.assert_called()
-
-
-class TestCheckIfParamInAllowedValues:
-    def test_error(self):
-        with pytest.raises(ValueError):
-            _check_if_param_in_allowed_values("invalid_option", "", ["valid_option"])
-
-    def test_ok(self):
-        assert _check_if_param_in_allowed_values("valid", "", ["valid"]) is None
-
-
-def test_check_cv_not_string():
-    with pytest.raises(ValueError):
-        _check_cv_not_string("string")
-
-
-class TestCastPointPredictionsToNdarray:
-    def test_error(self, point_and_interval_predictions):
-        with pytest.raises(TypeError):
-            _cast_point_predictions_to_ndarray(point_and_interval_predictions)
-
-    def test_valid_ndarray(self, point_predictions):
-        point_predictions = np.array([1, 2, 3])
-        result = _cast_point_predictions_to_ndarray(point_predictions)
-        assert result is point_predictions
-        assert isinstance(result, np.ndarray)
-
-
-class TestCastPredictionsToNdarrayTuple:
-    def test_error(self, point_predictions):
-        with pytest.raises(TypeError):
-            _cast_predictions_to_ndarray_tuple(point_predictions)
-
-    def test_valid_ndarray(self, point_and_interval_predictions):
-        result = _cast_predictions_to_ndarray_tuple(point_and_interval_predictions)
-        assert result is point_and_interval_predictions
-        assert isinstance(result, tuple)
-        assert isinstance(result[0], np.ndarray)
-        assert isinstance(result[1], np.ndarray)
-
-
-@pytest.mark.parametrize(
-    "params, expected", [(None, {}), ({"a": 1, "b": 2}, {"a": 1, "b": 2})]
-)
-def test_prepare_params(params, expected):
-    assert _prepare_params(params) == expected
-    assert _prepare_params(params) is not params
-
-
-class TestPrepareFitParamsAndSampleWeight:
-    def test_uses_prepare_params(self):
-        with patch('mapie.utils._prepare_params') as mock_prepare_params:
-            _prepare_fit_params_and_sample_weight({"param1": 1})
-            mock_prepare_params.assert_called()
-
-    def test_with_sample_weight(self):
-        fit_params = {"sample_weight": [0.1, 0.2, 0.3]}
-        assert _prepare_fit_params_and_sample_weight(fit_params) == (
-            {},
-            [0.1, 0.2, 0.3]
-        )
-
-    def test_without_sample_weight(self):
-        params = {"param1": 1}
-        assert _prepare_fit_params_and_sample_weight(params) == (params, None)
-
-
-class TestRaiseErrorIfPreviousMethodNotCalled:
-    def test_raises_error_when_previous_method_not_called(self):
-        with pytest.raises(ValueError):
-            _raise_error_if_previous_method_not_called(
-                "current_method", "previous_method", False
-            )
-
-    def test_does_nothing_when_previous_method_called(self):
-        assert _raise_error_if_previous_method_not_called(
-            "current_method", "previous_method", True
-        ) is None
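The three `_raise_error_if_*` guards tested here are what enforce the fit → conformalize → predict ordering exercised in `mapie/tests/test_common.py`. A minimal sketch of how such guards compose in a wrapper (attribute names and message wording are illustrative, not the actual implementation):

```python
class SplitWorkflowSketch:
    """Illustrative sketch: enforces fit -> conformalize -> predict."""

    def __init__(self) -> None:
        self._is_fitted = False
        self._is_conformalized = False

    def fit(self, X, y) -> "SplitWorkflowSketch":
        # Mirrors _raise_error_if_method_already_called("fit", ...).
        if self._is_fitted:
            raise ValueError("The fit method has already been called.")
        self._is_fitted = True
        return self

    def conformalize(self, X, y) -> "SplitWorkflowSketch":
        # Mirrors _raise_error_if_previous_method_not_called(...).
        if not self._is_fitted:
            raise ValueError("Please call fit before calling conformalize.")
        if self._is_conformalized:
            raise ValueError("The conformalize method has already been called.")
        self._is_conformalized = True
        return self

    def predict(self, X):
        if not self._is_conformalized:
            raise ValueError("Please call conformalize before calling predict.")
        return X  # Placeholder: a real wrapper would delegate to its estimator.
```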
-
-
-class TestRaiseErrorIfMethodAlreadyCalled:
-    def test_raises_error_when_method_already_called(self):
-        with pytest.raises(ValueError):
-            _raise_error_if_method_already_called("method", True)
-
-    def test_does_nothing_when_method_not_called(self):
-        assert _raise_error_if_method_already_called("method", False) is None
-
-
-class TestRaiseErrorIfFitCalledInPrefitMode:
-    def test_raises_error_in_prefit_mode(self):
-        with pytest.raises(ValueError):
-            _raise_error_if_fit_called_in_prefit_mode(True)
-
-    def test_does_nothing_when_not_in_prefit_mode(self):
-        assert _raise_error_if_fit_called_in_prefit_mode(False) is None
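One detail worth noting in `test_transform_confidence_level_to_alpha` above: naive subtraction leaves floating-point noise (`1 - 0.9 == 0.09999999999999998`), which is why the test also asserts on `str(result)`. One possible way to produce the clean value, assuming rounding is acceptable (the actual implementation may differ):

```python
def transform_confidence_level_to_alpha(confidence_level: float) -> float:
    # Round away float noise so 0.9 -> 0.1, not 0.09999999999999998.
    return round(1 - confidence_level, 10)


assert transform_confidence_level_to_alpha(0.9) == 0.1
assert str(transform_confidence_level_to_alpha(0.999)) == "0.001"
```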