diff --git a/docs/src/api.rst b/docs/src/api.rst
index dd6d6d2b9..86ef19945 100644
--- a/docs/src/api.rst
+++ b/docs/src/api.rst
@@ -37,11 +37,14 @@ Feature Importance functions
 .. autosummary::
    :toctree: ./generated/api/class/
    :template: function.rst
-
+   
+   cfi_analysis
    clustered_inference
    clustered_inference_pvalue
    ensemble_clustered_inference
    ensemble_clustered_inference_pvalue
+   loco_analysis
+   pfi_analysis
 
 Visualization
 =============
@@ -60,8 +63,8 @@ Samplers
    :toctree: ./generated/api/class/
    :template: class.rst
 
-   ~statistical_tools.ConditionalSampler
-   ~statistical_tools.GaussianKnockoffs
+   ~samplers.ConditionalSampler
+   ~samplers.GaussianKnockoffs
 
 Helper Functions
 ================
diff --git a/examples/plot_conditional_vs_marginal_xor_data.py b/examples/plot_conditional_vs_marginal_xor_data.py
index 859d9df9e..c45dd0c7e 100644
--- a/examples/plot_conditional_vs_marginal_xor_data.py
+++ b/examples/plot_conditional_vs_marginal_xor_data.py
@@ -131,7 +131,7 @@
         random_state=0,
     )
     vim.fit(X_train, y_train)
-    importances.append(vim.importance(X_test, y_test)["importance"])
+    importances.append(vim.importance(X_test, y_test))
 
 importances = np.array(importances).T
 
diff --git a/examples/plot_diabetes_variable_importance_example.py b/examples/plot_diabetes_variable_importance_example.py
index aa36b4c05..9a03da1a9 100644
--- a/examples/plot_diabetes_variable_importance_example.py
+++ b/examples/plot_diabetes_variable_importance_example.py
@@ -63,7 +63,7 @@
 import numpy as np
 from sklearn.base import clone
 from sklearn.linear_model import LogisticRegressionCV, RidgeCV
-from sklearn.metrics import r2_score, root_mean_squared_error
+from sklearn.metrics import mean_squared_error, r2_score
 from sklearn.model_selection import KFold
 
 n_folds = 5
@@ -78,7 +78,7 @@
     score = r2_score(
         y_true=y[test_index], y_pred=regressor_list[i].predict(X[test_index])
     )
-    mse = root_mean_squared_error(
+    mse = mean_squared_error(
         y_true=y[test_index], y_pred=regressor_list[i].predict(X[test_index])
     )
 
@@ -166,14 +166,14 @@
 import pandas as pd
 from scipy.stats import ttest_1samp
 
-cfi_vim_arr = np.array([x["importance"] for x in cfi_importance_list]) / 2
+cfi_vim_arr = np.array(cfi_importance_list) / 2
 cfi_pval = ttest_1samp(cfi_vim_arr, 0, alternative="greater").pvalue
 
 vim = [
     pd.DataFrame(
         {
             "var": np.arange(cfi_vim_arr.shape[1]),
-            "importance": x["importance"],
+            "importance": x,
             "fold": i,
             "pval": cfi_pval,
             "method": "CFI",
@@ -182,14 +182,14 @@
     for x in cfi_importance_list
 ]
 
-loco_vim_arr = np.array([x["importance"] for x in loco_importance_list])
+loco_vim_arr = np.array(loco_importance_list)
 loco_pval = ttest_1samp(loco_vim_arr, 0, alternative="greater").pvalue
 
 vim += [
     pd.DataFrame(
         {
             "var": np.arange(loco_vim_arr.shape[1]),
-            "importance": x["importance"],
+            "importance": x,
             "fold": i,
             "pval": loco_pval,
             "method": "LOCO",
@@ -198,14 +198,14 @@
     for x in loco_importance_list
 ]
 
-pfi_vim_arr = np.array([x["importance"] for x in pfi_importance_list])
+pfi_vim_arr = np.array(pfi_importance_list)
 pfi_pval = ttest_1samp(pfi_vim_arr, 0, alternative="greater").pvalue
 
 vim += [
     pd.DataFrame(
         {
             "var": np.arange(pfi_vim_arr.shape[1]),
-            "importance": x["importance"],
+            "importance": x,
             "fold": i,
             "pval": pfi_pval,
             "method": "PFI",
diff --git a/examples/plot_importance_classification_iris.py b/examples/plot_importance_classification_iris.py
index b1d87ff86..e17e19d1e 100644
--- a/examples/plot_importance_classification_iris.py
+++ b/examples/plot_importance_classification_iris.py
@@ -107,7 +107,7 @@ def run_one_fold(
         )
 
     vim.fit(X[train_index], y[train_index])
-    importance = vim.importance(X[test_index], y[test_index])["importance"]
+    importance = vim.importance(X[test_index], y[test_index])
 
     return pd.DataFrame(
         {
diff --git a/examples/plot_knockoffs_wisconsin.py b/examples/plot_knockoffs_wisconsin.py
index ad1a85c57..115e98e05 100644
--- a/examples/plot_knockoffs_wisconsin.py
+++ b/examples/plot_knockoffs_wisconsin.py
@@ -147,7 +147,7 @@
 from sklearn.covariance import LedoitWolf
 
 from hidimstat import ModelXKnockoff
-from hidimstat.statistical_tools.gaussian_knockoffs import GaussianKnockoffs
+from hidimstat.samplers import GaussianKnockoffs
 
 model_x_knockoff = ModelXKnockoff(
     ko_generator=GaussianKnockoffs(
diff --git a/examples/plot_loco.py b/examples/plot_loco.py
index 3ac3c7d8c..dbd9e62a5 100644
--- a/examples/plot_loco.py
+++ b/examples/plot_loco.py
@@ -82,7 +82,7 @@
     # importance. This process is repeated for all features to assess their individual
     # contributions.
     loco.fit(X_train, y_train)
-    importances = loco.importance(X_test, y_test)["importance"]
+    importances = loco.importance(X_test, y_test)
     df_list.append(
         pd.DataFrame(
             {
diff --git a/examples/plot_model_agnostic_importance.py b/examples/plot_model_agnostic_importance.py
index 64487452f..5be96ad29 100644
--- a/examples/plot_model_agnostic_importance.py
+++ b/examples/plot_model_agnostic_importance.py
@@ -134,12 +134,8 @@
     vim_linear.fit(X[train], y[train])
     vim_non_linear.fit(X[train], y[train])
 
-    importances_linear.append(
-        vim_linear.importance(X[test], y[test])["importance"],
-    )
-    importances_non_linear.append(
-        vim_non_linear.importance(X[test], y[test])["importance"]
-    )
+    importances_linear.append(vim_linear.importance(X[test], y[test]))
+    importances_non_linear.append(vim_non_linear.importance(X[test], y[test]))
 
 
 # %%
diff --git a/examples/plot_pitfalls_permutation_importance.py b/examples/plot_pitfalls_permutation_importance.py
index a4e0ed310..00185f905 100644
--- a/examples/plot_pitfalls_permutation_importance.py
+++ b/examples/plot_pitfalls_permutation_importance.py
@@ -144,7 +144,7 @@
     )
     pfi.fit(X_test, y_test)
 
-    permutation_importances.append(pfi.importance(X_test, y_test)["importance"])
+    permutation_importances.append(pfi.importance(X_test, y_test))
 permutation_importances = np.stack(permutation_importances)
 pval_pfi = ttest_1samp(
     permutation_importances, 0.0, axis=0, alternative="greater"
@@ -216,7 +216,7 @@
     )
     cfi.fit(X_test, y_test)
 
-    conditional_importances.append(cfi.importance(X_test, y_test)["importance"])
+    conditional_importances.append(cfi.importance(X_test, y_test))
 
 
 cfi_pval = ttest_1samp(
@@ -267,7 +267,7 @@
 
 from matplotlib.lines import Line2D
 
-from hidimstat.statistical_tools.conditional_sampling import ConditionalSampler
+from hidimstat.samplers.conditional_sampling import ConditionalSampler
 
 X_train, X_test = train_test_split(
     X,
diff --git a/src/hidimstat/__init__.py b/src/hidimstat/__init__.py
index 3cb37dadd..edebc8d18 100644
--- a/src/hidimstat/__init__.py
+++ b/src/hidimstat/__init__.py
@@ -1,4 +1,4 @@
-from .conditional_feature_importance import CFI
+from .conditional_feature_importance import CFI, cfi_analysis
 from .desparsified_lasso import DesparsifiedLasso, desparsified_lasso, reid
 from .distilled_conditional_randomization_test import D0CRT, d0crt
 from .ensemble_clustered_inference import (
@@ -8,8 +8,8 @@
     ensemble_clustered_inference_pvalue,
 )
 from .knockoffs import ModelXKnockoff
-from .leave_one_covariate_out import LOCO
-from .permutation_feature_importance import PFI
+from .leave_one_covariate_out import LOCO, loco_analysis
+from .permutation_feature_importance import PFI, pfi_analysis
 from .statistical_tools.aggregation import quantile_aggregation
 
 try:
@@ -30,6 +30,9 @@
     "reid",
     "ModelXKnockoff",
     "CFI",
+    "cfi_analysis",
     "LOCO",
+    "loco_analysis",
     "PFI",
+    "pfi_analysis",
 ]
diff --git a/src/hidimstat/_utils/utils.py b/src/hidimstat/_utils/utils.py
index 228a3bdfb..459afb0cb 100644
--- a/src/hidimstat/_utils/utils.py
+++ b/src/hidimstat/_utils/utils.py
@@ -1,7 +1,11 @@
 import numbers
+from functools import partial
 
 import numpy as np
 from numpy.random import RandomState
+from scipy.stats import ttest_1samp, wilcoxon
+
+from hidimstat.statistical_tools.nadeau_bengio_ttest import nadeau_bengio_ttest
 
 
 def _check_vim_predict_method(method):
@@ -33,6 +37,37 @@ def _check_vim_predict_method(method):
         )
 
 
+def get_fitted_attributes(cls):
+    """
+    Get all attributes from a class that end with a single underscore
+    and doesn't start with one underscore.
+
+    Parameters
+    ----------
+    cls : class
+        The class to inspect for attributes.
+
+    Returns
+    -------
+    list
+        A list of attribute names that end with a single underscore but not double underscore.
+    """
+    # Get all attributes and methods of the class
+    all_attributes = dir(cls)
+
+    # Filter out attributes that start with an underscore
+    filtered_attributes = [attr for attr in all_attributes if not attr.startswith("_")]
+
+    # Filter out attributes that do not end with a single underscore
+    result = [
+        attr
+        for attr in filtered_attributes
+        if attr.endswith("_") and not attr.endswith("__")
+    ]
+
+    return result
+
+
 def check_random_state(seed):
     """
     Modified version of sklearn's check_random_state using np.random.Generator.
@@ -105,3 +140,56 @@ def seed_estimator(estimator, random_state=None):
                 setattr(value, "random_state", RandomState(rng.bit_generator))
 
     return estimator
+
+
+def check_statistical_test(statistical_test, test_frac=None):
+    """
+    Validates and returns a test statistic function.
+
+    Parameters
+    ----------
+    statisticcal_test : str or callable
+        If str, must be either 'ttest' or 'wilcoxon'.
+        If callable, must be a function that can be used as a test statistic.
+    test_frac : float, optional
+        The fraction of data used for testing in the Nadeau-Bengio t-test.
+
+    Returns
+    -------
+    callable
+        A function that can be used as a test statistic.
+        For string inputs, returns a partial function of either ttest_1samp or wilcoxon.
+        For callable inputs, returns the input function.
+
+    Raises
+    ------
+    ValueError
+        If test is a string but not one of the supported test names ('ttest' or 'wilcoxon').
+    ValueError
+        If test is neither a string nor a callable.
+    """
+    if isinstance(statistical_test, str):
+        if statistical_test == "ttest":
+            return partial(ttest_1samp, popmean=0, alternative="greater", axis=1)
+        elif statistical_test == "wilcoxon":
+            return partial(wilcoxon, alternative="greater", axis=1)
+        elif statistical_test == "nb-ttest":
+            return partial(
+                nadeau_bengio_ttest,
+                popmean=0,
+                test_frac=test_frac,
+                alternative="greater",
+                axis=1,
+            )
+        else:
+            raise ValueError(f"the test '{statistical_test}' is not supported")
+    elif callable(statistical_test):
+        return statistical_test
+    else:
+        raise ValueError(
+            f"Unsupported value for 'statistical_test'."
+            f"The provided argument was '{statistical_test}'. "
+            f"Please choose from the following valid options: "
+            f"string values ('ttest', 'wilcoxon', 'nb-ttest') "
+            f"or a custom callable function with a `scipy.stats` API-compatible signature."
+        )
diff --git a/src/hidimstat/base_perturbation.py b/src/hidimstat/base_perturbation.py
index 11d3d7aac..35b49bb29 100644
--- a/src/hidimstat/base_perturbation.py
+++ b/src/hidimstat/base_perturbation.py
@@ -1,9 +1,13 @@
 import numpy as np
 from joblib import Parallel, delayed
 from sklearn.base import check_is_fitted
-from sklearn.metrics import root_mean_squared_error
+from sklearn.metrics import mean_squared_error
 
-from hidimstat._utils.utils import _check_vim_predict_method, check_random_state
+from hidimstat._utils.utils import (
+    _check_vim_predict_method,
+    check_random_state,
+    check_statistical_test,
+)
 from hidimstat.base_variable_importance import (
     BaseVariableImportance,
     GroupVariableImportanceMixin,
@@ -11,56 +15,80 @@
 
 
 class BasePerturbation(BaseVariableImportance, GroupVariableImportanceMixin):
+    """
+    Abstract base class for model-agnostic variable importance measures using
+    perturbation techniques.
+
+    Parameters
+    ----------
+    estimator : sklearn-compatible estimator
+        The fitted estimator used for predictions.
+    method : str, default="predict"
+        The method used for making predictions. This determines the predictions
+        passed to the loss function. Supported methods are "predict",
+        "predict_proba", "decision_function", "transform".
+    loss : callable, default=mean_squared_error
+        The function to compute the loss when comparing the perturbed model
+        to the original model.
+    n_permutations : int, default=50
+        Number of permutations for each feature group.
+    statistical_test : callable or str, default="nb-ttest"
+        Statistical test function for computing p-values from importance scores.
+    features_groups : dict or None, default=None
+        Mapping of group names to lists of feature indices or names. If None, groups are inferred.
+    n_jobs : int, default=1
+        Number of parallel jobs for computation.
+    random_state : int or None, default=None
+        Seed for reproducible permutations.
+
+    Attributes
+    ----------
+    features_groups : dict
+        Mapping of feature groups identified during fit.
+    importances_ : ndarray (n_groups,)
+        Importance scores for each feature group.
+    loss_reference_ : float
+        Loss on original (non-perturbed) data.
+    loss_ : dict
+        Loss values for each permutation of each group.
+    pvalues_ : ndarray of shape (n_groups,)
+        P-values for importance scores.
+
+    Notes
+    -----
+    This class is abstract. Subclasses must implement the `_permutation` method
+    to define how feature groups are perturbed.
+    """
+
     def __init__(
         self,
         estimator,
-        loss: callable = root_mean_squared_error,
-        n_permutations: int = 50,
         method: str = "predict",
+        loss: callable = mean_squared_error,
+        n_permutations: int = 50,
+        statistical_test="ttest",
         features_groups=None,
         n_jobs: int = 1,
         random_state=None,
     ):
-        """
-        Base class for model-agnostic variable importance measures based on
-        perturbation.
-
-        Parameters
-        ----------
-        estimator : sklearn compatible estimator, optional
-            The estimator to use for the prediction.
-        loss : callable, default=root_mean_squared_error
-            The function to compute the loss when comparing the perturbed model
-            to the original model.
-        n_permutations : int, default=50
-            This parameter is relevant only for PFI or CFI.
-            Specifies the number of times the variable group (residual for CFI) is
-            permuted. For each permutation, the perturbed model's loss is calculated
-            and averaged over all permutations.
-        method : str, default="predict"
-            The method used for making predictions. This determines the predictions
-            passed to the loss function. Supported methods are "predict",
-            "predict_proba", "decision_function", "transform".
-        features_groups: dict or None, default=None
-            A dictionary where the keys are the group names and the values are the
-            list of column names corresponding to each features group. If None,
-            the features_groups are identified based on the columns of X.
-        n_jobs : int, default=1
-            The number of parallel jobs to run. Parallelization is done over the
-            variables or groups of variables.
-        random_state : int, default=None
-            The random state to use for sampling.
-        """
         super().__init__()
+        GroupVariableImportanceMixin.__init__(self, features_groups=features_groups)
         check_is_fitted(estimator)
         assert n_permutations > 0, "n_permutations must be positive"
         self.estimator = estimator
         self.loss = loss
         _check_vim_predict_method(method)
         self.method = method
-        self.n_jobs = n_jobs
         self.n_permutations = n_permutations
-        GroupVariableImportanceMixin.__init__(self, features_groups=features_groups)
+        self.statistical_test = statistical_test
+        self.n_jobs = n_jobs
+
+        # variable set in importance
+        self.loss_reference_ = None
+        self.loss_ = None
+        # internal variables
+        self._n_groups = None
+        self._groups_ids = None
         self.random_state = random_state
 
     def fit(self, X, y=None):
@@ -95,7 +123,7 @@ def _check_compatibility(self, X):
         """Check compatibility between input data and fitted model."""
         GroupVariableImportanceMixin._check_compatibility(self, X)
 
-    def predict(self, X):
+    def _predict(self, X):
         """
         Compute the predictions after perturbation of the data for each group of
         variables.
@@ -110,8 +138,6 @@ def predict(self, X):
         out: array-like of shape (n_groups, n_permutations, n_samples)
             The predictions after perturbation of the data for each group of variables.
         """
-        self._check_fit()
-        self._check_compatibility(X)
         X_ = np.asarray(X)
         rng = check_random_state(self.random_state)
 
@@ -132,45 +158,100 @@ def importance(self, X, y):
 
         Parameters
         ----------
-        X: array-like of shape (n_samples, n_features)
-            The input samples.
-        y: array-like of shape (n_samples,)
-            The target values.
+        X : array-like of shape (n_samples, n_features)
+            The input samples to compute importance scores for.
+        y : array-like of shape (n_samples,)
+
+        importances_ : ndarray of shape (n_groups,)
+            The importance scores for each group of covariates.
+            A higher score indicates greater importance of that group.
 
         Returns
         -------
-        out_dict: dict
-            A dictionary containing the following keys:
-            - 'loss_reference': the loss of the model with the original data.
-            - 'loss': a dictionary containing the loss of the perturbed model
-            for each group.
-            - 'importance': the importance scores for each group.
-        """
-        GroupVariableImportanceMixin._check_fit(self)
-        GroupVariableImportanceMixin._check_compatibility(self, X)
+        importances_ : ndarray of shape (n_features,)
+            Importance scores for each feature.
+
+        Attributes
+        ----------
+        loss_reference_ : float
+            The loss of the model with the original (non-perturbed) data.
+        loss_ : dict
+            Dictionary with indices as keys and arrays of perturbed losses as values.
+            Contains the loss values for each permutation of each group.
+        importances_ : ndarray of shape (n_groups,)
+            The calculated importance scores for each group.
+        pvalues_ : ndarray of shape (n_groups,)
+            P-values from one-sample t-test testing if importance scores are
+            significantly greater than 0.
 
-        out_dict = dict()
+        Notes
+        -----
+        The importance score for each group is calculated as the mean increase in loss
+        when that group is perturbed, compared to the reference loss.
+        A higher importance score indicates that perturbing that group leads to
+        worse model performance, suggesting those features are more important.
+        """
+        self._check_fit()
+        self._check_compatibility(X)
+        statistical_test = check_statistical_test(self.statistical_test)
 
         y_pred = getattr(self.estimator, self.method)(X)
-        loss_reference = self.loss(y, y_pred)
-        out_dict["loss_reference"] = loss_reference
+        self.loss_reference_ = self.loss(y, y_pred)
 
-        y_pred = self.predict(X)
-        out_dict["loss"] = dict()
+        y_pred = self._predict(X)
+        self.loss_ = dict()
         for j, y_pred_j in enumerate(y_pred):
             list_loss = []
             for y_pred_perm in y_pred_j:
                 list_loss.append(self.loss(y, y_pred_perm))
-            out_dict["loss"][j] = np.array(list_loss)
+            self.loss_[j] = np.array(list_loss)
 
-        out_dict["importance"] = np.array(
+        test_result = np.array(
             [
-                np.mean(out_dict["loss"][j]) - loss_reference
+                self.loss_[j] - self.loss_reference_
                 for j in range(self.n_features_groups_)
             ]
         )
-        self.importances_ = out_dict["importance"]
-        return out_dict
+        self.importances_ = np.mean(test_result, axis=1)
+        self.pvalues_ = statistical_test(test_result).pvalue
+        assert (
+            self.pvalues_.shape[0] == y_pred.shape[0]
+        ), "The statistical test doesn't provide the correct dimension."
+        return self.importances_
+
+    def fit_importance(self, X, y):
+        """
+        Fits the model to the data and computes feature importance scores.
+        Convenience method that combines fit() and importance() into a single call.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Training data.
+        y : array-like of shape (n_samples,)
+            Target values.
+
+        Returns
+        -------
+        importances_ : ndarray of shape (n_groups,)
+            The calculated importance scores for each feature group.
+            Higher values indicate greater importance.
+
+        Notes
+        -----
+        This method first calls fit() to identify feature groups, then calls
+        importance() to compute the importance scores for each group.
+        """
+        self.fit(X, y)
+        return self.importance(X, y)
+
+    def _check_importance(self):
+        """
+        Checks if the loss has been computed.
+        """
+        super()._check_importance()
+        if (self.loss_reference_ is None) or (self.loss_ is None):
+            raise ValueError("The importance method has not yet been called.")
 
     def _joblib_predict_one_features_group(
         self, X, features_group_id, features_group_key, random_state=None
diff --git a/src/hidimstat/base_variable_importance.py b/src/hidimstat/base_variable_importance.py
index e7b9a7e25..7d44ff149 100644
--- a/src/hidimstat/base_variable_importance.py
+++ b/src/hidimstat/base_variable_importance.py
@@ -409,7 +409,6 @@ class GroupVariableImportanceMixin:
     """
 
     def __init__(self, features_groups=None):
-        super().__init__()
         self.features_groups = features_groups
         self.n_features_groups_ = None
         self._features_groups_ids = None
diff --git a/src/hidimstat/conditional_feature_importance.py b/src/hidimstat/conditional_feature_importance.py
index 626f7c12c..a2fa2072e 100644
--- a/src/hidimstat/conditional_feature_importance.py
+++ b/src/hidimstat/conditional_feature_importance.py
@@ -1,80 +1,90 @@
+from functools import partial
+
 import numpy as np
 from joblib import Parallel, delayed
+from scipy.stats import wilcoxon
 from sklearn.base import BaseEstimator, check_is_fitted, clone
-from sklearn.metrics import root_mean_squared_error
+from sklearn.linear_model import LogisticRegressionCV, RidgeCV
+from sklearn.metrics import mean_squared_error
 
+from hidimstat._utils.docstring import _aggregate_docstring
 from hidimstat.base_perturbation import BasePerturbation
-from hidimstat.statistical_tools.conditional_sampling import ConditionalSampler
+from hidimstat.samplers.conditional_sampling import ConditionalSampler
 
 
 class CFI(BasePerturbation):
+    """
+    Conditional Feature Importance (CFI) algorithm.
+    :footcite:t:`Chamma_NeurIPS2023` and for group-level see
+    :footcite:t:`Chamma_AAAI2024`.
+
+    Parameters
+    ----------
+    estimator : sklearn compatible estimator
+        The estimator to use for the prediction.
+    method : str, default="predict"
+        The method to use for the prediction. This determines the predictions passed
+        to the loss function. Supported methods are "predict", "predict_proba" or
+        "decision_function".
+    loss : callable, default=mean_squared_error
+        The loss function to use when comparing the perturbed model to the full
+        model.
+    n_permutations : int, default=50
+        The number of permutations to perform. For each variable/group of variables,
+        the mean of the losses over the `n_permutations` is computed.
+    imputation_model_continuous : sklearn compatible estimator, default=RidgeCV()
+        The model used to estimate the conditional distribution of a given
+        continuous variable/group of variables given the others.
+    imputation_model_categorical : sklearn compatible estimator, default=LogisticRegressionCV()
+        The model used to estimate the conditional distribution of a given
+        categorical variable/group of variables given the others. Binary is
+        considered as a special case of categorical.
+    features_groups: dict or None, default=None
+        A dictionary where the keys are the group names and the values are the
+        list of column names corresponding to each features group. If None,
+        the features_groups are identified based on the columns of X.
+    feature_types: str or list, default="auto"
+        The feature type. Supported types include "auto", "continuous", and
+        "categorical". If "auto", the type is inferred from the cardinality
+        of the unique values passed to the `fit` method.
+    categorical_max_cardinality : int, default=10
+        The maximum cardinality of a variable to be considered as categorical
+        when the variable type is inferred (set to "auto" or not provided).
+    statistical_test : callable or str, default="ttest"
+        Statistical test function for computing p-values of importance scores.
+    random_state : int or None, default=None
+        The random state to use for sampling.
+    n_jobs : int, default=1
+        The number of jobs to run in parallel. Parallelization is done over the
+        variables or groups of variables.
+
+    References
+    ----------
+    .. footbibliography::
+    """
+
     def __init__(
         self,
         estimator,
-        loss: callable = root_mean_squared_error,
         method: str = "predict",
+        loss: callable = mean_squared_error,
         n_permutations: int = 50,
-        imputation_model_continuous=None,
-        imputation_model_categorical=None,
+        imputation_model_continuous=RidgeCV(),
+        imputation_model_categorical=LogisticRegressionCV(),
         features_groups=None,
         feature_types="auto",
         categorical_max_cardinality: int = 10,
-        n_jobs: int = 1,
+        statistical_test="ttest",
         random_state: int = None,
+        n_jobs: int = 1,
     ):
-        """
-        Conditional Feature Importance (CFI) algorithm.
-        :footcite:t:`Chamma_NeurIPS2023` and for group-level see
-        :footcite:t:`Chamma_AAAI2024`.
-
-        Parameters
-        ----------
-        estimator : sklearn compatible estimator, optional
-            The estimator to use for the prediction.
-        loss : callable, default=root_mean_squared_error
-            The loss function to use when comparing the perturbed model to the full
-            model.
-        method : str, default="predict"
-            The method to use for the prediction. This determines the predictions passed
-            to the loss function. Supported methods are "predict", "predict_proba" or
-            "decision_function".
-        n_permutations : int, default=50
-            The number of permutations to perform. For each feature/group of features,
-            the mean of the losses over the `n_permutations` is computed.
-        imputation_model_continuous : sklearn compatible estimator, optional
-            The model used to estimate the conditional distribution of a given
-            continuous features/group of features given the others.
-        imputation_model_categorical : sklearn compatible estimator, optional
-            The model used to estimate the conditional distribution of a given
-            categorical features/group of features given the others. Binary is
-            considered as a special case of categorical.
-        categorical_max_cardinality : int, default=10
-            The maximum cardinality of a feature to be considered as categorical
-            when the feature type is inferred (set to "auto" or not provided).
-        features_groups: dict or None,  default=None
-            A dictionary where the keys are the group names and the values are the
-            list of column names corresponding to each features group. If None,
-            the features_groups are identified based on the columns of X.
-        feature_types: str or list, default="auto"
-            The feature type. Supported types include "auto", "continuous", and
-            "categorical". If "auto", the type is inferred from the cardinality
-            of the unique values passed to the `fit` method.
-        random_state : int, default=None
-            The random state to use for sampling.
-        n_jobs : int, default=1
-            The number of jobs to run in parallel. Parallelization is done over the
-            features or groups of features.
-
-        References
-        ----------
-        .. footbibliography::
-        """
         super().__init__(
             estimator=estimator,
-            loss=loss,
             method=method,
-            n_jobs=n_jobs,
+            loss=loss,
             n_permutations=n_permutations,
+            statistical_test=statistical_test,
+            n_jobs=n_jobs,
             features_groups=features_groups,
             random_state=random_state,
         )
@@ -94,7 +104,8 @@ def __init__(
         self.imputation_model_continuous = imputation_model_continuous
 
     def fit(self, X, y=None):
-        """Fit the imputation models.
+        """
+        Fit the imputation models.
 
         Parameters
         ----------
@@ -151,6 +162,32 @@ def fit(self, X, y=None):
 
         return self
 
+    def fit_importance(self, X, y):
+        """
+        Fits the model to the data and computes feature importance scores.
+        Convenience method that combines fit() and importance() into a single call.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Training data.
+        y : array-like of shape (n_samples,)
+            Target values.
+
+        Returns
+        -------
+        importances_ : ndarray of shape (n_groups,)
+            The calculated importance scores for each feature group.
+            Higher values indicate greater importance.
+
+        Notes
+        -----
+        This method first calls fit() to identify feature groups, then calls
+        importance() to compute the importance scores for each group.
+        """
+        self.fit(X, y)
+        return self.importance(X, y)
+
     def _joblib_fit_one_features_group(self, estimator, X, features_groups_ids):
         """Fit a single imputation model, for a single group of features. This method
         is parallelized."""
@@ -187,3 +224,68 @@ def _permutation(self, X, features_group_id, random_state=None):
         return self._list_imputation_models[features_group_id].sample(
             X_minus_j, X_j, n_samples=self.n_permutations, random_state=random_state
         )
+
+
+def cfi_analysis(
+    estimator,
+    X,
+    y,
+    method: str = "predict",
+    loss: callable = mean_squared_error,
+    n_permutations: int = 50,
+    imputation_model_continuous=RidgeCV(),
+    imputation_model_categorical=LogisticRegressionCV(),
+    features_groups=None,
+    feature_types="auto",
+    categorical_max_cardinality: int = 10,
+    test_statistic="ttest",
+    k_best=None,
+    percentile=None,
+    threshold_max=None,
+    threshold_min=None,
+    random_state: int = None,
+    n_jobs: int = 1,
+):
+    methods = CFI(
+        estimator=estimator,
+        method=method,
+        loss=loss,
+        n_permutations=n_permutations,
+        imputation_model_continuous=imputation_model_continuous,
+        imputation_model_categorical=imputation_model_categorical,
+        features_groups=features_groups,
+        feature_types=feature_types,
+        categorical_max_cardinality=categorical_max_cardinality,
+        statistical_test=test_statistic,
+        random_state=random_state,
+        n_jobs=n_jobs,
+    )
+    methods.fit_importance(X, y)
+    selection = methods.importance_selection(
+        k_best=k_best,
+        percentile=percentile,
+        threshold_max=threshold_max,
+        threshold_min=threshold_min,
+    )
+    return selection, methods.importances_, methods.pvalues_
+
+
+# use the docstring of the class for the function
+cfi_analysis.__doc__ = _aggregate_docstring(
+    [
+        CFI.__doc__,
+        CFI.__init__.__doc__,
+        CFI.fit_importance.__doc__,
+        CFI.importance_selection.__doc__,
+    ],
+    """
+    Returns
+    -------
+    selection : ndarray of shape (n_features,)
+        Boolean array indicating selected features (True = selected)
+    importances : ndarray of shape (n_features,)
+        Feature importance scores/test statistics.
+    pvalues : ndarray of shape (n_features,)
+        P-values for importance scores. 
+    """,
+)
diff --git a/src/hidimstat/knockoffs.py b/src/hidimstat/knockoffs.py
index 249813833..4a04b8039 100644
--- a/src/hidimstat/knockoffs.py
+++ b/src/hidimstat/knockoffs.py
@@ -11,8 +11,8 @@
 from hidimstat._utils.docstring import _aggregate_docstring
 from hidimstat._utils.utils import check_random_state, seed_estimator
 from hidimstat.base_variable_importance import BaseVariableImportance
+from hidimstat.samplers import GaussianKnockoffs
 from hidimstat.statistical_tools.aggregation import quantile_aggregation
-from hidimstat.statistical_tools.gaussian_knockoffs import GaussianKnockoffs
 from hidimstat.statistical_tools.multiple_testing import fdr_threshold
 
 
diff --git a/src/hidimstat/leave_one_covariate_out.py b/src/hidimstat/leave_one_covariate_out.py
index d84d6e5a3..dc48f3bfd 100644
--- a/src/hidimstat/leave_one_covariate_out.py
+++ b/src/hidimstat/leave_one_covariate_out.py
@@ -1,68 +1,82 @@
+from functools import partial
+
 import numpy as np
 import pandas as pd
 from joblib import Parallel, delayed
+from scipy.stats import wilcoxon
 from sklearn.base import check_is_fitted, clone
-from sklearn.metrics import root_mean_squared_error
+from sklearn.metrics import mean_squared_error
 
+from hidimstat._utils.docstring import _aggregate_docstring
+from hidimstat._utils.utils import check_statistical_test
 from hidimstat.base_perturbation import BasePerturbation
+from hidimstat.base_variable_importance import GroupVariableImportanceMixin
 
 
 class LOCO(BasePerturbation):
+    """
+    Leave-One-Covariate-Out (LOCO) algorithm
+
+    This method is presented in :footcite:t:`lei2018distribution` and :footcite:t:`verdinelli2024feature`.
+    The model is re-fitted for each feature/group of features. The importance is
+    then computed as the difference between the loss of the full model and the loss
+    of the model without the feature/group.
+
+    Parameters
+    ----------
+    estimator : sklearn compatible estimator
+        The estimator to use for the prediction.
+    method : str, default="predict"
+        The method to use for the prediction. This determines the predictions passed
+        to the loss function. Supported methods are "predict", "predict_proba" or
+        "decision_function".
+    loss : callable, default=mean_squared_error
+        The loss function to use when comparing the perturbed model to the full
+        model.
+    statistical_test : callable or str, default="ttest"
+        Statistical test function for computing p-values of importance scores.
+    features_groups: dict or None, default=None
+        A dictionary where the keys are the group names and the values are the
+        list of column names corresponding to each features group. If None,
+        the features_groups are identified based on the columns of X.
+    n_jobs : int, default=1
+        The number of jobs to run in parallel. Parallelization is done over the
+        variables or groups of variables.
+
+    Notes
+    -----
+    :footcite:t:`Williamson_General_2023` also presented a LOCO method with an
+    additional data splitting strategy.
+
+    References
+    ----------
+    .. footbibliography::
+    """
+
     def __init__(
         self,
         estimator,
-        loss: callable = root_mean_squared_error,
         method: str = "predict",
+        loss: callable = mean_squared_error,
+        statistical_test="ttest",
         features_groups=None,
         n_jobs: int = 1,
     ):
-        """
-        Leave-One-Covariate-Out (LOCO) as presented in
-        :footcite:t:`lei2018distribution` and :footcite:t:`verdinelli2024feature`.
-        The model is re-fitted for each feature/group of features. The importance is
-        then computed as the difference between the loss of the full model and the loss
-        of the model without the feature/group.
-
-        Parameters
-        ----------
-        estimator : sklearn compatible estimator
-            The estimator to use for the prediction.
-        loss : callable, default=root_mean_squared_error
-            The loss function to use when comparing the perturbed model to the full
-            model.
-        method : str, default="predict"
-            The method to use for the prediction. This determines the predictions passed
-            to the loss function. Supported methods are "predict", "predict_proba" or
-            "decision_function".
-        features_groups: dict or None,  default=None
-            A dictionary where the keys are the group names and the values are the
-            list of column names corresponding to each features group. If None,
-            the features_groups are identified based on the columns of X.
-        n_jobs : int, default=1
-            The number of jobs to run in parallel. Parallelization is done over the
-            features or groups of features.
-
-        Notes
-        -----
-        :footcite:t:`Williamson_General_2023` also presented a LOCO method with an
-        additional data splitting strategy.
-
-        References
-        ----------
-        .. footbibliography::
-        """
         super().__init__(
             estimator=estimator,
-            loss=loss,
             method=method,
-            n_jobs=n_jobs,
+            loss=loss,
             n_permutations=1,
+            statistical_test=statistical_test,
             features_groups=features_groups,
+            n_jobs=n_jobs,
         )
-        self._list_estimators = []
+        # internal variable
+        self._list_estimators = None
 
     def fit(self, X, y):
-        """Fit a model after removing each covariate/group of covariates.
+        """
+        Fit a model after removing each covariate/group of covariates.
 
         Parameters
         ----------
@@ -93,6 +107,75 @@ def fit(self, X, y):
         )
         return self
 
+    def importance(self, X, y):
+        """
+        Compute the importance scores for each group of covariates.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            The input samples to compute importance scores for.
+        y : array-like of shape (n_samples,)
+
+        importances_ : ndarray of shape (n_groups,)
+            The importance scores for each group of covariates.
+            A higher score indicates greater importance of that group.
+
+        Returns
+        -------
+        importances_ : ndarray of shape (n_features,)
+            Importance scores for each feature.
+
+        Attributes
+        ----------
+        loss_reference_ : float
+            The loss of the model with the original (non-perturbed) data.
+        loss_ : dict
+            Dictionary with indices as keys and arrays of perturbed losses as values.
+            Contains the loss values for each permutation of each group.
+        importances_ : ndarray of shape (n_groups,)
+            The calculated importance scores for each group.
+        pvalues_ : ndarray of shape (n_groups,)
+            P-values from one-sided t-test testing if importance scores are
+            significantly greater than 0.
+
+        Notes
+        -----
+        The importance score for each group is calculated as the mean increase in loss
+        when that group is perturbed, compared to the reference loss.
+        A higher importance score indicates that perturbing that group leads to
+        worse model performance, suggesting those features are more important.
+        """
+        self._check_fit()
+        self._check_compatibility(X)
+        statistical_test = check_statistical_test(self.statistical_test)
+
+        y_pred = getattr(self.estimator, self.method)(X)
+        self.loss_reference_ = self.loss(y, y_pred)
+
+        y_pred = self._predict(X)
+        test_result = []
+        self.loss_ = dict()
+        for j, y_pred_j in enumerate(y_pred):
+            self.loss_[j] = np.array([self.loss(y, y_pred_j[0])])
+            if np.all(np.equal(y.shape, y_pred_j[0].shape)):
+                test_result.append(y - y_pred_j[0])
+            else:
+                test_result.append(y - np.unique(y)[np.argmax(y_pred_j[0], axis=-1)])
+
+        self.importances_ = np.mean(
+            [
+                self.loss_[j] - self.loss_reference_
+                for j in range(self.n_features_groups_)
+            ],
+            axis=1,
+        )
+        self.pvalues_ = statistical_test(np.array(test_result)).pvalue
+        assert (
+            self.pvalues_.shape[0] == y_pred.shape[0]
+        ), "The statistical test doesn't provide the correct dimension."
+        return self.importances_
+
     def _joblib_fit_one_features_group(self, estimator, X, y, key_features_group):
         """Fit the estimator after removing a group of covariates. Used in parallel."""
         if isinstance(X, pd.DataFrame):
@@ -103,7 +186,7 @@ def _joblib_fit_one_features_group(self, estimator, X, y, key_features_group):
         return estimator
 
     def _joblib_predict_one_features_group(
-        self, X, features_group_id, key_features_group, random_state=None
+        self, X, features_group_id, features_group_key, random_state=None
     ):
         """Predict the target feature after removing a group of covariates.
         Used in parallel."""
@@ -120,7 +203,60 @@ def _check_fit(self):
         covariates."""
         super()._check_fit()
         check_is_fitted(self.estimator)
-        if len(self._list_estimators) == 0:
+        if self._list_estimators is None:
             raise ValueError("The estimators require to be fit before to use them")
         for m in self._list_estimators:
             check_is_fitted(m)
+
+
+def loco_analysis(
+    estimator,
+    X,
+    y,
+    method: str = "predict",
+    loss: callable = mean_squared_error,
+    features_groups=None,
+    test_statistic="ttest",
+    k_best=None,
+    percentile=None,
+    threshold_min=None,
+    threshold_max=None,
+    n_jobs: int = 1,
+):
+    method = LOCO(
+        estimator=estimator,
+        method=method,
+        loss=loss,
+        statistical_test=test_statistic,
+        features_groups=features_groups,
+        n_jobs=n_jobs,
+    )
+    method.fit_importance(X, y)
+    selection = method.importance_selection(
+        k_best=k_best,
+        percentile=percentile,
+        threshold_min=threshold_min,
+        threshold_max=threshold_max,
+    )
+    return selection, method.importances_, method.pvalues_
+
+
+# use the docstring of the class for the function
+loco_analysis.__doc__ = _aggregate_docstring(
+    [
+        LOCO.__doc__,
+        LOCO.__init__.__doc__,
+        LOCO.fit_importance.__doc__,
+        LOCO.importance_selection.__doc__,
+    ],
+    """
+    Returns
+    -------
+    selection : ndarray of shape (n_features,)
+        Boolean array indicating selected features (True = selected)
+    importances : ndarray of shape (n_features,)
+        Feature importance scores/test statistics.
+    pvalues : ndarray of shape (n_features,)
+        None because there is no p-value for this method 
+    """,
+)
diff --git a/src/hidimstat/permutation_feature_importance.py b/src/hidimstat/permutation_feature_importance.py
index 4a0dfc825..bb88927d4 100644
--- a/src/hidimstat/permutation_feature_importance.py
+++ b/src/hidimstat/permutation_feature_importance.py
@@ -1,64 +1,75 @@
+from functools import partial
+
 import numpy as np
-from sklearn.metrics import root_mean_squared_error
+from scipy.stats import wilcoxon
+from sklearn.metrics import mean_squared_error
 
+from hidimstat._utils.docstring import _aggregate_docstring
 from hidimstat._utils.utils import check_random_state
 from hidimstat.base_perturbation import BasePerturbation
 
 
 class PFI(BasePerturbation):
+    """
+    Permutation Feature Importance algorithm
+
+    This as presented in :footcite:t:`breimanRandomForests2001`.
+    For each variable/group of variables, the importance is computed as the
+    difference between the loss of the initial model and the loss of the model
+    with the variable/group permuted.
+    The method was also used in :footcite:t:`mi2021permutation`
+
+    Parameters
+    ----------
+    estimator : sklearn compatible estimator
+        The estimator to use for the prediction.
+    method : str, default="predict"
+        The method to use for the prediction. This determines the predictions passed
+        to the loss function. Supported methods are "predict", "predict_proba" or
+        "decision_function".
+    loss : callable, default=mean_squared_error
+        The loss function to use when comparing the perturbed model to the full
+        model.
+    n_permutations : int, default=50
+        The number of permutations to perform. For each variable/group of variables,
+        the mean of the losses over the `n_permutations` is computed.
+    statistical_test : callable or str, default="ttest"
+        Statistical test function for computing p-values of importance scores.
+    features_groups: dict or None, default=None
+        A dictionary where the keys are the group names and the values are the
+        list of column names corresponding to each features group. If None,
+        the features_groups are identified based on the columns of X.
+    random_state : int or None, default=None
+        The random state to use for sampling.
+    n_jobs : int, default=1
+        The number of jobs to run in parallel. Parallelization is done over the
+        variables or groups of variables.
+
+    References
+    ----------
+    .. footbibliography::
+    """
+
     def __init__(
         self,
         estimator,
-        loss: callable = root_mean_squared_error,
         method: str = "predict",
+        loss: callable = mean_squared_error,
         n_permutations: int = 50,
+        statistical_test="ttest",
         features_groups=None,
         random_state: int = None,
         n_jobs: int = 1,
     ):
-        """
-        Permutation Feature Importance algorithm as presented in
-        :footcite:t:`breimanRandomForests2001`. For each feature/group of features,
-        the importance is computed as the difference between the loss of the initial
-        model and the loss of the model with the feature/group permuted.
-        The method was also used in :footcite:t:`mi2021permutation`
-
-        Parameters
-        ----------
-        estimator : sklearn compatible estimator, optionals
-            The estimator to use for the prediction.
-        loss : callable, default=root_mean_squared_error
-            The loss function to use when comparing the perturbed model to the full
-            model.
-        method : str, default="predict"
-            The method to use for the prediction. This determines the predictions passed
-            to the loss function. Supported methods are "predict", "predict_proba" or
-            "decision_function".
-        n_permutations : int, default=50
-            The number of permutations to perform. For each feature/group of features,
-            the mean of the losses over the `n_permutations` is computed.
-        features_groups: dict or None,  default=None
-            A dictionary where the keys are the group names and the values are the
-            list of column names corresponding to each features group. If None,
-            the features_groups are identified based on the columns of X.
-        random_state : int, default=None
-            The random state to use for sampling.
-        n_jobs : int, default=1
-            The number of jobs to run in parallel. Parallelization is done over the
-            features or groups of features.
-
-        References
-        ----------
-        .. footbibliography::
-        """
         super().__init__(
             estimator=estimator,
-            loss=loss,
             method=method,
-            n_jobs=n_jobs,
+            loss=loss,
             n_permutations=n_permutations,
+            statistical_test=statistical_test,
             features_groups=features_groups,
             random_state=random_state,
+            n_jobs=n_jobs,
         )
 
     def _permutation(self, X, features_group_id, random_state=None):
@@ -73,3 +84,60 @@ def _permutation(self, X, features_group_id, random_state=None):
             ]
         )
         return X_perm_j
+
+
+def pfi_analysis(
+    estimator,
+    X,
+    y,
+    method: str = "predict",
+    loss: callable = mean_squared_error,
+    n_permutations: int = 50,
+    test_statistic="ttest",
+    features_groups=None,
+    k_best=None,
+    percentile=None,
+    threshold_min=None,
+    threshold_max=None,
+    random_state: int = None,
+    n_jobs: int = 1,
+):
+    methods = PFI(
+        estimator=estimator,
+        method=method,
+        loss=loss,
+        n_permutations=n_permutations,
+        statistical_test=test_statistic,
+        features_groups=features_groups,
+        random_state=random_state,
+        n_jobs=n_jobs,
+    )
+    methods.fit_importance(X, y)
+    selection = methods.importance_selection(
+        k_best=k_best,
+        percentile=percentile,
+        threshold_min=threshold_min,
+        threshold_max=threshold_max,
+    )
+    return selection, methods.importances_, methods.pvalues_
+
+
+# use the docstring of the class for the function
+pfi_analysis.__doc__ = _aggregate_docstring(
+    [
+        PFI.__doc__,
+        PFI.__init__.__doc__,
+        PFI.fit_importance.__doc__,
+        PFI.importance_selection.__doc__,
+    ],
+    """
+    Returns
+    -------
+    selection : ndarray of shape (n_features,)
+        Boolean array indicating selected features (True = selected)
+    importances : ndarray of shape (n_features,)
+        Feature importance scores/test statistics.
+    pvalues : ndarray of shape (n_features,)
+         P-values for importance scores.
+    """,
+)
diff --git a/src/hidimstat/samplers/__init__.py b/src/hidimstat/samplers/__init__.py
new file mode 100644
index 000000000..6cb04420a
--- /dev/null
+++ b/src/hidimstat/samplers/__init__.py
@@ -0,0 +1,7 @@
+from .conditional_sampling import ConditionalSampler
+from .gaussian_knockoffs import GaussianKnockoffs
+
+__all__ = [
+    "ConditionalSampler",
+    "GaussianKnockoffs",
+]
diff --git a/src/hidimstat/statistical_tools/conditional_sampling.py b/src/hidimstat/samplers/conditional_sampling.py
similarity index 100%
rename from src/hidimstat/statistical_tools/conditional_sampling.py
rename to src/hidimstat/samplers/conditional_sampling.py
diff --git a/src/hidimstat/statistical_tools/gaussian_knockoffs.py b/src/hidimstat/samplers/gaussian_knockoffs.py
similarity index 100%
rename from src/hidimstat/statistical_tools/gaussian_knockoffs.py
rename to src/hidimstat/samplers/gaussian_knockoffs.py
diff --git a/src/hidimstat/statistical_tools/__init__.py b/src/hidimstat/statistical_tools/__init__.py
index 4012354d2..b713a79f6 100644
--- a/src/hidimstat/statistical_tools/__init__.py
+++ b/src/hidimstat/statistical_tools/__init__.py
@@ -1,9 +1,5 @@
-from .conditional_sampling import ConditionalSampler
-from .gaussian_knockoffs import GaussianKnockoffs
 from .nadeau_bengio_ttest import nadeau_bengio_ttest
 
 __all__ = [
-    "ConditionalSampler",
-    "GaussianKnockoffs",
     "nadeau_bengio_ttest",
 ]
diff --git a/test/_utils/test_utils.py b/test/_utils/test_utils.py
index 59bcba0ae..4a4abae5c 100644
--- a/test/_utils/test_utils.py
+++ b/test/_utils/test_utils.py
@@ -1,7 +1,28 @@
 import numpy as np
 import pytest
+from scipy.stats import ttest_1samp, wilcoxon
 
-from hidimstat._utils.utils import check_random_state
+from hidimstat._utils.utils import (
+    check_random_state,
+    check_statistical_test,
+    get_fitted_attributes,
+)
+from hidimstat.statistical_tools import nadeau_bengio_ttest
+
+
+def test_generated_attributes():
+    """Test function for getting generated attribute"""
+
+    class MyClass:
+        def __init__(self):
+            self.attr1 = 1
+            self.attr2_ = 2
+            self._attr3 = 3
+            self.attr4__ = 4
+            self.attr5_ = 5
+
+    attributes = get_fitted_attributes(MyClass())
+    assert attributes == ["attr2_", "attr5_"]
 
 
 def test_none():
@@ -41,3 +62,25 @@ def test_error():
         ValueError, match="cannot be used to seed a numpy.random.Generator instance"
     ):
         check_random_state(random_state)
+
+
+def test_check_test_statistic():
+    "test the function of check"
+    test_func = check_statistical_test("wilcoxon")
+    assert test_func.func == wilcoxon
+    test_func = check_statistical_test("ttest")
+    assert test_func.func == ttest_1samp
+    test_func = check_statistical_test("nb-ttest")
+    assert test_func.func == nadeau_bengio_ttest
+    test_func = check_statistical_test(print)
+    assert test_func == print
+    test_func = check_statistical_test(lambda x: x)
+    assert test_func.__class__.__name__ == "function"
+
+
+def test_check_test_statistic_warning():
+    "test the exception"
+    with pytest.raises(ValueError, match="the test 'test' is not supported"):
+        check_statistical_test("test")
+    with pytest.raises(ValueError, match="Unsupported value for 'statistical_test'."):
+        check_statistical_test([])
diff --git a/test/statistical_tools/test_conditional_sampling.py b/test/samplers/test_conditional_sampling.py
similarity index 99%
rename from test/statistical_tools/test_conditional_sampling.py
rename to test/samplers/test_conditional_sampling.py
index a3de06970..888493334 100644
--- a/test/statistical_tools/test_conditional_sampling.py
+++ b/test/samplers/test_conditional_sampling.py
@@ -11,7 +11,7 @@
 from sklearn.multiclass import OneVsRestClassifier
 from sklearn.preprocessing import StandardScaler
 
-from hidimstat.statistical_tools.conditional_sampling import ConditionalSampler
+from hidimstat.samplers.conditional_sampling import ConditionalSampler
 
 
 def test_continuous_case():
diff --git a/test/statistical_tools/test_gaussian_knockoffs.py b/test/samplers/test_gaussian_knockoffs.py
similarity index 97%
rename from test/statistical_tools/test_gaussian_knockoffs.py
rename to test/samplers/test_gaussian_knockoffs.py
index 2e671a6c0..6948dbfc5 100644
--- a/test/statistical_tools/test_gaussian_knockoffs.py
+++ b/test/samplers/test_gaussian_knockoffs.py
@@ -2,10 +2,7 @@
 import pytest
 
 from hidimstat._utils.scenario import multivariate_simulation
-from hidimstat.statistical_tools.gaussian_knockoffs import (
-    GaussianKnockoffs,
-    _s_equi,
-)
+from hidimstat.samplers.gaussian_knockoffs import GaussianKnockoffs, _s_equi
 
 
 def test_gaussian_equi():
diff --git a/test/statistical_tools/test_nadeau_bengio_ttest.py b/test/statistical_tools/test_nadeau_bengio_ttest.py
index 2771a651c..1f9223934 100644
--- a/test/statistical_tools/test_nadeau_bengio_ttest.py
+++ b/test/statistical_tools/test_nadeau_bengio_ttest.py
@@ -1,3 +1,5 @@
+from functools import partial
+
 import numpy as np
 import numpy.ma.testutils as ma_npt
 import numpy.testing as npt
@@ -48,7 +50,7 @@ def test_ttest_1samp_corrected_NB(data_generator):
         )
         vim.fit(X_train, y_train)
         importances = vim.importance(X_test, y_test)
-        importance_list.append(importances["importance"])
+        importance_list.append(importances)
     importance_array = np.array(importance_list)
 
     pvalue_corr = nadeau_bengio_ttest(importance_array, 0, test_frac=0.2).pvalue
diff --git a/test/test_base_perturbation.py b/test/test_base_perturbation.py
index ea29c2566..8ed0b8202 100644
--- a/test/test_base_perturbation.py
+++ b/test/test_base_perturbation.py
@@ -13,3 +13,16 @@ def test_no_implemented_methods():
     basic_class = BasePerturbation(estimator=estimator)
     with pytest.raises(NotImplementedError):
         basic_class._permutation(X, features_group_id=None)
+
+
+def test_check_importance():
+    """test that the methods are not implemented in the base class"""
+    X = np.random.randint(0, 2, size=(100, 2, 1))
+    estimator = LinearRegression()
+    estimator.fit(X[:, 0], X[:, 1])
+    basic_class = BasePerturbation(estimator=estimator)
+    basic_class.importances_ = []
+    with pytest.raises(
+        ValueError, match="The importance method has not yet been called."
+    ):
+        basic_class.importance_selection()
diff --git a/test/test_conditional_feature_importance.py b/test/test_conditional_feature_importance.py
index 459525131..57d89611e 100644
--- a/test/test_conditional_feature_importance.py
+++ b/test/test_conditional_feature_importance.py
@@ -1,15 +1,22 @@
 from copy import deepcopy
+from functools import partial
 
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
 import pytest
+from scipy.stats import ttest_1samp
 from sklearn.exceptions import NotFittedError
-from sklearn.linear_model import LinearRegression, LogisticRegression
-from sklearn.metrics import log_loss, root_mean_squared_error
+from sklearn.linear_model import (
+    LinearRegression,
+    LogisticRegression,
+    LogisticRegressionCV,
+    RidgeCV,
+)
+from sklearn.metrics import log_loss, mean_squared_error
 from sklearn.model_selection import train_test_split
 
-from hidimstat import CFI
+from hidimstat import CFI, cfi_analysis
 from hidimstat._utils.exception import InternalError
 from hidimstat._utils.scenario import multivariate_simulation
 from hidimstat.base_perturbation import BasePerturbation
@@ -67,8 +74,7 @@ def run_cfi(X, y, n_permutation, seed):
     # fit the model using the training set
     cfi.fit(X_train)
     # calculate feature importance using the test set
-    vim = cfi.importance(X_test, y_test)
-    importance = vim["importance"]
+    importance = cfi.importance(X_test, y_test)
     return importance
 
 
@@ -198,9 +204,8 @@ def test_group(data_generator):
     cfi.fit(X_train_df)
     # Warning expected since column names in pandas are not considered
     with pytest.warns(UserWarning, match="X does not have valid feature names, but"):
-        vim = cfi.importance(X_test_df, y_test)
+        importance = cfi.importance(X_test_df, y_test)
 
-    importance = vim["importance"]
     # Check if importance scores are computed for each feature
     assert importance.shape == (2,)
     # Verify that important feature group has higher score
@@ -245,8 +250,7 @@ def test_classication(data_generator):
         n_jobs=1,
     )
     cfi.fit(X_train)
-    vim = cfi.importance(X_test, y_test_clf)
-    importance = vim["importance"]
+    importance = cfi.importance(X_test, y_test_clf)
     # Check that importance scores are defined for each feature
     assert importance.shape == (X.shape[1],)
     # Check that important features have higher mean importance scores
@@ -275,11 +279,11 @@ def test_init(self, data_generator):
         )
         assert cfi.n_jobs == 1
         assert cfi.n_permutations == 50
-        assert cfi.loss == root_mean_squared_error
+        assert cfi.loss == mean_squared_error
         assert cfi.method == "predict"
         assert cfi.categorical_max_cardinality == 10
-        assert cfi.imputation_model_categorical is None
-        assert cfi.imputation_model_continuous is None
+        assert isinstance(cfi.imputation_model_categorical, LogisticRegressionCV)
+        assert isinstance(cfi.imputation_model_continuous, RidgeCV)
 
     def test_fit(self, data_generator):
         """Test fitting CFI"""
@@ -342,7 +346,7 @@ def test_categorical(
         )
         cfi.fit(X, y)
 
-        importances = cfi.importance(X, y)["importance"]
+        importances = cfi.importance(X, y)
         assert len(importances) == 3
 
 
@@ -374,18 +378,6 @@ def test_unknown_predict_method(self, data_generator):
                 method="unknown method",
             )
 
-    def test_unfitted_predict(self, data_generator):
-        """Test predict method with unfitted model"""
-        X, y, _, _ = data_generator
-        fitted_model = LinearRegression().fit(X, y)
-        cfi = CFI(
-            estimator=fitted_model,
-            method="predict",
-        )
-
-        with pytest.raises(ValueError, match="The class is not fitted."):
-            cfi.predict(X)
-
     def test_unfitted_importance(self, data_generator):
         """Test importance method with unfitted model"""
         X, y, _, _ = data_generator
@@ -598,6 +590,41 @@ def test_groups_warning(self, data_generator):
         ):
             cfi.importance(X, y)
 
+    def test_assert_dimension_pvalue(self, data_generator):
+        """test that assert is raise if function stat is not good"""
+        X, y, _, _ = data_generator
+        fitted_model = LinearRegression().fit(X, y)
+        cfi = CFI(
+            estimator=fitted_model,
+            imputation_model_continuous=LinearRegression(),
+            statistical_test=partial(ttest_1samp, popmean=0, axis=0),
+        )
+        cfi.fit(X, y)
+        with pytest.raises(
+            AssertionError,
+            match="The statistical test doesn't provide the correct dimension.",
+        ):
+            cfi.importance(X, y)
+
+
+@pytest.mark.parametrize(
+    "n_samples, n_features, support_size, rho, seed, value, signal_noise_ratio, rho_serial",
+    [(150, 200, 10, 0.2, 42, 1.0, 1.0, 0.0)],
+    ids=["high level noise"],
+)
+@pytest.mark.parametrize("n_permutation, cfi_seed", [(20, 0)], ids=["default_cfi"])
+def test_function_cfi(data_generator, n_permutation, cfi_seed):
+    """Test CFI function"""
+    X, y, _, _ = data_generator
+    cfi_analysis(
+        LinearRegression().fit(X, y),
+        X,
+        y,
+        imputation_model_continuous=LinearRegression(),
+        n_permutations=n_permutation,
+        random_state=cfi_seed,
+    )
+
 
 @pytest.mark.parametrize(
     "n_samples, n_features, support_size, rho, seed, value, signal_noise_ratio, rho_serial",
@@ -619,6 +646,8 @@ def test_cfi_plot(data_generator):
         random_state=0,
     )
     cfi.fit(X_train, y_train)
+    cfi.loss_reference_ = []
+    cfi.loss_ = []
     # Make the plot independent of data / randomness to test only the plotting function
     cfi.importances_ = np.arange(X.shape[1])
     fig, ax = plt.subplots(figsize=(6, 3))
@@ -646,6 +675,8 @@ def test_cfi_plot_2d_imp(data_generator):
         random_state=0,
     )
     cfi.fit(X_train, y_train)
+    cfi.loss_reference_ = []
+    cfi.loss_ = []
     # Make the plot independent of data / randomness to test only the plotting function
     cfi.importances_ = np.stack(
         [
@@ -678,6 +709,8 @@ def test_cfi_plot_coverage(data_generator):
         random_state=0,
     )
     cfi.fit(X_train, y_train)
+    cfi.loss_reference_ = []
+    cfi.loss_ = []
     # Make the plot independent of data / randomness to test only the plotting function
     cfi.importances_ = np.arange(X.shape[1])
     _, ax = plt.subplots(figsize=(6, 3))
@@ -729,9 +762,9 @@ def test_cfi_repeatibility(cfi_test_data):
     X_train, X_test, y_test, cfi_default_parameters = cfi_test_data
     cfi = CFI(**cfi_default_parameters)
     cfi.fit(X_train)
-    vim = cfi.importance(X_test, y_test)["importance"]
+    vim = cfi.importance(X_test, y_test)
     # repeat
-    vim_repeat = cfi.importance(X_test, y_test)["importance"]
+    vim_repeat = cfi.importance(X_test, y_test)
     assert not np.array_equal(vim, vim_repeat)
 
 
@@ -742,20 +775,20 @@ def test_cfi_randomness_with_none(cfi_test_data):
     X_train, X_test, y_test, cfi_default_parameters = cfi_test_data
     cfi = CFI(random_state=None, **cfi_default_parameters)
     cfi.fit(X_train)
-    vim = cfi.importance(X_test, y_test)["importance"]
+    vim = cfi.importance(X_test, y_test)
     # repeat importance
-    vim_repeat = cfi.importance(X_test, y_test)["importance"]
+    vim_repeat = cfi.importance(X_test, y_test)
     assert not np.array_equal(vim, vim_repeat)
 
     # refit
     cfi.fit(X_train)
-    vim_refit = cfi.importance(X_test, y_test)["importance"]
+    vim_refit = cfi.importance(X_test, y_test)
     assert not np.array_equal(vim, vim_refit)
 
     # Reproducibility
     cfi_2 = CFI(random_state=None, **cfi_default_parameters)
     cfi_2.fit(X_train)
-    vim_reproducibility = cfi_2.importance(X_test, y_test)["importance"]
+    vim_reproducibility = cfi_2.importance(X_test, y_test)
     assert not np.array_equal(vim, vim_reproducibility)
 
 
@@ -766,20 +799,20 @@ def test_cfi_reproducibility_with_integer(cfi_test_data):
     X_train, X_test, y_test, cfi_default_parameters = cfi_test_data
     cfi = CFI(random_state=42, **cfi_default_parameters)
     cfi.fit(X_train)
-    vim = cfi.importance(X_test, y_test)["importance"]
+    vim = cfi.importance(X_test, y_test)
     # repeat importance
-    vim_repeat = cfi.importance(X_test, y_test)["importance"]
+    vim_repeat = cfi.importance(X_test, y_test)
     assert np.array_equal(vim, vim_repeat)
 
     # refit
     cfi.fit(X_train)
-    vim_refit = cfi.importance(X_test, y_test)["importance"]
+    vim_refit = cfi.importance(X_test, y_test)
     assert np.array_equal(vim, vim_refit)
 
     # Reproducibility
     cfi_2 = CFI(random_state=42, **cfi_default_parameters)
     cfi_2.fit(X_train)
-    vim_reproducibility = cfi_2.importance(X_test, y_test)["importance"]
+    vim_reproducibility = cfi_2.importance(X_test, y_test)
     assert np.array_equal(vim, vim_reproducibility)
 
 
@@ -793,25 +826,25 @@ def test_cfi_reproducibility_with_rng(cfi_test_data):
     rng = np.random.default_rng(0)
     cfi = CFI(random_state=rng, **cfi_default_parameters)
     cfi.fit(X_train)
-    vim = cfi.importance(X_test, y_test)["importance"]
+    vim = cfi.importance(X_test, y_test)
     # repeat importance
-    vim_repeat = cfi.importance(X_test, y_test)["importance"]
+    vim_repeat = cfi.importance(X_test, y_test)
     assert not np.array_equal(vim, vim_repeat)
 
     # refit
     cfi.fit(X_train)
-    vim_refit = cfi.importance(X_test, y_test)["importance"]
+    vim_refit = cfi.importance(X_test, y_test)
     assert not np.array_equal(vim, vim_refit)
 
     # refit repeatability
     rng = np.random.default_rng(0)
     cfi.random_state = rng
     cfi.fit(X_train)
-    vim_refit_2 = cfi.importance(X_test, y_test)["importance"]
+    vim_refit_2 = cfi.importance(X_test, y_test)
     assert np.array_equal(vim, vim_refit_2)
 
     # Reproducibility
     cfi_2 = CFI(random_state=np.random.default_rng(0), **cfi_default_parameters)
     cfi_2.fit(X_train)
-    vim_reproducibility = cfi_2.importance(X_test, y_test)["importance"]
+    vim_reproducibility = cfi_2.importance(X_test, y_test)
     assert np.array_equal(vim, vim_reproducibility)
diff --git a/test/test_knockoff.py b/test/test_knockoff.py
index a19e2e56b..cf1ccdf14 100644
--- a/test/test_knockoff.py
+++ b/test/test_knockoff.py
@@ -11,7 +11,7 @@
     model_x_knockoff,
     set_alpha_max_lasso_path,
 )
-from hidimstat.statistical_tools.gaussian_knockoffs import GaussianKnockoffs
+from hidimstat.samplers import GaussianKnockoffs
 from hidimstat.statistical_tools.multiple_testing import fdp_power
 
 
diff --git a/test/test_leave_one_covariate_out.py b/test/test_leave_one_covariate_out.py
index ea3738634..1c8c3b243 100644
--- a/test/test_leave_one_covariate_out.py
+++ b/test/test_leave_one_covariate_out.py
@@ -1,12 +1,15 @@
+from functools import partial
+
 import numpy as np
 import pandas as pd
 import pytest
+from scipy.stats import ttest_1samp
 from sklearn.exceptions import NotFittedError
 from sklearn.linear_model import LinearRegression, LogisticRegression
 from sklearn.metrics import log_loss
 from sklearn.model_selection import train_test_split
 
-from hidimstat import LOCO
+from hidimstat import LOCO, loco_analysis
 from hidimstat._utils.scenario import multivariate_simulation
 from hidimstat.base_perturbation import BasePerturbation
 
@@ -39,9 +42,8 @@ def test_loco():
         X_train,
         y_train,
     )
-    vim = loco.importance(X_test, y_test)
+    importance = loco.importance(X_test, y_test)
 
-    importance = vim["importance"]
     assert importance.shape == (X.shape[1],)
     assert (
         importance[important_features].mean()
@@ -68,9 +70,8 @@ def test_loco():
     )
     # warnings because we doesn't consider the name of columns of pandas
     with pytest.warns(UserWarning, match="X does not have valid feature names, but"):
-        vim = loco.importance(X_test_df, y_test)
+        importance = loco.importance(X_test_df, y_test)
 
-    importance = vim["importance"]
     assert importance[0].mean() > importance[1].mean()
 
     # Classification case
@@ -93,11 +94,10 @@ def test_loco():
         X_train,
         y_train_clf,
     )
-    vim_clf = loco_clf.importance(X_test, y_test_clf)
+    importance_clf = loco_clf.importance(X_test, y_test_clf)
 
-    importance_clf = vim_clf["importance"]
     assert importance_clf.shape == (2,)
-    assert importance[0].mean() > importance[1].mean()
+    assert importance_clf[0].mean() > importance_clf[1].mean()
 
 
 def test_raises_value_error():
@@ -123,7 +123,7 @@ def test_raises_value_error():
             estimator=fitted_model,
             method="predict",
         )
-        loco.predict(X)
+        loco.importance(X, None)
     with pytest.raises(ValueError, match="The class is not fitted."):
         fitted_model = LinearRegression().fit(X, y)
         loco = LOCO(
@@ -142,3 +142,46 @@ def test_raises_value_error():
         )
         BasePerturbation.fit(loco, X, y)
         loco.importance(X, y)
+
+    with pytest.raises(
+        AssertionError,
+        match="The statistical test doesn't provide the correct dimension.",
+    ):
+        fitted_model = LinearRegression().fit(X, y)
+        loco = LOCO(
+            estimator=fitted_model,
+            statistical_test=partial(ttest_1samp, popmean=0, axis=0),
+        ).fit(X, y)
+        loco.importance(X, y)
+
+
+def test_loco_function():
+    """Test the function of LOCO algorithm on a linear scenario."""
+    X, y, beta, noise = multivariate_simulation(
+        n_samples=150,
+        n_features=100,
+        support_size=10,
+        shuffle=False,
+        seed=42,
+    )
+    important_features = np.where(beta != 0)[0]
+    non_important_features = np.where(beta == 0)[0]
+
+    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
+
+    regression_model = LinearRegression()
+    regression_model.fit(X_train, y_train)
+
+    selection, importance, pvalue = loco_analysis(
+        regression_model,
+        X,
+        y,
+        method="predict",
+        n_jobs=1,
+    )
+
+    assert importance.shape == (X.shape[1],)
+    assert (
+        importance[important_features].mean()
+        > importance[non_important_features].mean()
+    )
diff --git a/test/test_permutation_feature_importance.py b/test/test_permutation_feature_importance.py
index 0e7ee0c0d..f806486ce 100644
--- a/test/test_permutation_feature_importance.py
+++ b/test/test_permutation_feature_importance.py
@@ -5,7 +5,7 @@
 from sklearn.metrics import log_loss
 from sklearn.model_selection import train_test_split
 
-from hidimstat import PFI
+from hidimstat import PFI, pfi_analysis
 from hidimstat._utils.scenario import multivariate_simulation
 
 
@@ -39,9 +39,8 @@ def test_permutation_importance():
         X_train,
         y_train,
     )
-    vim = pfi.importance(X_test, y_test)
+    importance = pfi.importance(X_test, y_test)
 
-    importance = vim["importance"]
     assert importance.shape == (X.shape[1],)
     assert (
         importance[important_features].mean()
@@ -70,9 +69,8 @@ def test_permutation_importance():
     )
     # warnings because we doesn't consider the name of columns of pandas
     with pytest.warns(UserWarning, match="X does not have valid feature names, but"):
-        vim = pfi.importance(X_test_df, y_test)
+        importance = pfi.importance(X_test_df, y_test)
 
-    importance = vim["importance"]
     assert importance[0].mean() > importance[1].mean()
 
     # Classification case
@@ -95,12 +93,43 @@ def test_permutation_importance():
         X_train,
         y_train_clf,
     )
-    vim_clf = pfi_clf.importance(X_test, y_test_clf)
+    importance_clf = pfi_clf.importance(X_test, y_test_clf)
 
-    importance_clf = vim_clf["importance"]
     assert importance_clf.shape == (X.shape[1],)
 
 
+def test_permutation_importance_function():
+    """Test the function of Permutation Importance algorithm on a linear scenario."""
+    X, y, beta, noise = multivariate_simulation(
+        n_samples=150,
+        n_features=200,
+        support_size=10,
+        shuffle=False,
+        seed=42,
+    )
+    important_features = beta != 0
+
+    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
+
+    regression_model = LinearRegression()
+    regression_model.fit(X_train, y_train)
+
+    selection, importance, pvalue = pfi_analysis(
+        regression_model,
+        X,
+        y,
+        n_permutations=20,
+        method="predict",
+        random_state=0,
+        n_jobs=1,
+    )
+
+    assert importance.shape == (X.shape[1],)
+    assert (
+        importance[important_features].mean() > importance[~important_features].mean()
+    )
+
+
 @pytest.fixture(scope="module")
 def pfi_test_data():
     """
@@ -137,8 +166,8 @@ def test_pfi_repeatability(pfi_test_data):
     X_train, X_test, y_train, y_test, pfi_default_parameters = pfi_test_data
     pfi = PFI(**pfi_default_parameters, random_state=0)
     pfi.fit(X_train, y_train)
-    vim = pfi.importance(X_test, y_test)["importance"]
-    vim_reproducible = pfi.importance(X_test, y_test)["importance"]
+    vim = pfi.importance(X_test, y_test)
+    vim_reproducible = pfi.importance(X_test, y_test)
     assert np.array_equal(vim, vim_reproducible)
 
 
@@ -150,17 +179,17 @@ def test_pfi_randomness_with_none(pfi_test_data):
     X_train, X_test, y_train, y_test, pfi_default_parameters = pfi_test_data
     pfi_fixed = PFI(**pfi_default_parameters, random_state=0)
     pfi_fixed.fit(X_train, y_train)
-    vim_fixed = pfi_fixed.importance(X_test, y_test)["importance"]
+    vim_fixed = pfi_fixed.importance(X_test, y_test)
 
     pfi_new_state = PFI(**pfi_default_parameters, random_state=1)
     pfi_new_state.fit(X_train, y_train)
-    vim_new_state = pfi_new_state.importance(X_test, y_test)["importance"]
+    vim_new_state = pfi_new_state.importance(X_test, y_test)
     assert not np.array_equal(vim_fixed, vim_new_state)
 
     pfi_none_state = PFI(**pfi_default_parameters, random_state=None)
     pfi_none_state.fit(X_train, y_train)
-    vim_none_state_1 = pfi_none_state.importance(X_test, y_test)["importance"]
-    vim_none_state_2 = pfi_none_state.importance(X_test, y_test)["importance"]
+    vim_none_state_1 = pfi_none_state.importance(X_test, y_test)
+    vim_none_state_2 = pfi_none_state.importance(X_test, y_test)
     assert not np.array_equal(vim_none_state_1, vim_none_state_2)
 
 
@@ -172,11 +201,11 @@ def test_pfi_reproducibility_with_integer(pfi_test_data):
     X_train, X_test, y_train, y_test, pfi_default_parameters = pfi_test_data
     pfi_1 = PFI(**pfi_default_parameters, random_state=0)
     pfi_1.fit(X_train, y_train)
-    vim_1 = pfi_1.importance(X_test, y_test)["importance"]
+    vim_1 = pfi_1.importance(X_test, y_test)
 
     pfi_2 = PFI(**pfi_default_parameters, random_state=0)
     pfi_2.fit(X_train, y_train)
-    vim_2 = pfi_2.importance(X_test, y_test)["importance"]
+    vim_2 = pfi_2.importance(X_test, y_test)
     assert np.array_equal(vim_1, vim_2)
 
 
@@ -190,13 +219,13 @@ def test_pfi_reproducibility_with_rng(pfi_test_data):
     rng = np.random.default_rng(0)
     pfi = PFI(**pfi_default_parameters, random_state=rng)
     pfi.fit(X_train, y_train)
-    vim = pfi.importance(X_test, y_test)["importance"]
-    vim_repeat = pfi.importance(X_test, y_test)["importance"]
+    vim = pfi.importance(X_test, y_test)
+    vim_repeat = pfi.importance(X_test, y_test)
     assert not np.array_equal(vim, vim_repeat)
 
     # Refit with same rng
     rng = np.random.default_rng(0)
     pfi_reproducibility = PFI(**pfi_default_parameters, random_state=rng)
     pfi_reproducibility.fit(X_train, y_train)
-    vim_reproducibility = pfi_reproducibility.importance(X_test, y_test)["importance"]
+    vim_reproducibility = pfi_reproducibility.importance(X_test, y_test)
     assert np.array_equal(vim, vim_reproducibility)