From 72094ff86acf5347610d2308bf1388f354667a2e Mon Sep 17 00:00:00 2001
From: kusch lionel
Date: Fri, 22 Aug 2025 18:27:40 +0200
Subject: [PATCH 01/46] Improve reproducibility of examples

---
 ...ot_diabetes_variable_importance_example.py | 51 ++++++++-----------
 .../plot_importance_classification_iris.py    | 19 +++++--
 examples/plot_knockoff_aggregation.py         | 22 +++++---
 .../plot_pitfalls_permutation_importance.py   | 32 ++++++++----
 4 files changed, 73 insertions(+), 51 deletions(-)

diff --git a/examples/plot_diabetes_variable_importance_example.py b/examples/plot_diabetes_variable_importance_example.py
index c9c1d0a6d..53f7407fa 100644
--- a/examples/plot_diabetes_variable_importance_example.py
+++ b/examples/plot_diabetes_variable_importance_example.py
@@ -54,9 +54,12 @@
 from sklearn.linear_model import LogisticRegressionCV, RidgeCV
 from sklearn.metrics import r2_score, root_mean_squared_error
 from sklearn.model_selection import KFold
+from sklearn.utils import check_random_state
 
 from hidimstat import CPI, LOCO, PFI
 
+seeds = check_random_state(42).randint(1, np.iinfo(np.int32).max, 7)
+
 #############################################################################
 # Load the diabetes dataset
 # -------------------------
@@ -71,30 +74,12 @@
 # diabetes dataset.
 
 n_folds = 5
-regressor = RidgeCV(alphas=np.logspace(-3, 3, 10))
-regressor_list = [clone(regressor) for _ in range(n_folds)]
-kf = KFold(n_splits=n_folds, shuffle=True, random_state=0)
-for i, (train_index, test_index) in enumerate(kf.split(X)):
-    regressor_list[i].fit(X[train_index], y[train_index])
-    score = r2_score(
-        y_true=y[test_index], y_pred=regressor_list[i].predict(X[test_index])
-    )
-    mse = root_mean_squared_error(
-        y_true=y[test_index], y_pred=regressor_list[i].predict(X[test_index])
-    )
-
-    print(f"Fold {i}: {score}")
-    print(f"Fold {i}: {mse}")
-#############################################################################
-# Fit a baselien model on the diabetes dataset
-# --------------------------------------------
-# We use a Ridge regression model with a 10-fold cross-validation to fit the
-# diabetes dataset.
- -n_folds = 10 -regressor = RidgeCV(alphas=np.logspace(-3, 3, 10)) +regressor = RidgeCV( + alphas=np.logspace(-3, 3, 10), + cv=KFold(shuffle=True, random_state=seeds[0]), +) regressor_list = [clone(regressor) for _ in range(n_folds)] -kf = KFold(n_splits=n_folds, shuffle=True, random_state=0) +kf = KFold(n_splits=n_folds, shuffle=True, random_state=seeds[1]) for i, (train_index, test_index) in enumerate(kf.split(X)): regressor_list[i].fit(X[train_index], y[train_index]) score = r2_score( @@ -112,17 +97,23 @@ # -------------------------------------------------------- cpi_importance_list = [] +kf = KFold(n_splits=n_folds, shuffle=True, random_state=seeds[1]) for i, (train_index, test_index) in enumerate(kf.split(X)): print(f"Fold {i}") X_train, X_test = X[train_index], X[test_index] y_train, y_test = y[train_index], y[test_index] cpi = CPI( estimator=regressor_list[i], - imputation_model_continuous=RidgeCV(alphas=np.logspace(-3, 3, 10)), - imputation_model_categorical=LogisticRegressionCV(Cs=np.logspace(-2, 2, 10)), - # covariate_estimator=HistGradientBoostingRegressor(random_state=0,), + imputation_model_continuous=RidgeCV( + alphas=np.logspace(-3, 3, 10), + cv=KFold(shuffle=True, random_state=seeds[3]), + ), + imputation_model_categorical=LogisticRegressionCV( + Cs=np.logspace(-2, 2, 10), + cv=KFold(shuffle=True, random_state=seeds[4]), + ), n_permutations=50, - random_state=0, + random_state=seeds[5], n_jobs=4, ) cpi.fit(X_train, y_train) @@ -134,7 +125,7 @@ # --------------------------------------------------------- loco_importance_list = [] - +kf = KFold(n_splits=n_folds, shuffle=True, random_state=seeds[1]) for i, (train_index, test_index) in enumerate(kf.split(X)): print(f"Fold {i}") X_train, X_test = X[train_index], X[test_index] @@ -153,7 +144,7 @@ # ---------------------------------------------------------------- pfi_importance_list = [] - +kf = KFold(n_splits=n_folds, shuffle=True, random_state=seeds[1]) for i, (train_index, test_index) in enumerate(kf.split(X)): print(f"Fold {i}") X_train, X_test = X[train_index], X[test_index] @@ -161,7 +152,7 @@ pfi = PFI( estimator=regressor_list[i], n_permutations=50, - random_state=0, + random_state=seeds[6], n_jobs=4, ) pfi.fit(X_train, y_train) diff --git a/examples/plot_importance_classification_iris.py b/examples/plot_importance_classification_iris.py index 6216ce044..a3c166a92 100644 --- a/examples/plot_importance_classification_iris.py +++ b/examples/plot_importance_classification_iris.py @@ -77,7 +77,9 @@ def run_one_fold(X, y, model, train_index, test_index, vim_name="CPI", groups=No if vim_name == "CPI": vim = CPI( estimator=model_c, - imputation_model_continuous=RidgeCV(alphas=np.logspace(-3, 3, 10)), + imputation_model_continuous=RidgeCV( + alphas=np.logspace(-3, 3, 10), cv=KFold(shuffle=True, random_state=1) + ), n_permutations=50, random_state=0, method=method, @@ -112,10 +114,19 @@ def run_one_fold(X, y, model, train_index, test_index, vim_name="CPI", groups=No # combination, in parallel. 
models = [ - LogisticRegressionCV(Cs=np.logspace(-3, 3, 10), tol=1e-3, max_iter=1000), - GridSearchCV(SVC(kernel="rbf"), {"C": np.logspace(-3, 3, 10)}), + LogisticRegressionCV( + Cs=np.logspace(-3, 3, 10), + tol=1e-3, + max_iter=1000, + cv=KFold(shuffle=True, random_state=2), + ), + GridSearchCV( + SVC(kernel="rbf"), + {"C": np.logspace(-3, 3, 10)}, + cv=KFold(shuffle=True, random_state=3), + ), ] -cv = KFold(n_splits=5, shuffle=True, random_state=0) +cv = KFold(n_splits=5, shuffle=True, random_state=4) groups = {ft: i for i, ft in enumerate(dataset.feature_names)} out_list = Parallel(n_jobs=5)( delayed(run_one_fold)( diff --git a/examples/plot_knockoff_aggregation.py b/examples/plot_knockoff_aggregation.py index b4ba66af5..14ea794a7 100644 --- a/examples/plot_knockoff_aggregation.py +++ b/examples/plot_knockoff_aggregation.py @@ -73,7 +73,8 @@ ####################################################################### # Define the function for running the three procedures on the same data # --------------------------------------------------------------------- -def single_run(n_samples, n_features, rho, sparsity, snr, fdr, n_bootstraps, seed=None): +def single_run(n_samples, n_features, rho, sparsity, snr, fdr, n_bootstraps, seed=0): + seeds = check_random_state(seed).randint(1, np.iinfo(np.int32).max, 4) # Generate data X, y, _, non_zero_index = multivariate_1D_simulation_AR( n_samples, n_features, rho=rho, sparsity=sparsity, seed=seed, snr=snr @@ -85,10 +86,10 @@ def single_run(n_samples, n_features, rho, sparsity, snr, fdr, n_bootstraps, see y, estimator=LassoCV( n_jobs=1, - cv=KFold(n_splits=5, shuffle=True, random_state=0), + cv=KFold(n_splits=5, shuffle=True, random_state=seeds[0]), ), n_bootstraps=1, - random_state=seed, + random_state=seeds[1], ) mx_selection, _ = model_x_knockoff_pvalue(test_scores, fdr=fdr) fdp_mx, power_mx = fdp_power(mx_selection, non_zero_index) @@ -99,11 +100,11 @@ def single_run(n_samples, n_features, rho, sparsity, snr, fdr, n_bootstraps, see y, estimator=LassoCV( n_jobs=1, - cv=KFold(n_splits=5, shuffle=True, random_state=0), + cv=KFold(n_splits=5, shuffle=True, random_state=seeds[2]), ), n_bootstraps=n_bootstraps, n_jobs=1, - random_state=seed, + random_state=seeds[3], ) # Use p-values aggregation [2] @@ -131,7 +132,7 @@ def plot_results(bounds, fdr, n_samples, n_features, power=False): for nb in range(len(bounds)): for i in range(len(bounds[nb])): y = bounds[nb][i] - x = np.random.normal(nb + 1, 0.05) + x = rng.normal(nb + 1, 0.05) plt.scatter(x, y, alpha=0.65, c="blue") plt.boxplot(bounds, sym="") @@ -165,7 +166,14 @@ def effect_number_samples(n_samples): parallel = Parallel(n_jobs, verbose=joblib_verbose) results = parallel( delayed(single_run)( - n_samples, n_features, rho, sparsity, snr, fdr, n_bootstraps, seed=seed + n_samples, + n_features, + rho, + sparsity, + snr, + fdr, + n_bootstraps, + seed=seed, ) for seed in seed_list ) diff --git a/examples/plot_pitfalls_permutation_importance.py b/examples/plot_pitfalls_permutation_importance.py index e456349be..2592345c4 100644 --- a/examples/plot_pitfalls_permutation_importance.py +++ b/examples/plot_pitfalls_permutation_importance.py @@ -24,11 +24,13 @@ from sklearn.neural_network import MLPRegressor from sklearn.pipeline import make_pipeline from sklearn.preprocessing import StandardScaler +from sklearn.utils import check_random_state from hidimstat import CPI, PFI from hidimstat.conditional_sampling import ConditionalSampler -rng = np.random.RandomState(0) +rng = check_random_state(42) +seeds = 
rng.randint(1, np.iinfo(np.int32).max, 9) ############################################################################# # Load the California housing dataset and add a spurious feature @@ -40,7 +42,9 @@ dataset = fetch_california_housing() X_, y_ = dataset.data, dataset.target # only use 2/3 of samples to speed up the example -X, _, y, _ = train_test_split(X_, y_, test_size=0.6667, random_state=0, shuffle=True) +X, _, y, _ = train_test_split( + X_, y_, test_size=0.6667, random_state=seeds[0], shuffle=True +) redundant_coef = rng.choice(np.arange(X.shape[1]), size=(3,), replace=False) X_spurious = X[:, redundant_coef].sum(axis=1) @@ -85,7 +89,7 @@ regressor=make_pipeline( StandardScaler(), MLPRegressor( - random_state=0, + random_state=seeds[1], hidden_layer_sizes=(32, 16, 8), early_stopping=True, learning_rate_init=0.01, @@ -96,7 +100,7 @@ ) -kf = KFold(n_splits=5, shuffle=True, random_state=0) +kf = KFold(n_splits=5, shuffle=True, random_state=seeds[2]) for train_index, test_index in kf.split(X): X_train, X_test = X[train_index], X[test_index] y_train, y_test = y[train_index], y[test_index] @@ -118,6 +122,7 @@ # testing conditional importance, as it identifies the spurious feature as important. permutation_importances = [] conditional_permutation_importances = [] +kf = KFold(n_splits=5, shuffle=True, random_state=seeds[2]) for i, (train_index, test_index) in enumerate(kf.split(X)): X_train, X_test = X[train_index], X[test_index] y_train, y_test = y[train_index], y[test_index] @@ -128,7 +133,8 @@ pfi = PFI( model_c, n_permutations=50, - random_state=0, + n_jobs=5, + random_state=seeds[3], ) pfi.fit(X_test, y_test) @@ -185,6 +191,7 @@ # explained by the other features unchanged. This method is valid for testing conditional # importance. As shown below, it does not identify the spurious feature as important. 
conditional_importances = [] +kf = KFold(n_splits=5, shuffle=True, random_state=seeds[2]) for i, (train_index, test_index) in enumerate(kf.split(X)): X_train, X_test = X[train_index], X[test_index] y_train, y_test = y[train_index], y[test_index] @@ -194,8 +201,11 @@ # Compute conditional permutation feature importance cpi = CPI( model_c, - imputation_model_continuous=RidgeCV(alphas=np.logspace(-3, 3, 5)), - random_state=0, + imputation_model_continuous=RidgeCV( + alphas=np.logspace(-3, 3, 5), + cv=KFold(shuffle=True, random_state=seeds[4]), + ), + random_state=seeds[5], n_jobs=5, ) cpi.fit(X_test, y_test) @@ -251,12 +261,14 @@ X_train, X_test = train_test_split( X, test_size=0.3, - random_state=0, + random_state=seeds[6], ) conditional_sampler = ConditionalSampler( - model_regression=RidgeCV(alphas=np.logspace(-3, 3, 5)), - random_state=0, + model_regression=RidgeCV( + alphas=np.logspace(-3, 3, 5), cv=KFold(shuffle=True, random_state=seeds[7]) + ), + random_state=seeds[8], ) From fb8acdbfd3b8d398441b9d5b713f049896388f49 Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Fri, 22 Aug 2025 18:28:06 +0200 Subject: [PATCH 02/46] Improve reproducibility in code base --- src/hidimstat/base_perturbation.py | 21 ++++++++++++++----- .../conditional_permutation_importance.py | 14 +++++++------ src/hidimstat/knockoffs.py | 13 +++++------- src/hidimstat/leave_one_covariate_out.py | 2 +- .../permutation_feature_importance.py | 8 +++---- 5 files changed, 34 insertions(+), 24 deletions(-) diff --git a/src/hidimstat/base_perturbation.py b/src/hidimstat/base_perturbation.py index 437e1aeee..5332fa07a 100644 --- a/src/hidimstat/base_perturbation.py +++ b/src/hidimstat/base_perturbation.py @@ -49,6 +49,7 @@ def __init__( self.n_jobs = n_jobs self.n_permutations = n_permutations self.n_groups = None + self.random_state = None def fit(self, X, y=None, groups=None): """Base fit method for perturbation-based methods. Identifies the groups. @@ -105,9 +106,17 @@ def predict(self, X): X_ = np.asarray(X) # Parallelize the computation of the importance scores for each group + if self.random_state is None: + list_seed = [None for i in range(self.n_groups)] + else: + list_seed = self.random_state.randint( + 1, np.iinfo(np.int32).max, self.n_groups + ) out_list = Parallel(n_jobs=self.n_jobs)( - delayed(self._joblib_predict_one_group)(X_, group_id, group_key) - for group_id, group_key in enumerate(self.groups.keys()) + delayed(self._joblib_predict_one_group)(X_, group_id, group_key, seed) + for group_id, (group_key, seed) in enumerate( + zip(self.groups.keys(), list_seed) + ) ) return np.stack(out_list, axis=0) @@ -168,7 +177,7 @@ def _check_fit(self): " call fit with groups=None" ) - def _joblib_predict_one_group(self, X, group_id, group_key): + def _joblib_predict_one_group(self, X, group_id, group_key, seed): """ Compute the predictions after perturbation of the data for a given group of variables. This function is parallelized. @@ -181,6 +190,8 @@ def _joblib_predict_one_group(self, X, group_id, group_key): The index of the group of variables. group_key: str, int The key of the group of variables. (parameter use for debugging) + seed: int, optional + Random seed for reproducibility. 
""" group_ids = self._groups_ids[group_id] non_group_ids = np.delete(np.arange(X.shape[1]), group_ids) @@ -188,7 +199,7 @@ def _joblib_predict_one_group(self, X, group_id, group_key): # where the j-th group of covariates is permuted X_perm = np.empty((self.n_permutations, X.shape[0], X.shape[1])) X_perm[:, :, non_group_ids] = np.delete(X, group_ids, axis=1) - X_perm[:, :, group_ids] = self._permutation(X, group_id=group_id) + X_perm[:, :, group_ids] = self._permutation(X, group_id=group_id, seed=seed) # Reshape X_perm to allow for batch prediction X_perm_batch = X_perm.reshape(-1, X.shape[1]) y_pred_perm = getattr(self.estimator, self.method)(X_perm_batch) @@ -202,6 +213,6 @@ def _joblib_predict_one_group(self, X, group_id, group_key): ) return y_pred_perm - def _permutation(self, X, group_id): + def _permutation(self, X, group_id, seed): """Method for creating the permuted data for the j-th group of covariates.""" raise NotImplementedError diff --git a/src/hidimstat/conditional_permutation_importance.py b/src/hidimstat/conditional_permutation_importance.py index e0a71e064..746ef873d 100644 --- a/src/hidimstat/conditional_permutation_importance.py +++ b/src/hidimstat/conditional_permutation_importance.py @@ -72,7 +72,7 @@ def __init__( self.categorical_max_cardinality = categorical_max_cardinality self.imputation_model_categorical = imputation_model_categorical self.imputation_model_continuous = imputation_model_continuous - self.random_state = random_state + self.random_state = check_random_state(random_state) def fit(self, X, y=None, groups=None, var_type="auto"): """Fit the imputation models. @@ -96,7 +96,6 @@ def fit(self, X, y=None, groups=None, var_type="auto"): self : object Returns the instance itself. """ - self.random_state = check_random_state(self.random_state) super().fit(X, None, groups=groups) if isinstance(var_type, str): self.var_type = [var_type for _ in range(self.n_groups)] @@ -105,7 +104,7 @@ def fit(self, X, y=None, groups=None, var_type="auto"): self._list_imputation_models = [ ConditionalSampler( - data_type=self.var_type[groupd_id], + data_type=self.var_type[group_id], model_regression=( None if self.imputation_model_continuous is None @@ -116,10 +115,13 @@ def fit(self, X, y=None, groups=None, var_type="auto"): if self.imputation_model_categorical is None else clone(self.imputation_model_categorical) ), - random_state=self.random_state, + random_state=seed, categorical_max_cardinality=self.categorical_max_cardinality, ) - for groupd_id in range(self.n_groups) + for group_id, seed in zip( + range(self.n_groups), + self.random_state.randint(0, np.iinfo(np.int32).max, self.n_groups), + ) ] # Parallelize the fitting of the covariate estimators @@ -149,7 +151,7 @@ def _check_fit(self): for m in self._list_imputation_models: check_is_fitted(m.model) - def _permutation(self, X, group_id): + def _permutation(self, X, group_id, seed): """Sample from the conditional distribution using a permutation of the residuals.""" X_j = X[:, self._groups_ids[group_id]].copy() diff --git a/src/hidimstat/knockoffs.py b/src/hidimstat/knockoffs.py index 7dab07aeb..b01605051 100644 --- a/src/hidimstat/knockoffs.py +++ b/src/hidimstat/knockoffs.py @@ -6,6 +6,7 @@ from sklearn.preprocessing import StandardScaler from sklearn.utils import check_random_state from sklearn.utils.validation import check_memory +from sklearn.base import clone from hidimstat.gaussian_knockoff import ( gaussian_knockoff_generation, @@ -188,13 +189,9 @@ def model_x_knockoff( parallel = Parallel(n_jobs, 
verbose=joblib_verbose)
 
     # get the seed for the different run
-    if isinstance(random_state, (int, np.int32, np.int64)):
-        rng = check_random_state(random_state)
-    elif random_state is None:
-        rng = check_random_state(0)
-    else:
-        raise TypeError("Wrong type for random_state")
-    seed_list = rng.randint(1, np.iinfo(np.int32).max, n_bootstraps)
+    seed_list = check_random_state(random_state).randint(
+        0, np.iinfo(np.int32).max, n_bootstraps
+    )
 
     if centered:
         X = StandardScaler().fit_transform(X)
@@ -225,7 +222,7 @@ def model_x_knockoff(
 
     results = parallel(
         delayed(memory.cache(_stat_coefficient_diff))(
-            X, X_tildes[i], y, estimator, fdr, preconfigure_estimator
+            X, X_tildes[i], y, clone(estimator), fdr, preconfigure_estimator
         )
         for i in range(n_bootstraps)
     )
diff --git a/src/hidimstat/leave_one_covariate_out.py b/src/hidimstat/leave_one_covariate_out.py
index f1ea71894..6330e04a9 100644
--- a/src/hidimstat/leave_one_covariate_out.py
+++ b/src/hidimstat/leave_one_covariate_out.py
@@ -93,7 +93,7 @@ def _joblib_fit_one_group(self, estimator, X, y, key_groups):
         estimator.fit(X_minus_j, y)
         return estimator
 
-    def _joblib_predict_one_group(self, X, group_id, key_groups):
+    def _joblib_predict_one_group(self, X, group_id, key_groups, seed):
         """Predict the target variable after removing a group of covariates.
         Used in parallel."""
         X_minus_j = np.delete(X, self._groups_ids[group_id], axis=1)
diff --git a/src/hidimstat/permutation_feature_importance.py b/src/hidimstat/permutation_feature_importance.py
index 29d007656..485eecd65 100644
--- a/src/hidimstat/permutation_feature_importance.py
+++ b/src/hidimstat/permutation_feature_importance.py
@@ -53,14 +53,14 @@ def __init__(
             n_jobs=n_jobs,
             n_permutations=n_permutations,
         )
-        self.random_state = random_state
+        self.random_state = check_random_state(random_state)
 
-    def _permutation(self, X, group_id):
+    def _permutation(self, X, group_id, seed):
         """Create the permuted data for the j-th group of covariates"""
-        self.random_state = check_random_state(self.random_state)
+        rng = np.random.RandomState(seed)
         X_perm_j = np.array(
             [
-                self.random_state.permutation(X[:, self._groups_ids[group_id]].copy())
+                rng.permutation(X[:, self._groups_ids[group_id]].copy())
                 for _ in range(self.n_permutations)
             ]
         )

From 93fd34fcdcaf9cd9f0da87f261259f377c9f6d63 Mon Sep 17 00:00:00 2001
From: kusch lionel
Date: Fri, 22 Aug 2025 18:28:59 +0200
Subject: [PATCH 03/46] Add script for testing reproducibility

---
 .../debugger_script/try_reproducibility.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)
 create mode 100644 tools/examples/debugger_script/try_reproducibility.py

diff --git a/tools/examples/debugger_script/try_reproducibility.py b/tools/examples/debugger_script/try_reproducibility.py
new file mode 100644
index 000000000..d30aeeab2
--- /dev/null
+++ b/tools/examples/debugger_script/try_reproducibility.py
@@ -0,0 +1,17 @@
+import os
+import sys
+
+
+from joblib import Parallel, delayed
+
+# add the example for import them
+sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../../examples")
+
+
+def run_joblib(i):
+    import plot_knockoff_aggregation  # importing the module runs the example under test
+
+
+# run the same example several times in parallel to compare the results
+parallel = Parallel(n_jobs=4)
+parallel(delayed(run_joblib)(i) for i in range(6))

From 4261f070fcc76e3167ad22da7e0bf7e05f9975eb Mon Sep 17 00:00:00 2001
From: kusch lionel
Date: Fri, 22 Aug 2025 18:46:13 +0200
Subject: [PATCH 04/46] fix error

---
 test/test_base_perturbation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/test/test_base_perturbation.py b/test/test_base_perturbation.py
index dd3ff6d6c..835c3f12e 100644
--- a/test/test_base_perturbation.py
+++ b/test/test_base_perturbation.py
@@ -11,4 +11,4 @@ def test_no_implemented_methods():
     estimator.fit(X[:, 0], X[:, 1])
     basic_class = BasePerturbation(estimator=estimator)
     with pytest.raises(NotImplementedError):
-        basic_class._permutation(X, group_id=None)
+        basic_class._permutation(X, group_id=None, seed=0)

From 46e9961e6ee90c732c83d4acfbe4f3b0bed4ad6f Mon Sep 17 00:00:00 2001
From: kusch lionel
Date: Mon, 25 Aug 2025 11:32:47 +0200
Subject: [PATCH 05/46] homogenize management of seeds in examples

---
 examples/plot_2D_simulation_example.py        | 17 ++++++-------
 .../plot_conditional_vs_marginal_xor_data.py  | 17 +++++++++------
 examples/plot_dcrt_example.py                 | 17 +++++++++++----
 ...ot_diabetes_variable_importance_example.py |  4 ++--
 examples/plot_fmri_data_example.py            | 16 +++++++++++---
 .../plot_importance_classification_iris.py    | 14 ++++++++-----
 examples/plot_knockoff_aggregation.py         | 11 ++++------
 examples/plot_knockoffs_wisconsin.py          | 16 +++++++-------
 examples/plot_model_agnostic_importance.py    | 21 ++++++++++++-------
 .../plot_pitfalls_permutation_importance.py   |  6 +++---
 10 files changed, 87 insertions(+), 52 deletions(-)

diff --git a/examples/plot_2D_simulation_example.py b/examples/plot_2D_simulation_example.py
index 704078933..0986d2fb6 100644
--- a/examples/plot_2D_simulation_example.py
+++ b/examples/plot_2D_simulation_example.py
@@ -71,6 +71,9 @@
 from hidimstat.statistical_tools.p_values import zscore_from_pval
 from hidimstat._utils.scenario import multivariate_simulation
 
+# Define the seeds for the reproducibility of the example
+seeds = np.random.RandomState(0).randint(1e3, size=4)
+
 #############################################################################
 # Specific plotting functions
 # ---------------------------
@@ -172,7 +175,7 @@ def plot(maps, titles):
 
 # generating the data
 X_init, y, beta, epsilon, _, _ = multivariate_simulation(
-    n_samples, shape, roi_size, sigma, smooth_X, seed=1
+    n_samples, shape, roi_size, sigma, smooth_X, seed=seeds[0]
 )
 
 ##############################################################################
@@ -242,7 +245,9 @@
 # and referred to as Desparsified Lasso.
# compute desparsified lasso -beta_hat, sigma_hat, precision_diagonal = desparsified_lasso(X_init, y, n_jobs=n_jobs) +beta_hat, sigma_hat, precision_diagonal = desparsified_lasso( + X_init, y, n_jobs=n_jobs, seed=seeds[1] +) pval, pval_corr, one_minus_pval, one_minus_pval_corr, cb_min, cb_max = ( desparsified_lasso_pvalue(X_init.shape[0], beta_hat, sigma_hat, precision_diagonal) ) @@ -270,7 +275,7 @@ def plot(maps, titles): # clustered desparsified lasso (CluDL) ward_, beta_hat, theta_hat, omega_diag = clustered_inference( - X_init, y, ward, n_clusters, scaler_sampling=StandardScaler() + X_init, y, ward, n_clusters, scaler_sampling=StandardScaler(), seed=seeds[2] ) beta_hat, pval, pval_corr, one_minus_pval, one_minus_pval_corr = ( clustered_inference_pvalue(n_samples, False, ward_, beta_hat, theta_hat, omega_diag) @@ -295,11 +300,7 @@ def plot(maps, titles): # ensemble of clustered desparsified lasso (EnCluDL) list_ward, list_beta_hat, list_theta_hat, list_omega_diag = ( ensemble_clustered_inference( - X_init, - y, - ward, - n_clusters, - scaler_sampling=StandardScaler(), + X_init, y, ward, n_clusters, scaler_sampling=StandardScaler(), seed=seeds[3] ) ) beta_hat, selected_ecdl = ensemble_clustered_inference_pvalue( diff --git a/examples/plot_conditional_vs_marginal_xor_data.py b/examples/plot_conditional_vs_marginal_xor_data.py index 6b9b242ad..52f21ed9d 100644 --- a/examples/plot_conditional_vs_marginal_xor_data.py +++ b/examples/plot_conditional_vs_marginal_xor_data.py @@ -18,10 +18,13 @@ from hidimstat import CPI +# Define the seeds for the reproducibility of the example +rng = np.random.RandomState(0) +seeds = rng.randint(1e3, size=5) + ############################################################################# # To solve the XOR problem, we will use a Support Vector Classier (SVC) with Radial Basis Function (RBF) kernel. The decision function of # the fitted model shows that the model is able to separate the two classes. -rng = np.random.RandomState(0) X = rng.randn(400, 2) Y = np.logical_xor(X[:, 0] > 0, X[:, 1] > 0).astype(int) @@ -30,8 +33,10 @@ np.linspace(np.min(X[:, 1]), np.max(X[:, 1]), 100), ) -X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=0) -model = SVC(kernel="rbf", random_state=0) +X_train, X_test, y_train, y_test = train_test_split( + X, Y, test_size=0.2, random_state=seeds[0] +) +model = SVC(kernel="rbf", random_state=seeds[1]) model.fit(X_train, y_train) Z = model.decision_function(np.c_[xx.ravel(), yy.ravel()]) @@ -79,8 +84,8 @@ # features. Conditional importance, on the other hand, reveals that both features # are important (therefore rejecting the null hypothesis # :math:`Y \perp\!\!\!\perp X^1 | X^2`). 
-cv = KFold(n_splits=5, shuffle=True, random_state=0) -clf = SVC(kernel="rbf", random_state=0) +cv = KFold(n_splits=5, shuffle=True, random_state=seeds[2]) +clf = SVC(kernel="rbf", random_state=seeds[3]) # Compute marginal importance using univariate models marginal_scores = [] for i in range(X.shape[1]): @@ -114,7 +119,7 @@ loss=hinge_loss, imputation_model_continuous=RidgeCV(np.logspace(-3, 3, 10)), n_permutations=50, - random_state=0, + random_state=seeds[4], ) vim.fit(X_train, y_train) importances.append(vim.importance(X_test, y_test)["importance"]) diff --git a/examples/plot_dcrt_example.py b/examples/plot_dcrt_example.py index b3f1efbcf..e8e301f29 100644 --- a/examples/plot_dcrt_example.py +++ b/examples/plot_dcrt_example.py @@ -22,6 +22,9 @@ from hidimstat import D0CRT from hidimstat._utils.scenario import multivariate_1D_simulation +# Define the seeds for the reproducibility of the example +seeds = np.random.RandomState(0).randint(1e3, size=10) + ############################################################################# # Processing the computations # --------------------------- @@ -29,7 +32,6 @@ results_list = [] for sim_ind in range(10): print(f"Processing: {sim_ind+1}") - np.random.seed(sim_ind) # Number of observations n = 100 @@ -45,14 +47,18 @@ alpha = 5e-2 X, y, _, __ = multivariate_1D_simulation( - n_samples=n, n_features=p, support_size=n_signal, rho=rho, seed=sim_ind + n_samples=n, n_features=p, support_size=n_signal, rho=rho, seed=seeds[sim_ind] ) # Applying a reLu function on the outcome y to get non-linear relationships y = np.maximum(0.0, y) ## dcrt Lasso ## - d0crt_lasso = D0CRT(estimator=LassoCV(random_state=42, n_jobs=1), screening=False) + d0crt_lasso = D0CRT( + estimator=LassoCV(random_state=seeds[sim_ind] + 1, n_jobs=1), + screening=False, + random_state=seeds[sim_ind] + 2, + ) d0crt_lasso.fit_importance(X, y) pvals_lasso = d0crt_lasso.pvalues_ results_list.append( @@ -65,8 +71,11 @@ ## dcrt Random Forest ## d0crt_random_forest = D0CRT( - estimator=RandomForestRegressor(n_estimators=100, random_state=42, n_jobs=1), + estimator=RandomForestRegressor( + n_estimators=100, random_state=seeds[sim_ind] + 3, n_jobs=1 + ), screening=False, + random_state=seeds[sim_ind] + 4, ) d0crt_random_forest.fit_importance(X, y) pvals_forest = d0crt_random_forest.pvalues_ diff --git a/examples/plot_diabetes_variable_importance_example.py b/examples/plot_diabetes_variable_importance_example.py index 53f7407fa..817dbb366 100644 --- a/examples/plot_diabetes_variable_importance_example.py +++ b/examples/plot_diabetes_variable_importance_example.py @@ -54,11 +54,11 @@ from sklearn.linear_model import LogisticRegressionCV, RidgeCV from sklearn.metrics import r2_score, root_mean_squared_error from sklearn.model_selection import KFold -from sklearn.utils import check_random_state from hidimstat import CPI, LOCO, PFI -seeds = check_random_state(42).randint(1, np.iinfo(np.int32).max, 7) +# Define the seeds for the reproducibility of the example +seeds = np.random.RandomState(0).randint(1e3, size=7) ############################################################################# # Load the diabetes dataset diff --git a/examples/plot_fmri_data_example.py b/examples/plot_fmri_data_example.py index 6ca4d1d14..cbe6af5d2 100644 --- a/examples/plot_fmri_data_example.py +++ b/examples/plot_fmri_data_example.py @@ -61,6 +61,8 @@ ) from hidimstat.statistical_tools.p_values import zscore_from_pval +# Define the seeds for the reproducibility of the example +seeds = np.random.RandomState(0).randint(1e3, 
size=3) # Remmove warnings during loading data warnings.filterwarnings( @@ -152,7 +154,7 @@ def preprocess_haxby(subject=2, memory=None): # feature aggregation methods. try: beta_hat, sigma_hat, precision_diagonal = desparsified_lasso( - X, y, noise_method="median", max_iteration=1000 + X, y, noise_method="median", max_iteration=1000, seed=seeds[0], n_jobs=n_job ) pval_dl, _, one_minus_pval_dl, _, cb_min, cb_max = desparsified_lasso_pvalue( X.shape[0], beta_hat, sigma_hat, precision_diagonal @@ -166,7 +168,14 @@ def preprocess_haxby(subject=2, memory=None): # Now, the clustered inference algorithm which combines parcellation # and high-dimensional inference (c.f. References). ward_, beta_hat, theta_hat, omega_diag = clustered_inference( - X, y, ward, n_clusters, scaler_sampling=StandardScaler(), tolerance=1e-2 + X, + y, + ward, + n_clusters, + scaler_sampling=StandardScaler(), + tolerance=1e-2, + seed=seeds[1], + n_jobs=n_job, ) beta_hat, pval_cdl, _, one_minus_pval_cdl, _ = clustered_inference_pvalue( X.shape[0], None, ward_, beta_hat, theta_hat, omega_diag @@ -191,7 +200,8 @@ def preprocess_haxby(subject=2, memory=None): n_bootstraps=5, max_iteration=6000, tolerance=1e-2, - n_jobs=2, + seed=seeds[2], + n_jobs=n_job, ) ) beta_hat, selected = ensemble_clustered_inference_pvalue( diff --git a/examples/plot_importance_classification_iris.py b/examples/plot_importance_classification_iris.py index a3c166a92..47f4eb7c4 100644 --- a/examples/plot_importance_classification_iris.py +++ b/examples/plot_importance_classification_iris.py @@ -36,6 +36,11 @@ from hidimstat import CPI, PFI + +# Define the seeds for the reproducibility of the example +rng = np.random.RandomState(0) +seeds = rng.randint(1e3, size=4) + ######################################################################## # Load the iris dataset and add a spurious feature # ---------------------------------------------------------------------- @@ -44,7 +49,6 @@ # allows to illustrate that `PFI` is not robust to spurious features, # contrarily to `CPI`. 
dataset = load_iris() -rng = np.random.RandomState(0) X, y = dataset.data, dataset.target spurious_feat = X[:, 2] + X[:, 3] spurious_feat += rng.normal(size=X.shape[0], scale=np.std(spurious_feat) / 2) @@ -81,7 +85,7 @@ def run_one_fold(X, y, model, train_index, test_index, vim_name="CPI", groups=No alphas=np.logspace(-3, 3, 10), cv=KFold(shuffle=True, random_state=1) ), n_permutations=50, - random_state=0, + random_state=seeds[0], method=method, loss=loss, ) @@ -89,7 +93,7 @@ def run_one_fold(X, y, model, train_index, test_index, vim_name="CPI", groups=No vim = PFI( estimator=model_c, n_permutations=50, - random_state=0, + random_state=seeds[1], method=method, loss=loss, ) @@ -118,12 +122,12 @@ def run_one_fold(X, y, model, train_index, test_index, vim_name="CPI", groups=No Cs=np.logspace(-3, 3, 10), tol=1e-3, max_iter=1000, - cv=KFold(shuffle=True, random_state=2), + cv=KFold(shuffle=True, random_state=seeds[2]), ), GridSearchCV( SVC(kernel="rbf"), {"C": np.logspace(-3, 3, 10)}, - cv=KFold(shuffle=True, random_state=3), + cv=KFold(shuffle=True, random_state=seeds[3]), ), ] cv = KFold(n_splits=5, shuffle=True, random_state=4) diff --git a/examples/plot_knockoff_aggregation.py b/examples/plot_knockoff_aggregation.py index 14ea794a7..b77ee844c 100644 --- a/examples/plot_knockoff_aggregation.py +++ b/examples/plot_knockoff_aggregation.py @@ -22,7 +22,6 @@ import numpy as np from sklearn.linear_model import LassoCV from sklearn.model_selection import KFold -from sklearn.utils import check_random_state from hidimstat.knockoffs import ( model_x_knockoff, @@ -59,22 +58,20 @@ snr = 10 # number of repetitions for the bootstraps n_bootstraps = 25 -# seed for the random generator -seed = 45 # number of jobs for repetition of the method n_jobs = 2 # verbosity of the joblib joblib_verbose = 0 - -rng = check_random_state(seed) -seed_list = rng.randint(1, np.iinfo(np.int32).max, runs) +# Define the seeds for the reproducibility of the example +rng = np.random.RandomState(45) +seed_list = rng.randint(1e3, size=runs) ####################################################################### # Define the function for running the three procedures on the same data # --------------------------------------------------------------------- def single_run(n_samples, n_features, rho, sparsity, snr, fdr, n_bootstraps, seed=0): - seeds = check_random_state(seed).randint(1, np.iinfo(np.int32).max, 4) + seeds = np.random.RandomState(seed).randint(1, np.iinfo(np.int32).max, 4) # Generate data X, y, _, non_zero_index = multivariate_1D_simulation_AR( n_samples, n_features, rho=rho, sparsity=sparsity, seed=seed, snr=snr diff --git a/examples/plot_knockoffs_wisconsin.py b/examples/plot_knockoffs_wisconsin.py index 8839ff332..a03843436 100644 --- a/examples/plot_knockoffs_wisconsin.py +++ b/examples/plot_knockoffs_wisconsin.py @@ -12,8 +12,10 @@ import numpy as np import pandas as pd -seed = 0 -rng = np.random.RandomState(seed) + +# Define the seeds for the reproducibility of the example +rng = np.random.RandomState(43) +seeds = rng.randint(1e3, size=5) ######################################################################################## @@ -32,7 +34,7 @@ y = data.target X_train, X_test, y_train, y_test = train_test_split( - X, y, test_size=0.1, random_state=seed + X, y, test_size=0.1, random_state=seeds[0] ) scaler = StandardScaler() @@ -53,7 +55,7 @@ from sklearn.linear_model import LogisticRegressionCV clf = LogisticRegressionCV( - Cs=np.logspace(-3, 3, 10), penalty="l1", solver="liblinear", random_state=rng + 
Cs=np.logspace(-3, 3, 10), penalty="l1", solver="liblinear", random_state=seeds[1] ) clf.fit(X_train, y_train) print(f"Accuracy of Lasso on test set: {clf.score(X_test, y_test):.3f}") @@ -100,7 +102,7 @@ Cs=np.logspace(-3, 3, 10), penalty="l1", solver="liblinear", - random_state=rng, + random_state=seeds[2], n_jobs=1, ) lasso_noisy.fit(noisy_train, y_train) @@ -150,12 +152,12 @@ solver="liblinear", penalty="l1", Cs=np.logspace(-3, 3, 10), - random_state=rng, + random_state=seeds[3], tol=1e-3, max_iter=1000, ), n_bootstraps=1, - random_state=0, + random_state=seeds[4], tol_gauss=1e-15, preconfigure_estimator=None, fdr=fdr, diff --git a/examples/plot_model_agnostic_importance.py b/examples/plot_model_agnostic_importance.py index 9ace442d4..039a71d96 100644 --- a/examples/plot_model_agnostic_importance.py +++ b/examples/plot_model_agnostic_importance.py @@ -37,11 +37,14 @@ from hidimstat import LOCO, D0CRT +# Define the seeds for the reproducibility of the example +rng = np.random.RandomState(0) +seeds = rng.randint(1e3, size=6) + ############################################################################# # Generate data where classes are not linearly separable # -------------------------------------------------------------- -rng = np.random.RandomState(0) -X, y = make_circles(n_samples=500, noise=0.1, factor=0.6, random_state=rng) +X, y = make_circles(n_samples=500, noise=0.1, factor=0.6, random_state=seeds[0]) fig, ax = plt.subplots() @@ -60,8 +63,8 @@ ############################################################################### # Define a linear and a non-linear estimator # ------------------------------------------ -non_linear_model = SVC(kernel="rbf", random_state=0) -linear_model = LogisticRegressionCV(Cs=np.logspace(-3, 3, 5)) +non_linear_model = SVC(kernel="rbf", random_state=seeds[1]) +linear_model = LogisticRegressionCV(Cs=np.logspace(-3, 3, 5), random_state=seeds[2]) ############################################################################### # Compute p-values using d0CRT @@ -70,11 +73,15 @@ # test (:math:`H_0: X_j \perp\!\!\!\perp y | X_{-j}`) for each variable. However, # this test is based on a linear model (LogisticRegression) and fails to reject the null # in the presence of non-linear relationships. -d0crt_linear = D0CRT(estimator=clone(linear_model), screening=False) +d0crt_linear = D0CRT( + estimator=clone(linear_model), screening=False, random_state=seeds[3] +) d0crt_linear.fit_importance(X, y) pval_dcrt_linear = d0crt_linear.pvalues_ -d0crt_non_linear = D0CRT(estimator=clone(non_linear_model), screening=False) +d0crt_non_linear = D0CRT( + estimator=clone(non_linear_model), screening=False, random_state=seeds[4] +) d0crt_non_linear.fit_importance(X, y) pval_dcrt_non_linear = d0crt_non_linear.pvalues_ @@ -86,7 +93,7 @@ # misspecified model, such as a linear model for this dataset, LOCO fails to reject the null # similarly to d0CRT. However, when using a non-linear model (SVC), LOCO is able to # identify the important variables. 
-cv = KFold(n_splits=5, shuffle=True, random_state=0)
+cv = KFold(n_splits=5, shuffle=True, random_state=seeds[5])
 
 importances_linear = []
 importances_non_linear = []
diff --git a/examples/plot_pitfalls_permutation_importance.py b/examples/plot_pitfalls_permutation_importance.py
index 2592345c4..b3ae99e9f 100644
--- a/examples/plot_pitfalls_permutation_importance.py
+++ b/examples/plot_pitfalls_permutation_importance.py
@@ -24,13 +24,13 @@
 from sklearn.neural_network import MLPRegressor
 from sklearn.pipeline import make_pipeline
 from sklearn.preprocessing import StandardScaler
-from sklearn.utils import check_random_state
 
 from hidimstat import CPI, PFI
 from hidimstat.conditional_sampling import ConditionalSampler
 
-rng = check_random_state(42)
-seeds = rng.randint(1, np.iinfo(np.int32).max, 9)
+# Define the seeds for the reproducibility of the example
+rng = np.random.RandomState(0)
+seeds = rng.randint(1e3, size=9)
 
 #############################################################################
 # Load the California housing dataset and add a spurious feature

From 8085706443c315b76bda524b0105109357cc5b7a Mon Sep 17 00:00:00 2001
From: kusch lionel
Date: Mon, 25 Aug 2025 16:30:30 +0200
Subject: [PATCH 06/46] fix randomization in plot_knockoff_aggregation

---
 examples/plot_knockoff_aggregation.py | 10 ++++++----
 src/hidimstat/knockoffs.py            |  2 +-
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/examples/plot_knockoff_aggregation.py b/examples/plot_knockoff_aggregation.py
index b77ee844c..e27bdda1a 100644
--- a/examples/plot_knockoff_aggregation.py
+++ b/examples/plot_knockoff_aggregation.py
@@ -71,7 +71,7 @@
 # Define the function for running the three procedures on the same data
 # ---------------------------------------------------------------------
 def single_run(n_samples, n_features, rho, sparsity, snr, fdr, n_bootstraps, seed=0):
-    seeds = np.random.RandomState(seed).randint(1, np.iinfo(np.int32).max, 4)
+    seeds = np.random.RandomState(seed).randint(np.iinfo(np.int32).max, 6)
     # Generate data
     X, y, _, non_zero_index = multivariate_1D_simulation_AR(
         n_samples, n_features, rho=rho, sparsity=sparsity, seed=seed, snr=snr
@@ -84,9 +84,10 @@ def single_run(n_samples, n_features, rho, sparsity, snr, fdr, n_bootstraps, see
         estimator=LassoCV(
             n_jobs=1,
             cv=KFold(n_splits=5, shuffle=True, random_state=seeds[0]),
+            random_state=seeds[1],
         ),
         n_bootstraps=1,
-        random_state=seeds[1],
+        random_state=seeds[2],
     )
     mx_selection, _ = model_x_knockoff_pvalue(test_scores, fdr=fdr)
     fdp_mx, power_mx = fdp_power(mx_selection, non_zero_index)
@@ -97,11 +98,12 @@ def single_run(n_samples, n_features, rho, sparsity, snr, fdr, n_bootstraps, see
         y,
         estimator=LassoCV(
             n_jobs=1,
-            cv=KFold(n_splits=5, shuffle=True, random_state=seeds[2]),
+            cv=KFold(n_splits=5, shuffle=True, random_state=seeds[3]),
+            random_state=seeds[4],
         ),
         n_bootstraps=n_bootstraps,
         n_jobs=1,
-        random_state=seeds[3],
+        random_state=seeds[5],
     )
 
     # Use p-values aggregation [2]
diff --git a/src/hidimstat/knockoffs.py b/src/hidimstat/knockoffs.py
index b01605051..ef1629be9 100644
--- a/src/hidimstat/knockoffs.py
+++ b/src/hidimstat/knockoffs.py
@@ -190,7 +190,7 @@ def model_x_knockoff(
 
     # get the seed for the different run
     seed_list = check_random_state(random_state).randint(
-        0, np.iinfo(np.int32).max, n_bootstraps
+        np.iinfo(np.int32).max, size=n_bootstraps
     )
 
     if centered:
         X = StandardScaler().fit_transform(X)

From e68a9a3e818ba94de93f503da6c789976668c793 Mon Sep 17 00:00:00 2001
From: kusch lionel
Date: Mon, 25 Aug 2025 16:48:23 +0200
Subject: [PATCH 07/46] fix error in plot

---
examples/plot_knockoff_aggregation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/plot_knockoff_aggregation.py b/examples/plot_knockoff_aggregation.py index e27bdda1a..43d62d79d 100644 --- a/examples/plot_knockoff_aggregation.py +++ b/examples/plot_knockoff_aggregation.py @@ -71,7 +71,7 @@ # Define the function for running the three procedures on the same data # --------------------------------------------------------------------- def single_run(n_samples, n_features, rho, sparsity, snr, fdr, n_bootstraps, seed=0): - seeds = np.random.RandomState(seed).randint(np.iinfo(np.int32).max, 6) + seeds = np.random.RandomState(seed).randint(np.iinfo(np.int32).max, size=6) # Generate data X, y, _, non_zero_index = multivariate_1D_simulation_AR( n_samples, n_features, rho=rho, sparsity=sparsity, seed=seed, snr=snr From 0b90779f813721c0bed60ad41b0493138d1dff6b Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Mon, 25 Aug 2025 18:32:48 +0200 Subject: [PATCH 08/46] fix path --- tools/examples/debugger_script/try_reproducibility.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/examples/debugger_script/try_reproducibility.py b/tools/examples/debugger_script/try_reproducibility.py index d30aeeab2..95d3c804a 100644 --- a/tools/examples/debugger_script/try_reproducibility.py +++ b/tools/examples/debugger_script/try_reproducibility.py @@ -5,7 +5,7 @@ from joblib import Parallel, delayed # add the example for import them -sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../../examples") +sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../../../examples") def run_joblib(i): From 8f4011f7a541705ff631d9a527ba8cafa33e4fdf Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Mon, 25 Aug 2025 18:34:22 +0200 Subject: [PATCH 09/46] fix seed for seaborn --- examples/plot_knockoff_aggregation.py | 2 +- .../plot_pitfalls_permutation_importance.py | 17 +++++++---------- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/examples/plot_knockoff_aggregation.py b/examples/plot_knockoff_aggregation.py index 43d62d79d..699fd2cee 100644 --- a/examples/plot_knockoff_aggregation.py +++ b/examples/plot_knockoff_aggregation.py @@ -71,7 +71,7 @@ # Define the function for running the three procedures on the same data # --------------------------------------------------------------------- def single_run(n_samples, n_features, rho, sparsity, snr, fdr, n_bootstraps, seed=0): - seeds = np.random.RandomState(seed).randint(np.iinfo(np.int32).max, size=6) + seeds = np.random.RandomState(seed).randint(1, np.iinfo(np.int32).max, 6) # Generate data X, y, _, non_zero_index = multivariate_1D_simulation_AR( n_samples, n_features, rho=rho, sparsity=sparsity, seed=seed, snr=snr diff --git a/examples/plot_pitfalls_permutation_importance.py b/examples/plot_pitfalls_permutation_importance.py index 305fdd10f..908b587d4 100644 --- a/examples/plot_pitfalls_permutation_importance.py +++ b/examples/plot_pitfalls_permutation_importance.py @@ -30,7 +30,7 @@ # Define the seeds for the reproducibility of the example rng = np.random.RandomState(0) -seeds = rng.randint(1e3, size=9) +seeds = rng.randint(1e3, size=10) ############################################################################# # Load the California housing dataset and add a spurious feature @@ -150,10 +150,7 @@ # Create a horizontal boxplot of permutation importances fig, ax = plt.subplots() sns.barplot( - permutation_importances, - orient="h", - color="tab:blue", - capsize=0.2, + 
permutation_importances, orient="h", color="tab:blue", capsize=0.2, seed=seeds[4] ) ax.set_xlabel("Permutation Importance") # Add asterisks for features with p-values below the threshold @@ -203,9 +200,9 @@ model_c, imputation_model_continuous=RidgeCV( alphas=np.logspace(-3, 3, 5), - cv=KFold(shuffle=True, random_state=seeds[4]), + cv=KFold(shuffle=True, random_state=seeds[5]), ), - random_state=seeds[5], + random_state=seeds[6], n_jobs=5, ) cfi.fit(X_test, y_test) @@ -261,14 +258,14 @@ X_train, X_test = train_test_split( X, test_size=0.3, - random_state=seeds[6], + random_state=seeds[7], ) conditional_sampler = ConditionalSampler( model_regression=RidgeCV( - alphas=np.logspace(-3, 3, 5), cv=KFold(shuffle=True, random_state=seeds[7]) + alphas=np.logspace(-3, 3, 5), cv=KFold(shuffle=True, random_state=seeds[8]) ), - random_state=seeds[8], + random_state=seeds[9], ) From fc0d21fbaf847cf471841cb6eea53ff984d46415 Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Tue, 26 Aug 2025 12:17:41 +0200 Subject: [PATCH 10/46] improve seed setting --- examples/plot_2D_simulation_example.py | 2 +- examples/plot_conditional_vs_marginal_xor_data.py | 2 +- examples/plot_dcrt_example.py | 2 +- examples/plot_diabetes_variable_importance_example.py | 2 +- examples/plot_fmri_data_example.py | 2 +- examples/plot_importance_classification_iris.py | 2 +- examples/plot_knockoff_aggregation.py | 4 ++-- examples/plot_knockoffs_wisconsin.py | 2 +- examples/plot_model_agnostic_importance.py | 2 +- examples/plot_pitfalls_permutation_importance.py | 2 +- 10 files changed, 11 insertions(+), 11 deletions(-) diff --git a/examples/plot_2D_simulation_example.py b/examples/plot_2D_simulation_example.py index 0986d2fb6..ae600712e 100644 --- a/examples/plot_2D_simulation_example.py +++ b/examples/plot_2D_simulation_example.py @@ -72,7 +72,7 @@ from hidimstat._utils.scenario import multivariate_simulation # Define the seeds for the reproducibility of the example -seeds = np.random.RandomState(0).randint(1e3, size=4) +seeds = np.arange(4) + 20 ############################################################################# # Specific plotting functions diff --git a/examples/plot_conditional_vs_marginal_xor_data.py b/examples/plot_conditional_vs_marginal_xor_data.py index 250e99a1a..2bac762cb 100644 --- a/examples/plot_conditional_vs_marginal_xor_data.py +++ b/examples/plot_conditional_vs_marginal_xor_data.py @@ -20,7 +20,7 @@ # Define the seeds for the reproducibility of the example rng = np.random.RandomState(0) -seeds = rng.randint(1e3, size=5) +seeds = range(1, 6) ############################################################################# # To solve the XOR problem, we will use a Support Vector Classier (SVC) with Radial Basis Function (RBF) kernel. 
The decision function of diff --git a/examples/plot_dcrt_example.py b/examples/plot_dcrt_example.py index e8e301f29..91113ec12 100644 --- a/examples/plot_dcrt_example.py +++ b/examples/plot_dcrt_example.py @@ -23,7 +23,7 @@ from hidimstat._utils.scenario import multivariate_1D_simulation # Define the seeds for the reproducibility of the example -seeds = np.random.RandomState(0).randint(1e3, size=10) +seeds = np.arange(10) + 10 ############################################################################# # Processing the computations diff --git a/examples/plot_diabetes_variable_importance_example.py b/examples/plot_diabetes_variable_importance_example.py index de2a60386..35ba9a810 100644 --- a/examples/plot_diabetes_variable_importance_example.py +++ b/examples/plot_diabetes_variable_importance_example.py @@ -58,7 +58,7 @@ from hidimstat import CFI, LOCO, PFI # Define the seeds for the reproducibility of the example -seeds = np.random.RandomState(0).randint(1e3, size=7) +seeds = np.arange(7) + 20 ############################################################################# # Load the diabetes dataset diff --git a/examples/plot_fmri_data_example.py b/examples/plot_fmri_data_example.py index cbe6af5d2..6571afed8 100644 --- a/examples/plot_fmri_data_example.py +++ b/examples/plot_fmri_data_example.py @@ -62,7 +62,7 @@ from hidimstat.statistical_tools.p_values import zscore_from_pval # Define the seeds for the reproducibility of the example -seeds = np.random.RandomState(0).randint(1e3, size=3) +seeds = range(3) # Remmove warnings during loading data warnings.filterwarnings( diff --git a/examples/plot_importance_classification_iris.py b/examples/plot_importance_classification_iris.py index 8eb4a425c..a889c75a8 100644 --- a/examples/plot_importance_classification_iris.py +++ b/examples/plot_importance_classification_iris.py @@ -39,7 +39,7 @@ # Define the seeds for the reproducibility of the example rng = np.random.RandomState(0) -seeds = rng.randint(1e3, size=4) +seeds = np.arange(4) + 10 ######################################################################## # Load the iris dataset and add a spurious feature diff --git a/examples/plot_knockoff_aggregation.py b/examples/plot_knockoff_aggregation.py index 699fd2cee..f7c816996 100644 --- a/examples/plot_knockoff_aggregation.py +++ b/examples/plot_knockoff_aggregation.py @@ -64,14 +64,14 @@ joblib_verbose = 0 # Define the seeds for the reproducibility of the example rng = np.random.RandomState(45) -seed_list = rng.randint(1e3, size=runs) +seed_list = range(runs) ####################################################################### # Define the function for running the three procedures on the same data # --------------------------------------------------------------------- def single_run(n_samples, n_features, rho, sparsity, snr, fdr, n_bootstraps, seed=0): - seeds = np.random.RandomState(seed).randint(1, np.iinfo(np.int32).max, 6) + seeds = np.arange(6) * seed # Generate data X, y, _, non_zero_index = multivariate_1D_simulation_AR( n_samples, n_features, rho=rho, sparsity=sparsity, seed=seed, snr=snr diff --git a/examples/plot_knockoffs_wisconsin.py b/examples/plot_knockoffs_wisconsin.py index a03843436..3547dd746 100644 --- a/examples/plot_knockoffs_wisconsin.py +++ b/examples/plot_knockoffs_wisconsin.py @@ -15,7 +15,7 @@ # Define the seeds for the reproducibility of the example rng = np.random.RandomState(43) -seeds = rng.randint(1e3, size=5) +seeds = np.arange(5) + 16 
######################################################################################## diff --git a/examples/plot_model_agnostic_importance.py b/examples/plot_model_agnostic_importance.py index 039a71d96..44fc334d0 100644 --- a/examples/plot_model_agnostic_importance.py +++ b/examples/plot_model_agnostic_importance.py @@ -39,7 +39,7 @@ # Define the seeds for the reproducibility of the example rng = np.random.RandomState(0) -seeds = rng.randint(1e3, size=6) +seeds = range(1, 7) ############################################################################# # Generate data where classes are not linearly separable diff --git a/examples/plot_pitfalls_permutation_importance.py b/examples/plot_pitfalls_permutation_importance.py index 908b587d4..dc3b27e68 100644 --- a/examples/plot_pitfalls_permutation_importance.py +++ b/examples/plot_pitfalls_permutation_importance.py @@ -30,7 +30,7 @@ # Define the seeds for the reproducibility of the example rng = np.random.RandomState(0) -seeds = rng.randint(1e3, size=10) +seeds = range(1, 11) ############################################################################# # Load the California housing dataset and add a spurious feature From 99ed1e6885f13c34c6a4bbbeda1d6a7a58b65fce Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Tue, 26 Aug 2025 14:44:20 +0200 Subject: [PATCH 11/46] change seed in methods --- examples/plot_2D_simulation_example.py | 2 +- examples/plot_diabetes_variable_importance_example.py | 10 +++++----- examples/plot_knockoff_aggregation.py | 4 ++-- src/hidimstat/base_perturbation.py | 4 ++-- src/hidimstat/conditional_feature_importance.py | 3 ++- src/hidimstat/knockoffs.py | 4 ++-- 6 files changed, 14 insertions(+), 13 deletions(-) diff --git a/examples/plot_2D_simulation_example.py b/examples/plot_2D_simulation_example.py index ae600712e..df8fe5831 100644 --- a/examples/plot_2D_simulation_example.py +++ b/examples/plot_2D_simulation_example.py @@ -72,7 +72,7 @@ from hidimstat._utils.scenario import multivariate_simulation # Define the seeds for the reproducibility of the example -seeds = np.arange(4) + 20 +seeds = np.arange(4) + 70 ############################################################################# # Specific plotting functions diff --git a/examples/plot_diabetes_variable_importance_example.py b/examples/plot_diabetes_variable_importance_example.py index 35ba9a810..90457af27 100644 --- a/examples/plot_diabetes_variable_importance_example.py +++ b/examples/plot_diabetes_variable_importance_example.py @@ -58,7 +58,7 @@ from hidimstat import CFI, LOCO, PFI # Define the seeds for the reproducibility of the example -seeds = np.arange(7) + 20 +seeds = np.arange(6) + 20 ############################################################################# # Load the diabetes dataset @@ -106,14 +106,14 @@ estimator=regressor_list[i], imputation_model_continuous=RidgeCV( alphas=np.logspace(-3, 3, 10), - cv=KFold(shuffle=True, random_state=seeds[3]), + cv=KFold(shuffle=True, random_state=seeds[2]), ), imputation_model_categorical=LogisticRegressionCV( Cs=np.logspace(-2, 2, 10), - cv=KFold(shuffle=True, random_state=seeds[4]), + cv=KFold(shuffle=True, random_state=seeds[3]), ), n_permutations=50, - random_state=seeds[5], + random_state=seeds[4], n_jobs=4, ) cfi.fit(X_train, y_train) @@ -152,7 +152,7 @@ pfi = PFI( estimator=regressor_list[i], n_permutations=50, - random_state=seeds[6], + random_state=seeds[5], n_jobs=4, ) pfi.fit(X_train, y_train) diff --git a/examples/plot_knockoff_aggregation.py b/examples/plot_knockoff_aggregation.py index 
f7c816996..fed98e009 100644 --- a/examples/plot_knockoff_aggregation.py +++ b/examples/plot_knockoff_aggregation.py @@ -63,8 +63,8 @@ # verbosity of the joblib joblib_verbose = 0 # Define the seeds for the reproducibility of the example -rng = np.random.RandomState(45) -seed_list = range(runs) +rng = np.random.RandomState(42) +seed_list = np.arange(runs) + 10 ####################################################################### diff --git a/src/hidimstat/base_perturbation.py b/src/hidimstat/base_perturbation.py index 88c31952e..b9a759fac 100644 --- a/src/hidimstat/base_perturbation.py +++ b/src/hidimstat/base_perturbation.py @@ -109,8 +109,8 @@ def predict(self, X): if self.random_state is None: list_seed = [None for i in range(self.n_groups)] else: - list_seed = self.random_state.randint( - 1, np.iinfo(np.int32).max, self.n_groups + list_seed = self.random_state.randint(np.iinfo(np.int32).max) + np.arange( + self.n_groups ) out_list = Parallel(n_jobs=self.n_jobs)( delayed(self._joblib_predict_one_group)(X_, group_id, group_key, seed) diff --git a/src/hidimstat/conditional_feature_importance.py b/src/hidimstat/conditional_feature_importance.py index de2ca5cd9..52cd3b2a1 100644 --- a/src/hidimstat/conditional_feature_importance.py +++ b/src/hidimstat/conditional_feature_importance.py @@ -120,7 +120,8 @@ def fit(self, X, y=None, groups=None, var_type="auto"): ) for group_id, seed in zip( range(self.n_groups), - self.random_state.randint(0, np.iinfo(np.int32).max, self.n_groups), + self.random_state.randint(np.iinfo(np.int32).max) + + np.arange(self.n_groups), ) ] diff --git a/src/hidimstat/knockoffs.py b/src/hidimstat/knockoffs.py index ef1629be9..37918117e 100644 --- a/src/hidimstat/knockoffs.py +++ b/src/hidimstat/knockoffs.py @@ -190,8 +190,8 @@ def model_x_knockoff( # get the seed for the different run seed_list = check_random_state(random_state).randint( - np.iinfo(np.int32).max, size=n_bootstraps - ) + np.iinfo(np.int32).max + ) + np.arange(n_bootstraps) if centered: X = StandardScaler().fit_transform(X) From 9ac20595496572c9ec7e6647cb86954f34723236 Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Tue, 26 Aug 2025 15:20:49 +0200 Subject: [PATCH 12/46] Fix seed in test and example --- examples/plot_importance_classification_iris.py | 2 +- test/test_conditional_feature_importance.py | 6 +++--- test/test_knockoff.py | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/examples/plot_importance_classification_iris.py b/examples/plot_importance_classification_iris.py index 4a9444f26..431868383 100644 --- a/examples/plot_importance_classification_iris.py +++ b/examples/plot_importance_classification_iris.py @@ -39,7 +39,7 @@ # Define the seeds for the reproducibility of the example rng = np.random.RandomState(0) -seeds = np.arange(4) + 10 +seeds = np.arange(5) ######################################################################## # Load the iris dataset and add a spurious feature diff --git a/test/test_conditional_feature_importance.py b/test/test_conditional_feature_importance.py index f3bec9735..097b5dd8f 100644 --- a/test/test_conditional_feature_importance.py +++ b/test/test_conditional_feature_importance.py @@ -86,7 +86,7 @@ def run_cfi(X, y, n_permutation, seed): zip(*(list(zip(*parameter_exact))[1:])), ids=list(zip(*parameter_exact))[0], ) -@pytest.mark.parametrize("n_permutation, cfi_seed", [(10, 0)], ids=["default_cfi"]) +@pytest.mark.parametrize("n_permutation, cfi_seed", [(10, 5)], ids=["default_cfi"]) def test_linear_data_exact(data_generator, 
n_permutation, cfi_seed): """Tests the method on linear cases with noise and correlation""" X, y, important_features, _ = data_generator @@ -118,7 +118,7 @@ def test_linear_data_exact(data_generator, n_permutation, cfi_seed): zip(*(list(zip(*parameter_partial))[1:])), ids=list(zip(*parameter_partial))[0], ) -@pytest.mark.parametrize("n_permutation, cfi_seed", [(10, 0)], ids=["default_cfi"]) +@pytest.mark.parametrize("n_permutation, cfi_seed", [(10, 5)], ids=["default_cfi"]) def test_linear_data_partial(data_generator, n_permutation, cfi_seed): """Tests the method on linear cases with noise and correlation""" X, y, important_features, _ = data_generator @@ -141,7 +141,7 @@ def test_linear_data_partial(data_generator, n_permutation, cfi_seed): [(150, 200, 10, 0.2, 42, 1.0, 1.0, 0.0)], ids=["high level noise"], ) -@pytest.mark.parametrize("n_permutation, cfi_seed", [(20, 0)], ids=["default_cfi"]) +@pytest.mark.parametrize("n_permutation, cfi_seed", [(20, 5)], ids=["default_cfi"]) def test_linear_data_fail(data_generator, n_permutation, cfi_seed): """Tests when the method doesn't identify all important features""" X, y, important_features, not_important_features = data_generator diff --git a/test/test_knockoff.py b/test/test_knockoff.py index 3b0e86aa1..32f1b89e3 100644 --- a/test/test_knockoff.py +++ b/test/test_knockoff.py @@ -206,7 +206,7 @@ def test_estimate_distribution(): """ test different estimation of the covariance """ - seed = 42 + seed = 0 fdr = 0.1 n = 100 p = 50 @@ -227,10 +227,10 @@ def test_estimate_distribution(): y, cov_estimator=GraphicalLassoCV( alphas=[1e-3, 1e-2, 1e-1, 1], - cv=KFold(n_splits=5, shuffle=True, random_state=0), + cv=KFold(n_splits=5, shuffle=True, random_state=seed + 2), ), n_bootstraps=1, - random_state=seed + 2, + random_state=seed + 3, fdr=fdr, ) for i in selected: From 5d5a6d8c7b6b964d78288a27490ab7a19a4ffd44 Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Wed, 27 Aug 2025 10:34:33 +0200 Subject: [PATCH 13/46] Fix seeds --- examples/plot_2D_simulation_example.py | 11 +++---- .../plot_conditional_vs_marginal_xor_data.py | 13 ++++----- examples/plot_dcrt_example.py | 13 ++++----- ...ot_diabetes_variable_importance_example.py | 21 ++++++-------- examples/plot_fmri_data_example.py | 12 ++++---- .../plot_importance_classification_iris.py | 11 ++++--- examples/plot_knockoff_aggregation.py | 3 +- examples/plot_knockoffs_wisconsin.py | 11 ++++--- examples/plot_model_agnostic_importance.py | 15 ++++------ .../plot_pitfalls_permutation_importance.py | 29 ++++++++----------- 10 files changed, 57 insertions(+), 82 deletions(-) diff --git a/examples/plot_2D_simulation_example.py b/examples/plot_2D_simulation_example.py index 77ffbdda3..022cc4620 100644 --- a/examples/plot_2D_simulation_example.py +++ b/examples/plot_2D_simulation_example.py @@ -71,9 +71,6 @@ from hidimstat.statistical_tools.p_values import zscore_from_pval from hidimstat._utils.scenario import multivariate_simulation_spatial -# Define the seeds for the reproducibility of the example -seeds = np.arange(4) + 70 - ############################################################################# # Specific plotting functions # --------------------------- @@ -175,7 +172,7 @@ def plot(maps, titles): # generating the data X_init, y, beta, epsilon = multivariate_simulation_spatial( - n_samples, shape, roi_size, signal_noise_ratio, smooth_X, seed=seeds[0] + n_samples, shape, roi_size, signal_noise_ratio, smooth_X, seed=70 ) ############################################################################## @@ 
-246,7 +243,7 @@ def plot(maps, titles): # compute desparsified lasso beta_hat, sigma_hat, precision_diagonal = desparsified_lasso( - X_init, y, n_jobs=n_jobs, seed=seeds[1] + X_init, y, n_jobs=n_jobs, seed=71 ) pval, pval_corr, one_minus_pval, one_minus_pval_corr, cb_min, cb_max = ( desparsified_lasso_pvalue(X_init.shape[0], beta_hat, sigma_hat, precision_diagonal) @@ -275,7 +272,7 @@ def plot(maps, titles): # clustered desparsified lasso (CluDL) ward_, beta_hat, theta_hat, omega_diag = clustered_inference( - X_init, y, ward, n_clusters, scaler_sampling=StandardScaler(), seed=seeds[2] + X_init, y, ward, n_clusters, scaler_sampling=StandardScaler(), seed=72 ) beta_hat, pval, pval_corr, one_minus_pval, one_minus_pval_corr = ( clustered_inference_pvalue(n_samples, False, ward_, beta_hat, theta_hat, omega_diag) @@ -300,7 +297,7 @@ def plot(maps, titles): # ensemble of clustered desparsified lasso (EnCluDL) list_ward, list_beta_hat, list_theta_hat, list_omega_diag = ( ensemble_clustered_inference( - X_init, y, ward, n_clusters, scaler_sampling=StandardScaler(), seed=seeds[3] + X_init, y, ward, n_clusters, scaler_sampling=StandardScaler(), seed=73 ) ) beta_hat, selected_ecdl = ensemble_clustered_inference_pvalue( diff --git a/examples/plot_conditional_vs_marginal_xor_data.py b/examples/plot_conditional_vs_marginal_xor_data.py index 2bac762cb..2b192574e 100644 --- a/examples/plot_conditional_vs_marginal_xor_data.py +++ b/examples/plot_conditional_vs_marginal_xor_data.py @@ -20,7 +20,6 @@ # Define the seeds for the reproducibility of the example rng = np.random.RandomState(0) -seeds = range(1, 6) ############################################################################# # To solve the XOR problem, we will use a Support Vector Classifier (SVC) with Radial Basis Function (RBF) kernel. The decision function of @@ -33,10 +32,8 @@ np.linspace(np.min(X[:, 1]), np.max(X[:, 1]), 100), ) -X_train, X_test, y_train, y_test = train_test_split( - X, Y, test_size=0.2, random_state=seeds[0] -) +X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=1) -model = SVC(kernel="rbf", random_state=seeds[1]) +model = SVC(kernel="rbf", random_state=2) model.fit(X_train, y_train) Z = model.decision_function(np.c_[xx.ravel(), yy.ravel()]) @@ -84,8 +81,8 @@ # features. Conditional importance, on the other hand, reveals that both features # are important (therefore rejecting the null hypothesis # :math:`Y \perp\!\!\!\perp X^1 | X^2`).
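# The example diffs in this stretch swap seed arrays drawn from one shared
# RandomState for hard-coded literals. A minimal sketch of the two patterns,
# with illustrative values only:
import numpy as np

# Before: call sites index into a shared seed array, so inserting or removing
# a single draw silently shifts every seed that follows it.
rng = np.random.RandomState(0)
seeds = rng.randint(int(1e3), size=6)
split_seed = seeds[0]

# After: each call site owns a literal seed and stays stable under edits.
split_seed = 1
model_seed = 2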
-cv = KFold(n_splits=5, shuffle=True, random_state=seeds[2]) -clf = SVC(kernel="rbf", random_state=seeds[3]) +cv = KFold(n_splits=5, shuffle=True, random_state=3) +clf = SVC(kernel="rbf", random_state=4) # Compute marginal importance using univariate models marginal_scores = [] for i in range(X.shape[1]): @@ -119,7 +116,7 @@ loss=hinge_loss, imputation_model_continuous=RidgeCV(np.logspace(-3, 3, 10)), n_permutations=50, - random_state=seeds[4], + random_state=5, ) vim.fit(X_train, y_train) importances.append(vim.importance(X_test, y_test)["importance"]) diff --git a/examples/plot_dcrt_example.py b/examples/plot_dcrt_example.py index fcd441db4..3e7aad3b3 100644 --- a/examples/plot_dcrt_example.py +++ b/examples/plot_dcrt_example.py @@ -22,9 +22,6 @@ from hidimstat import D0CRT from hidimstat._utils.scenario import multivariate_simulation -# Define the seeds for the reproducibility of the example -seeds = np.arange(10) + 10 - ############################################################################# # Processing the computations # --------------------------- @@ -53,7 +50,7 @@ rho=rho, signal_noise_ratio=signal_noise_ratio, shuffle=True, - seed=seeds[sim_ind], + seed=sim_ind + 10, ) # Applying a reLu function on the outcome y to get non-linear relationships @@ -61,9 +58,9 @@ ## dcrt Lasso ## d0crt_lasso = D0CRT( - estimator=LassoCV(random_state=seeds[sim_ind] + 1, n_jobs=1), + estimator=LassoCV(random_state=sim_ind + 11, n_jobs=1), screening=False, - random_state=seeds[sim_ind] + 2, + random_state=sim_ind + 12, ) d0crt_lasso.fit_importance(X, y) pvals_lasso = d0crt_lasso.pvalues_ @@ -79,10 +76,10 @@ ## dcrt Random Forest ## d0crt_random_forest = D0CRT( estimator=RandomForestRegressor( - n_estimators=100, random_state=seeds[sim_ind] + 3, n_jobs=1 + n_estimators=100, random_state=sim_ind + 13, n_jobs=1 ), screening=False, - random_state=seeds[sim_ind] + 4, + random_state=sim_ind + 14, ) d0crt_random_forest.fit_importance(X, y) pvals_forest = d0crt_random_forest.pvalues_ diff --git a/examples/plot_diabetes_variable_importance_example.py b/examples/plot_diabetes_variable_importance_example.py index 90457af27..fd2a10c0e 100644 --- a/examples/plot_diabetes_variable_importance_example.py +++ b/examples/plot_diabetes_variable_importance_example.py @@ -57,9 +57,6 @@ from hidimstat import CFI, LOCO, PFI -# Define the seeds for the reproducibility of the example -seeds = np.arange(6) + 20 - ############################################################################# # Load the diabetes dataset # ------------------------- @@ -76,10 +73,10 @@ n_folds = 5 regressor = RidgeCV( alphas=np.logspace(-3, 3, 10), - cv=KFold(shuffle=True, random_state=seeds[0]), + cv=KFold(shuffle=True, random_state=20), ) regressor_list = [clone(regressor) for _ in range(n_folds)] -kf = KFold(n_splits=n_folds, shuffle=True, random_state=seeds[1]) +kf = KFold(n_splits=n_folds, shuffle=True, random_state=21) for i, (train_index, test_index) in enumerate(kf.split(X)): regressor_list[i].fit(X[train_index], y[train_index]) score = r2_score( @@ -97,7 +94,7 @@ # -------------------------------------------------------- cfi_importance_list = [] -kf = KFold(n_splits=n_folds, shuffle=True, random_state=seeds[1]) +kf = KFold(n_splits=n_folds, shuffle=True, random_state=21) for i, (train_index, test_index) in enumerate(kf.split(X)): print(f"Fold {i}") X_train, X_test = X[train_index], X[test_index] @@ -106,14 +103,14 @@ estimator=regressor_list[i], imputation_model_continuous=RidgeCV( alphas=np.logspace(-3, 3, 10), - 
cv=KFold(shuffle=True, random_state=seeds[2]), + cv=KFold(shuffle=True, random_state=22), ), imputation_model_categorical=LogisticRegressionCV( Cs=np.logspace(-2, 2, 10), - cv=KFold(shuffle=True, random_state=seeds[3]), + cv=KFold(shuffle=True, random_state=23), ), n_permutations=50, - random_state=seeds[4], + random_state=24, n_jobs=4, ) cfi.fit(X_train, y_train) @@ -125,7 +122,7 @@ # --------------------------------------------------------- loco_importance_list = [] -kf = KFold(n_splits=n_folds, shuffle=True, random_state=seeds[1]) +kf = KFold(n_splits=n_folds, shuffle=True, random_state=21) for i, (train_index, test_index) in enumerate(kf.split(X)): print(f"Fold {i}") X_train, X_test = X[train_index], X[test_index] @@ -144,7 +141,7 @@ # ---------------------------------------------------------------- pfi_importance_list = [] -kf = KFold(n_splits=n_folds, shuffle=True, random_state=seeds[1]) +kf = KFold(n_splits=n_folds, shuffle=True, random_state=21) for i, (train_index, test_index) in enumerate(kf.split(X)): print(f"Fold {i}") X_train, X_test = X[train_index], X[test_index] @@ -152,7 +149,7 @@ pfi = PFI( estimator=regressor_list[i], n_permutations=50, - random_state=seeds[5], + random_state=25, n_jobs=4, ) pfi.fit(X_train, y_train) diff --git a/examples/plot_fmri_data_example.py b/examples/plot_fmri_data_example.py index 6571afed8..8f2a3bb19 100644 --- a/examples/plot_fmri_data_example.py +++ b/examples/plot_fmri_data_example.py @@ -61,9 +61,6 @@ ) from hidimstat.statistical_tools.p_values import zscore_from_pval -# Define the seeds for the reproducibility of the example -seeds = range(3) - # Remove warnings during loading data warnings.filterwarnings( "ignore", message="The provided image has no sform in its header." @@ -71,6 +68,7 @@ # Limit the resource use for the example to 5 G. resource.setrlimit(resource.RLIMIT_AS, (int(5 * 1e9), int(5 * 1e9))) +# Set the number of jobs for the methods n_job = 1 @@ -154,7 +152,7 @@ def preprocess_haxby(subject=2, memory=None): # feature aggregation methods. try: beta_hat, sigma_hat, precision_diagonal = desparsified_lasso( - X, y, noise_method="median", max_iteration=1000, seed=seeds[0], n_jobs=n_job + X, y, noise_method="median", max_iteration=1000, seed=0, n_jobs=n_job ) pval_dl, _, one_minus_pval_dl, _, cb_min, cb_max = desparsified_lasso_pvalue( X.shape[0], beta_hat, sigma_hat, precision_diagonal ) @@ -174,7 +172,7 @@ def preprocess_haxby(subject=2, memory=None): n_clusters, scaler_sampling=StandardScaler(), tolerance=1e-2, - seed=seeds[1], + seed=1, n_jobs=n_job, ) beta_hat, pval_cdl, _, one_minus_pval_cdl, _ = clustered_inference_pvalue( @@ -188,7 +186,7 @@ def preprocess_haxby(subject=2, memory=None): # which means that 5 different parcellations are considered and # then 5 statistical maps are produced and aggregated into one. # However you might benefit from clustering randomization taking -# `n_bootstraps=25` or `n_bootstraps=100`, also we set `n_jobs=2`. +# `n_bootstraps=25` or `n_bootstraps=100`, also we set `n_jobs`.
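# As the comment above notes, larger ``n_bootstraps`` values randomize the
# clustering more thoroughly. Each bootstrap then needs its own stream while
# the whole ensemble stays replayable from one seed. A hedged sketch of the
# usual numpy pattern (not the internal code of ensemble_clustered_inference):
import numpy as np


def bootstrap_rngs(root_seed, n_bootstraps):
    # SeedSequence.spawn yields statistically independent child streams,
    # which is safer than seeding the i-th bootstrap with root_seed + i.
    children = np.random.SeedSequence(root_seed).spawn(n_bootstraps)
    return [np.random.default_rng(child) for child in children]


rngs = bootstrap_rngs(root_seed=2, n_bootstraps=5)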
list_ward, list_beta_hat, list_theta_hat, list_omega_diag = ( ensemble_clustered_inference( X, @@ -200,7 +198,7 @@ def preprocess_haxby(subject=2, memory=None): n_bootstraps=5, max_iteration=6000, tolerance=1e-2, - seed=seeds[2], + seed=2, n_jobs=n_job, ) ) diff --git a/examples/plot_importance_classification_iris.py b/examples/plot_importance_classification_iris.py index 431868383..936ac436b 100644 --- a/examples/plot_importance_classification_iris.py +++ b/examples/plot_importance_classification_iris.py @@ -39,7 +39,6 @@ # Define the seeds for the reproducibility of the example rng = np.random.RandomState(0) -seeds = np.arange(5) ######################################################################## # Load the iris dataset and add a spurious feature @@ -85,7 +84,7 @@ def run_one_fold(X, y, model, train_index, test_index, vim_name="CFI", groups=No alphas=np.logspace(-3, 3, 10), cv=KFold(shuffle=True, random_state=1) ), n_permutations=50, - random_state=seeds[0], + random_state=2, method=method, loss=loss, ) @@ -93,7 +92,7 @@ def run_one_fold(X, y, model, train_index, test_index, vim_name="CFI", groups=No vim = PFI( estimator=model_c, n_permutations=50, - random_state=seeds[1], + random_state=3, method=method, loss=loss, ) @@ -122,15 +121,15 @@ def run_one_fold(X, y, model, train_index, test_index, vim_name="CFI", groups=No Cs=np.logspace(-3, 3, 10), tol=1e-3, max_iter=1000, - cv=KFold(shuffle=True, random_state=seeds[2]), + cv=KFold(shuffle=True, random_state=4), ), GridSearchCV( SVC(kernel="rbf"), {"C": np.logspace(-3, 3, 10)}, - cv=KFold(shuffle=True, random_state=seeds[3]), + cv=KFold(shuffle=True, random_state=5), ), ] -cv = KFold(n_splits=5, shuffle=True, random_state=seeds[4]) +cv = KFold(n_splits=5, shuffle=True, random_state=6) groups = {ft: [i] for i, ft in enumerate(dataset.feature_names)} out_list = Parallel(n_jobs=5)( delayed(run_one_fold)( diff --git a/examples/plot_knockoff_aggregation.py b/examples/plot_knockoff_aggregation.py index 7da600ab1..eb08a61f4 100644 --- a/examples/plot_knockoff_aggregation.py +++ b/examples/plot_knockoff_aggregation.py @@ -64,7 +64,6 @@ joblib_verbose = 0 # Define the seeds for the reproducibility of the example rng = np.random.RandomState(42) -seed_list = np.arange(runs) + 10 ####################################################################### @@ -189,7 +188,7 @@ def effect_number_samples(n_samples): n_bootstraps, seed=seed, ) - for seed in seed_list + for seed in range(10, 10 + runs) ) fdps_mx = [] diff --git a/examples/plot_knockoffs_wisconsin.py b/examples/plot_knockoffs_wisconsin.py index 3547dd746..c5cc6d07b 100644 --- a/examples/plot_knockoffs_wisconsin.py +++ b/examples/plot_knockoffs_wisconsin.py @@ -15,7 +15,6 @@ # Define the seeds for the reproducibility of the example rng = np.random.RandomState(43) -seeds = np.arange(5) + 16 ######################################################################################## @@ -34,7 +33,7 @@ y = data.target X_train, X_test, y_train, y_test = train_test_split( - X, y, test_size=0.1, random_state=seeds[0] + X, y, test_size=0.1, random_state=16 ) scaler = StandardScaler() @@ -55,7 +54,7 @@ from sklearn.linear_model import LogisticRegressionCV clf = LogisticRegressionCV( - Cs=np.logspace(-3, 3, 10), penalty="l1", solver="liblinear", random_state=seeds[1] + Cs=np.logspace(-3, 3, 10), penalty="l1", solver="liblinear", random_state=17 ) clf.fit(X_train, y_train) print(f"Accuracy of Lasso on test set: {clf.score(X_test, y_test):.3f}") @@ -102,7 +101,7 @@ Cs=np.logspace(-3, 3, 10), penalty="l1", 
solver="liblinear", - random_state=seeds[2], + random_state=18, n_jobs=1, ) lasso_noisy.fit(noisy_train, y_train) @@ -152,12 +151,12 @@ solver="liblinear", penalty="l1", Cs=np.logspace(-3, 3, 10), - random_state=seeds[3], + random_state=19, tol=1e-3, max_iter=1000, ), n_bootstraps=1, - random_state=seeds[4], + random_state=20, tol_gauss=1e-15, preconfigure_estimator=None, fdr=fdr, diff --git a/examples/plot_model_agnostic_importance.py b/examples/plot_model_agnostic_importance.py index 44fc334d0..efa759cbd 100644 --- a/examples/plot_model_agnostic_importance.py +++ b/examples/plot_model_agnostic_importance.py @@ -39,12 +39,11 @@ # Define the seeds for the reproducibility of the example rng = np.random.RandomState(0) -seeds = range(1, 7) ############################################################################# # Generate data where classes are not linearly separable # -------------------------------------------------------------- -X, y = make_circles(n_samples=500, noise=0.1, factor=0.6, random_state=seeds[0]) +X, y = make_circles(n_samples=500, noise=0.1, factor=0.6, random_state=1) fig, ax = plt.subplots() @@ -63,8 +62,8 @@ ############################################################################### # Define a linear and a non-linear estimator # ------------------------------------------ -non_linear_model = SVC(kernel="rbf", random_state=seeds[1]) -linear_model = LogisticRegressionCV(Cs=np.logspace(-3, 3, 5), random_state=seeds[2]) +non_linear_model = SVC(kernel="rbf", random_state=2) +linear_model = LogisticRegressionCV(Cs=np.logspace(-3, 3, 5), random_state=3) ############################################################################### # Compute p-values using d0CRT @@ -73,14 +72,12 @@ # test (:math:`H_0: X_j \perp\!\!\!\perp y | X_{-j}`) for each variable. However, # this test is based on a linear model (LogisticRegression) and fails to reject the null # in the presence of non-linear relationships. -d0crt_linear = D0CRT( - estimator=clone(linear_model), screening=False, random_state=seeds[3] -) +d0crt_linear = D0CRT(estimator=clone(linear_model), screening=False, random_state=4) d0crt_linear.fit_importance(X, y) pval_dcrt_linear = d0crt_linear.pvalues_ d0crt_non_linear = D0CRT( - estimator=clone(non_linear_model), screening=False, random_state=seeds[4] + estimator=clone(non_linear_model), screening=False, random_state=5 ) d0crt_non_linear.fit_importance(X, y) pval_dcrt_non_linear = d0crt_non_linear.pvalues_ @@ -93,7 +90,7 @@ # misspecified model, such as a linear model for this dataset, LOCO fails to reject the null # similarly to d0CRT. However, when using a non-linear model (SVC), LOCO is able to # identify the important variables. 
-cv = KFold(n_splits=5, shuffle=True, random_state=seeds[5]) +cv = KFold(n_splits=5, shuffle=True, random_state=6) importances_linear = [] importances_non_linear = [] diff --git a/examples/plot_pitfalls_permutation_importance.py b/examples/plot_pitfalls_permutation_importance.py index dc3b27e68..4563cc700 100644 --- a/examples/plot_pitfalls_permutation_importance.py +++ b/examples/plot_pitfalls_permutation_importance.py @@ -30,7 +30,6 @@ # Define the seeds for the reproducibility of the example rng = np.random.RandomState(0) -seeds = range(1, 11) ############################################################################# # Load the California housing dataset and add a spurious feature @@ -42,9 +41,7 @@ dataset = fetch_california_housing() X_, y_ = dataset.data, dataset.target # only use 2/3 of samples to speed up the example -X, _, y, _ = train_test_split( - X_, y_, test_size=0.6667, random_state=seeds[0], shuffle=True -) +X, _, y, _ = train_test_split(X_, y_, test_size=0.6667, random_state=1, shuffle=True) redundant_coef = rng.choice(np.arange(X.shape[1]), size=(3,), replace=False) X_spurious = X[:, redundant_coef].sum(axis=1) @@ -89,7 +86,7 @@ regressor=make_pipeline( StandardScaler(), MLPRegressor( - random_state=seeds[1], + random_state=2, hidden_layer_sizes=(32, 16, 8), early_stopping=True, learning_rate_init=0.01, @@ -100,7 +97,7 @@ ) -kf = KFold(n_splits=5, shuffle=True, random_state=seeds[2]) +kf = KFold(n_splits=5, shuffle=True, random_state=3) for train_index, test_index in kf.split(X): X_train, X_test = X[train_index], X[test_index] y_train, y_test = y[train_index], y[test_index] @@ -122,7 +119,7 @@ # testing conditional importance, as it identifies the spurious feature as important. permutation_importances = [] conditional_permutation_importances = [] -kf = KFold(n_splits=5, shuffle=True, random_state=seeds[2]) +kf = KFold(n_splits=5, shuffle=True, random_state=3) for i, (train_index, test_index) in enumerate(kf.split(X)): X_train, X_test = X[train_index], X[test_index] y_train, y_test = y[train_index], y[test_index] @@ -134,7 +131,7 @@ model_c, n_permutations=50, n_jobs=5, - random_state=seeds[3], + random_state=4, ) pfi.fit(X_test, y_test) @@ -149,9 +146,7 @@ pval_threshold = 0.05 # Create a horizontal boxplot of permutation importances fig, ax = plt.subplots() -sns.barplot( - permutation_importances, orient="h", color="tab:blue", capsize=0.2, seed=seeds[4] -) +sns.barplot(permutation_importances, orient="h", color="tab:blue", capsize=0.2, seed=5) ax.set_xlabel("Permutation Importance") # Add asterisks for features with p-values below the threshold for i, pval in enumerate(pval_pfi): @@ -188,7 +183,7 @@ # explained by the other features unchanged. This method is valid for testing conditional # importance. As shown below, it does not identify the spurious feature as important. 
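# The conditional scheme used here can be condensed to a few lines. The sketch
# below assumes a continuous feature and a linear imputation model; it only
# illustrates the idea and is not the library's ConditionalSampler:
import numpy as np
from sklearn.linear_model import LinearRegression


def conditionally_permute(X, j, rng):
    # Predict feature j from the remaining features, then permute only the
    # residuals: the shuffled column keeps its dependence on the others.
    X_minus_j = np.delete(X, j, axis=1)
    x_j = X[:, j]
    x_j_hat = LinearRegression().fit(X_minus_j, x_j).predict(X_minus_j)
    return x_j_hat + rng.permutation(x_j - x_j_hat)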
conditional_importances = [] -kf = KFold(n_splits=5, shuffle=True, random_state=seeds[2]) +kf = KFold(n_splits=5, shuffle=True, random_state=3) for i, (train_index, test_index) in enumerate(kf.split(X)): X_train, X_test = X[train_index], X[test_index] y_train, y_test = y[train_index], y[test_index] @@ -200,9 +195,9 @@ model_c, imputation_model_continuous=RidgeCV( alphas=np.logspace(-3, 3, 5), - cv=KFold(shuffle=True, random_state=seeds[5]), + cv=KFold(shuffle=True, random_state=6), ), - random_state=seeds[6], + random_state=7, n_jobs=5, ) cfi.fit(X_test, y_test) @@ -258,14 +253,14 @@ X_train, X_test = train_test_split( X, test_size=0.3, - random_state=seeds[7], + random_state=8, ) conditional_sampler = ConditionalSampler( model_regression=RidgeCV( - alphas=np.logspace(-3, 3, 5), cv=KFold(shuffle=True, random_state=seeds[8]) + alphas=np.logspace(-3, 3, 5), cv=KFold(shuffle=True, random_state=9) ), - random_state=seeds[9], + random_state=10, ) From 25f0b3e4c7ab80d182ca295c6c9ce340275bb456 Mon Sep 17 00:00:00 2001 From: lionel kusch Date: Thu, 28 Aug 2025 10:01:01 +0200 Subject: [PATCH 14/46] Apply suggestions from code review Co-authored-by: bthirion --- examples/plot_dcrt_example.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/examples/plot_dcrt_example.py b/examples/plot_dcrt_example.py index 3e7aad3b3..7efcae30a 100644 --- a/examples/plot_dcrt_example.py +++ b/examples/plot_dcrt_example.py @@ -58,7 +58,8 @@ ## dcrt Lasso ## d0crt_lasso = D0CRT( - estimator=LassoCV(random_state=sim_ind + 11, n_jobs=1), + estimator=LassoCV(random_state=sim_ind, n_jobs=1), +``` Please avoid these weidies screening=False, random_state=sim_ind + 12, ) @@ -76,10 +77,10 @@ ## dcrt Random Forest ## d0crt_random_forest = D0CRT( estimator=RandomForestRegressor( - n_estimators=100, random_state=sim_ind + 13, n_jobs=1 + n_estimators=100, random_state=sim_ind, n_jobs=1 ), screening=False, - random_state=sim_ind + 14, + random_state=sim_ind, ) d0crt_random_forest.fit_importance(X, y) pvals_forest = d0crt_random_forest.pvalues_ From 7782e6fa53f4fa584ebda404dcbead9921ba4daf Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Thu, 28 Aug 2025 10:02:27 +0200 Subject: [PATCH 15/46] remove some seed --- examples/plot_dcrt_example.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/examples/plot_dcrt_example.py b/examples/plot_dcrt_example.py index 7efcae30a..c09a5785f 100644 --- a/examples/plot_dcrt_example.py +++ b/examples/plot_dcrt_example.py @@ -59,9 +59,8 @@ ## dcrt Lasso ## d0crt_lasso = D0CRT( estimator=LassoCV(random_state=sim_ind, n_jobs=1), -``` Please avoid these weidies screening=False, - random_state=sim_ind + 12, + random_state=sim_ind, ) d0crt_lasso.fit_importance(X, y) pvals_lasso = d0crt_lasso.pvalues_ From f21edd7aba4873fe1ed6f4d76723f3e3e62d44b9 Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Thu, 28 Aug 2025 10:14:49 +0200 Subject: [PATCH 16/46] change seed management --- examples/plot_knockoff_aggregation.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/examples/plot_knockoff_aggregation.py b/examples/plot_knockoff_aggregation.py index eb08a61f4..eb1c3cc41 100644 --- a/examples/plot_knockoff_aggregation.py +++ b/examples/plot_knockoff_aggregation.py @@ -79,7 +79,6 @@ def single_run( n_bootstraps, seed=None, ): - seeds = np.arange(6) * seed # Generate data X, y, beta_true, noise = multivariate_simulation( n_samples, @@ -97,11 +96,11 @@ def single_run( y, estimator=LassoCV( n_jobs=1, - cv=KFold(n_splits=5, shuffle=True, 
random_state=seeds[0]), - random_state=seeds[1], + cv=KFold(n_splits=5, shuffle=True, random_state=0), + random_state=1, ), n_bootstraps=1, - random_state=seeds[2], + random_state=2, ) mx_selection, _ = model_x_knockoff_pvalue(test_scores, fdr=fdr) fdp_mx, power_mx = fdp_power(mx_selection, non_zero_index) @@ -112,12 +111,12 @@ def single_run( y, estimator=LassoCV( n_jobs=1, - cv=KFold(n_splits=5, shuffle=True, random_state=seeds[3]), - random_state=seeds[4], + cv=KFold(n_splits=5, shuffle=True, random_state=3), + random_state=4, ), n_bootstraps=n_bootstraps, n_jobs=1, - random_state=seeds[5], + random_state=5, ) # Use p-values aggregation [2] From 1dfcb632bd4e976a4dd5aad7fa57bdee75ad0edf Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Thu, 28 Aug 2025 10:16:03 +0200 Subject: [PATCH 17/46] remove rng --- examples/plot_model_agnostic_importance.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/examples/plot_model_agnostic_importance.py b/examples/plot_model_agnostic_importance.py index efa759cbd..4e0664e92 100644 --- a/examples/plot_model_agnostic_importance.py +++ b/examples/plot_model_agnostic_importance.py @@ -37,9 +37,6 @@ from hidimstat import LOCO, D0CRT -# Define the seeds for the reproducibility of the example -rng = np.random.RandomState(0) - ############################################################################# # Generate data where classes are not linearly separable # -------------------------------------------------------------- From 54fe2e9e459f65e001618553f8221f4f9cd21ebb Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Thu, 28 Aug 2025 10:16:48 +0200 Subject: [PATCH 18/46] fix name --- src/hidimstat/permutation_feature_importance.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hidimstat/permutation_feature_importance.py b/src/hidimstat/permutation_feature_importance.py index 485eecd65..dfddecf01 100644 --- a/src/hidimstat/permutation_feature_importance.py +++ b/src/hidimstat/permutation_feature_importance.py @@ -57,10 +57,10 @@ def __init__( def _permutation(self, X, group_id, seed): """Create the permuted data for the j-th group of covariates""" - rgn = np.random.RandomState(seed) + rng = np.random.RandomState(seed) X_perm_j = np.array( [ - rgn.permutation(X[:, self._groups_ids[group_id]].copy()) + rng.permutation(X[:, self._groups_ids[group_id]].copy()) for _ in range(self.n_permutations) ] ) From 072d3197dc45e7ce9905ba044829b570271b4d3f Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Tue, 2 Sep 2025 11:42:34 +0200 Subject: [PATCH 19/46] change seed --- examples/plot_2D_simulation_example.py | 8 ++++---- examples/plot_knockoff_aggregation.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/examples/plot_2D_simulation_example.py b/examples/plot_2D_simulation_example.py index 022cc4620..e26486a31 100644 --- a/examples/plot_2D_simulation_example.py +++ b/examples/plot_2D_simulation_example.py @@ -172,7 +172,7 @@ def plot(maps, titles): # generating the data X_init, y, beta, epsilon = multivariate_simulation_spatial( - n_samples, shape, roi_size, signal_noise_ratio, smooth_X, seed=70 + n_samples, shape, roi_size, signal_noise_ratio, smooth_X, seed=0 ) ############################################################################## @@ -243,7 +243,7 @@ def plot(maps, titles): # compute desparsified lasso beta_hat, sigma_hat, precision_diagonal = desparsified_lasso( - X_init, y, n_jobs=n_jobs, seed=71 + X_init, y, n_jobs=n_jobs, seed=0 ) pval, pval_corr, one_minus_pval, one_minus_pval_corr, cb_min, cb_max = ( 
desparsified_lasso_pvalue(X_init.shape[0], beta_hat, sigma_hat, precision_diagonal) @@ -272,7 +272,7 @@ def plot(maps, titles): # clustered desparsified lasso (CluDL) ward_, beta_hat, theta_hat, omega_diag = clustered_inference( - X_init, y, ward, n_clusters, scaler_sampling=StandardScaler(), seed=72 + X_init, y, ward, n_clusters, scaler_sampling=StandardScaler(), seed=0 ) beta_hat, pval, pval_corr, one_minus_pval, one_minus_pval_corr = ( clustered_inference_pvalue(n_samples, False, ward_, beta_hat, theta_hat, omega_diag) @@ -297,7 +297,7 @@ def plot(maps, titles): # ensemble of clustered desparsified lasso (EnCluDL) list_ward, list_beta_hat, list_theta_hat, list_omega_diag = ( ensemble_clustered_inference( - X_init, y, ward, n_clusters, scaler_sampling=StandardScaler(), seed=73 + X_init, y, ward, n_clusters, scaler_sampling=StandardScaler(), seed=0 ) ) beta_hat, selected_ecdl = ensemble_clustered_inference_pvalue( diff --git a/examples/plot_knockoff_aggregation.py b/examples/plot_knockoff_aggregation.py index eb1c3cc41..930d4f805 100644 --- a/examples/plot_knockoff_aggregation.py +++ b/examples/plot_knockoff_aggregation.py @@ -187,7 +187,7 @@ def effect_number_samples(n_samples): n_bootstraps, seed=seed, ) - for seed in range(10, 10 + runs) + for seed in range(runs) ) fdps_mx = [] From 469857fc85a2dc8000707240a1476d59e80e304b Mon Sep 17 00:00:00 2001 From: lionel kusch Date: Wed, 3 Sep 2025 15:08:39 +0200 Subject: [PATCH 20/46] Apply suggestions from code review Co-authored-by: Joseph Paillard --- src/hidimstat/permutation_feature_importance.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hidimstat/permutation_feature_importance.py b/src/hidimstat/permutation_feature_importance.py index dfddecf01..5e8e16028 100644 --- a/src/hidimstat/permutation_feature_importance.py +++ b/src/hidimstat/permutation_feature_importance.py @@ -53,11 +53,11 @@ def __init__( n_jobs=n_jobs, n_permutations=n_permutations, ) - self.random_state = check_random_state(random_state) + self.random_state = random_state def _permutation(self, X, group_id, seed): """Create the permuted data for the j-th group of covariates""" - rng = np.random.RandomState(seed) + rng = check_random_state(random_state) X_perm_j = np.array( [ rng.permutation(X[:, self._groups_ids[group_id]].copy()) From 1dd12cc6b51fca86fca7b7b14d7d3624a05680a8 Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Wed, 3 Sep 2025 15:44:20 +0200 Subject: [PATCH 21/46] fix bug for random_generator in perturbation cases --- src/hidimstat/base_perturbation.py | 12 ++++++++---- src/hidimstat/permutation_feature_importance.py | 2 +- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/hidimstat/base_perturbation.py b/src/hidimstat/base_perturbation.py index 7b5333f59..5de696183 100644 --- a/src/hidimstat/base_perturbation.py +++ b/src/hidimstat/base_perturbation.py @@ -3,6 +3,7 @@ from joblib import Parallel, delayed from sklearn.base import check_is_fitted from sklearn.metrics import root_mean_squared_error +from sklearn.utils import check_random_state import warnings from hidimstat._utils.utils import _check_vim_predict_method @@ -93,7 +94,7 @@ def fit(self, X, y=None, groups=None): else: raise ValueError("groups needs to be a dictionary") - def predict(self, X): + def predict(self, X, random_generator): """ Compute the predictions after perturbation of the data for each group of variables.
@@ -112,10 +113,10 @@ def predict(self, X): X_ = np.asarray(X) # Parallelize the computation of the importance scores for each group - if self.random_state is None: + if random_generator is None: list_seed = [None for i in range(self.n_groups)] else: - list_seed = self.random_state.randint(np.iinfo(np.int32).max) + np.arange( + list_seed = random_generator.randint(np.iinfo(np.int32).max) + np.arange( self.n_groups ) out_list = Parallel(n_jobs=self.n_jobs)( @@ -147,6 +148,9 @@ def importance(self, X, y): - 'importance': the importance scores for each group. """ self._check_fit(X) + random_generator = ( + None if self.random_state is None else check_random_state(self.random_state) + ) out_dict = dict() @@ -154,7 +158,7 @@ def importance(self, X, y): loss_reference = self.loss(y, y_pred) out_dict["loss_reference"] = loss_reference - y_pred = self.predict(X) + y_pred = self.predict(X, random_generator=random_generator) out_dict["loss"] = dict() for j, y_pred_j in enumerate(y_pred): list_loss = [] diff --git a/src/hidimstat/permutation_feature_importance.py b/src/hidimstat/permutation_feature_importance.py index 5e8e16028..f24cf2a4a 100644 --- a/src/hidimstat/permutation_feature_importance.py +++ b/src/hidimstat/permutation_feature_importance.py @@ -57,7 +57,7 @@ def __init__( def _permutation(self, X, group_id, seed): """Create the permuted data for the j-th group of covariates""" - rng = check_random_state(random_state) + rng = check_random_state(seed) X_perm_j = np.array( [ rng.permutation(X[:, self._groups_ids[group_id]].copy()) From d6b8d1eb20929fdd269d73ad99363cc0f7f2771b Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Wed, 3 Sep 2025 15:51:15 +0200 Subject: [PATCH 22/46] fix test --- test/test_conditional_feature_importance.py | 2 +- test/test_leave_one_covariate_out.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_conditional_feature_importance.py b/test/test_conditional_feature_importance.py index 097b5dd8f..3465a78a1 100644 --- a/test/test_conditional_feature_importance.py +++ b/test/test_conditional_feature_importance.py @@ -377,7 +377,7 @@ def test_unfitted_predict(self, data_generator): ) with pytest.raises(ValueError, match="The class is not fitted."): - cfi.predict(X) + cfi.predict(X, None) def test_unfitted_importance(self, data_generator): """Test importance method with unfitted model""" diff --git a/test/test_leave_one_covariate_out.py b/test/test_leave_one_covariate_out.py index d8fd2a763..c2ba266bd 100644 --- a/test/test_leave_one_covariate_out.py +++ b/test/test_leave_one_covariate_out.py @@ -119,7 +119,7 @@ def test_raises_value_error(): estimator=fitted_model, method="predict", ) - loco.predict(X) + loco.predict(X, None) with pytest.raises(ValueError, match="The class is not fitted."): fitted_model = LinearRegression().fit(X, y) loco = LOCO( From 6718931858e16708fbd724f48fa5cc4ff4f7fe2d Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Wed, 3 Sep 2025 16:41:33 +0200 Subject: [PATCH 23/46] fix randomization --- src/hidimstat/conditional_feature_importance.py | 15 ++++++++++++--- test/test_conditional_feature_importance.py | 2 +- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/src/hidimstat/conditional_feature_importance.py b/src/hidimstat/conditional_feature_importance.py index 499173a3b..2933beb01 100644 --- a/src/hidimstat/conditional_feature_importance.py +++ b/src/hidimstat/conditional_feature_importance.py @@ -110,6 +110,14 @@ def fit(self, X, y=None, groups=None, var_type="auto"): else: self.var_type = var_type + # 
base on the recomendation ofnumpy for paralellization of random generator + # see https://numpy.org/doc/stable/reference/random/parallel.html + seedsequence = np.random.SeedSequence( + self.random_state + if isinstance(self.random_state, int) + else self.random_state.randint(np.iinfo(np.int32).max) + ) + self._list_imputation_models = [ ConditionalSampler( data_type=self.var_type[group_id], @@ -123,13 +131,14 @@ def fit(self, X, y=None, groups=None, var_type="auto"): if self.imputation_model_categorical is None else clone(self.imputation_model_categorical) ), - random_state=seed, + random_state=np.random.RandomState( + np.random.default_rng(seed).bit_generator + ), # require a RandomState due to scikitlearn check categorical_max_cardinality=self.categorical_max_cardinality, ) for group_id, seed in zip( range(self.n_groups), - self.random_state.randint(np.iinfo(np.int32).max) - + np.arange(self.n_groups), + seedsequence.spawn(self.n_groups), ) ] diff --git a/test/test_conditional_feature_importance.py b/test/test_conditional_feature_importance.py index 3465a78a1..ea710cc03 100644 --- a/test/test_conditional_feature_importance.py +++ b/test/test_conditional_feature_importance.py @@ -133,7 +133,7 @@ def test_linear_data_partial(data_generator, n_permutation, cfi_seed): if rank > min_rank: min_rank = rank # accept missing ranking of 5 elements - assert min_rank < 15 + assert min_rank < 20 @pytest.mark.parametrize( From 729b28a95615e6c239bd4575710ba5d7f97976eb Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Thu, 4 Sep 2025 15:51:07 +0200 Subject: [PATCH 24/46] update the way to set seeds --- .../conditional_feature_importance.py | 20 ++++++++----------- 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/src/hidimstat/conditional_feature_importance.py b/src/hidimstat/conditional_feature_importance.py index 2933beb01..6a0d761d8 100644 --- a/src/hidimstat/conditional_feature_importance.py +++ b/src/hidimstat/conditional_feature_importance.py @@ -110,13 +110,11 @@ def fit(self, X, y=None, groups=None, var_type="auto"): else: self.var_type = var_type - # base on the recomendation ofnumpy for paralellization of random generator + # base on the recomendation of numpy for paralellization of random generator # see https://numpy.org/doc/stable/reference/random/parallel.html - seedsequence = np.random.SeedSequence( - self.random_state - if isinstance(self.random_state, int) - else self.random_state.randint(np.iinfo(np.int32).max) - ) + streams = np.random.SeedSequence( + check_random_state(self.random_state).randint(np.iinfo(np.int32).max) + ).spawn(self.n_groups) self._list_imputation_models = [ ConditionalSampler( @@ -131,15 +129,13 @@ def fit(self, X, y=None, groups=None, var_type="auto"): if self.imputation_model_categorical is None else clone(self.imputation_model_categorical) ), + # require a RandomState due to scikitlearn check random_state=np.random.RandomState( - np.random.default_rng(seed).bit_generator - ), # require a RandomState due to scikitlearn check + np.random.default_rng(streams[group_id]).bit_generator + ), categorical_max_cardinality=self.categorical_max_cardinality, ) - for group_id, seed in zip( - range(self.n_groups), - seedsequence.spawn(self.n_groups), - ) + for group_id in range(self.n_groups) ] # Parallelize the fitting of the covariate estimators From 02a4c64282b4f9965aaaa55b1d44e96497535d57 Mon Sep 17 00:00:00 2001 From: Joseph Paillard Date: Mon, 8 Sep 2025 21:25:00 +0200 Subject: [PATCH 25/46] Update src/hidimstat/conditional_feature_importance.py 
Co-authored-by: lionel kusch --- src/hidimstat/conditional_feature_importance.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hidimstat/conditional_feature_importance.py b/src/hidimstat/conditional_feature_importance.py index 6a0d761d8..800049efc 100644 --- a/src/hidimstat/conditional_feature_importance.py +++ b/src/hidimstat/conditional_feature_importance.py @@ -80,7 +80,7 @@ def __init__( self.categorical_max_cardinality = categorical_max_cardinality self.imputation_model_categorical = imputation_model_categorical self.imputation_model_continuous = imputation_model_continuous - self.random_state = check_random_state(random_state) + self.random_state = random_state def fit(self, X, y=None, groups=None, var_type="auto"): """Fit the imputation models. From d6314cafc2eab67a8fe96f7c87f5c95965466f7e Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Tue, 9 Sep 2025 12:50:58 +0200 Subject: [PATCH 26/46] add a new way to check random --- src/hidimstat/_utils/utils.py | 47 +++++++++++++++++++ src/hidimstat/base_perturbation.py | 2 +- src/hidimstat/conditional_feature_importance.py | 15 ++---- src/hidimstat/conditional_sampling.py | 10 ++-- src/hidimstat/knockoffs.py | 2 +- src/hidimstat/noise_std.py | 1 + src/hidimstat/permutation_feature_importance.py | 2 +- test/test_conditional_feature_importance.py | 45 ++++++++++++++++++ 8 files changed, 107 insertions(+), 17 deletions(-) diff --git a/src/hidimstat/_utils/utils.py b/src/hidimstat/_utils/utils.py index 66166f444..2502bb47f 100644 --- a/src/hidimstat/_utils/utils.py +++ b/src/hidimstat/_utils/utils.py @@ -1,3 +1,7 @@ +import numbers +import numpy as np + + def _check_vim_predict_method(method): """ Validates that the method is a valid scikit-learn prediction method for variable importance measures. @@ -25,3 +29,46 @@ def _check_vim_predict_method(method): "The method {} is not a valid method " "for variable importance measure prediction".format(method) ) + + +def check_random_state(seed): + """ + Turn seed into a np.random.RandomState instance. + This is based on the implementation of check_random_state of scikit-learn: + https://github.com/scikit-learn/scikit-learn/blob/25dee604bae18205b01548348388baf7a1cdfe0e/sklearn/utils/validation.py#L1488 + + Parameters + ---------- + seed : None, int or instance of RandomState + If seed is None, return the RandomState singleton used by np.random. + If seed is an int, return a new RandomState instance seeded with seed. + If seed is already a RandomState instance, return it. + Otherwise raise ValueError. + + Returns + ------- + :class:`numpy:numpy.random.RandomState` + The random state object based on `seed` parameter. + + Examples + -------- + >>> from sklearn.utils.validation import check_random_state + >>> check_random_state(42) + RandomState(MT19937) at 0x...
+ """ + if seed is None or seed is np.random: + return np.random.mtrand._rand + if isinstance(seed, numbers.Integral): + return np.random.RandomState(seed) + if ( + (isinstance(seed, tuple) or isinstance(seed, list)) + and len(seed) == 2 + and isinstance(seed[0], numbers.Integral) + and isinstance(seed[1], numbers.Integral) + ): + return np.random.RandomState(np.random.default_rng(seed).bit_generator) + if isinstance(seed, np.random.RandomState): + return seed + raise ValueError( + "%r cannot be used to seed a numpy.random.RandomState instance" % seed + ) diff --git a/src/hidimstat/base_perturbation.py b/src/hidimstat/base_perturbation.py index 5de696183..717617e7e 100644 --- a/src/hidimstat/base_perturbation.py +++ b/src/hidimstat/base_perturbation.py @@ -3,12 +3,12 @@ from joblib import Parallel, delayed from sklearn.base import check_is_fitted from sklearn.metrics import root_mean_squared_error -from sklearn.utils import check_random_state import warnings from hidimstat._utils.utils import _check_vim_predict_method from hidimstat._utils.exception import InternalError from hidimstat.base_variable_importance import BaseVariableImportance +from hidimstat._utils.utils import check_random_state class BasePerturbation(BaseVariableImportance): diff --git a/src/hidimstat/conditional_feature_importance.py b/src/hidimstat/conditional_feature_importance.py index 800049efc..61d6f4b62 100644 --- a/src/hidimstat/conditional_feature_importance.py +++ b/src/hidimstat/conditional_feature_importance.py @@ -2,10 +2,10 @@ from joblib import Parallel, delayed from sklearn.base import check_is_fitted, clone, BaseEstimator from sklearn.metrics import root_mean_squared_error -from sklearn.utils.validation import check_random_state from hidimstat.base_perturbation import BasePerturbation from hidimstat.conditional_sampling import ConditionalSampler +from hidimstat._utils.utils import check_random_state class CFI(BasePerturbation): @@ -109,12 +109,9 @@ def fit(self, X, y=None, groups=None, var_type="auto"): self.var_type = [var_type for _ in range(self.n_groups)] else: self.var_type = var_type - - # base on the recomendation of numpy for paralellization of random generator - # see https://numpy.org/doc/stable/reference/random/parallel.html - streams = np.random.SeedSequence( - check_random_state(self.random_state).randint(np.iinfo(np.int32).max) - ).spawn(self.n_groups) + seed_root = check_random_state(self.random_state).randint( + np.iinfo(np.int32).max, size=1 + )[0] self._list_imputation_models = [ ConditionalSampler( @@ -130,9 +127,7 @@ def fit(self, X, y=None, groups=None, var_type="auto"): else clone(self.imputation_model_categorical) ), # require a RandomState due to scikitlearn check - random_state=np.random.RandomState( - np.random.default_rng(streams[group_id]).bit_generator - ), + random_state=[group_id, seed_root], categorical_max_cardinality=self.categorical_max_cardinality, ) for group_id in range(self.n_groups) diff --git a/src/hidimstat/conditional_sampling.py b/src/hidimstat/conditional_sampling.py index f8920581a..cabc1b099 100644 --- a/src/hidimstat/conditional_sampling.py +++ b/src/hidimstat/conditional_sampling.py @@ -1,7 +1,7 @@ import numpy as np from sklearn.base import MultiOutputMixin, check_is_fitted, BaseEstimator from sklearn.multioutput import MultiOutputClassifier, MultiOutputRegressor -from sklearn.utils.validation import check_random_state +from hidimstat._utils.utils import check_random_state def _check_data_type( @@ -79,7 +79,7 @@ def __init__( self.data_type = data_type 
self.model_regression = model_regression self.model_categorical = model_categorical - self.rng = check_random_state(random_state) + self.random_state = random_state self.categorical_max_cardinality = categorical_max_cardinality def fit(self, X: np.ndarray, y: np.ndarray): @@ -137,6 +137,8 @@ def sample(self, X: np.ndarray, y: np.ndarray, n_samples: int = 1) -> np.ndarray y_conditional : ndarray The samples from the conditional distribution. """ + rng = check_random_state(self.random_state) + print(rng.rand(2)) check_is_fitted(self.model) @@ -149,7 +151,7 @@ def sample(self, X: np.ndarray, y: np.ndarray, n_samples: int = 1) -> np.ndarray y_hat = self.model.predict(X).reshape(y.shape) residual = y - y_hat residual_permuted = np.stack( - [self.rng.permutation(residual) for _ in range(n_samples)], + [rng.permutation(residual) for _ in range(n_samples)], axis=0, ) return y_hat[np.newaxis, ...] + residual_permuted @@ -174,7 +176,7 @@ def sample(self, X: np.ndarray, y: np.ndarray, n_samples: int = 1) -> np.ndarray y_pred_cond.append( np.stack( [ - self.rng.choice(classes, p=p, size=n_samples) + rng.choice(classes, p=p, size=n_samples) for p in y_pred_proba[index] ], axis=1, diff --git a/src/hidimstat/knockoffs.py b/src/hidimstat/knockoffs.py index 37918117e..2b91d2109 100644 --- a/src/hidimstat/knockoffs.py +++ b/src/hidimstat/knockoffs.py @@ -4,7 +4,6 @@ from sklearn.linear_model import LassoCV from sklearn.model_selection import KFold from sklearn.preprocessing import StandardScaler -from sklearn.utils import check_random_state from sklearn.utils.validation import check_memory from sklearn.base import clone @@ -14,6 +13,7 @@ ) from hidimstat.statistical_tools.multiple_testing import fdr_threshold from hidimstat.statistical_tools.aggregation import quantile_aggregation +from hidimstat._utils.utils import check_random_state def preconfigure_estimator_LassoCV(estimator, X, X_tilde, y, n_alphas=20): diff --git a/src/hidimstat/noise_std.py b/src/hidimstat/noise_std.py index 3696704e6..32445e19c 100644 --- a/src/hidimstat/noise_std.py +++ b/src/hidimstat/noise_std.py @@ -104,6 +104,7 @@ def reid( cv=cv, tol=tolerance, max_iter=max_iterance, + random_state=seed + 1, n_jobs=n_jobs, ) clf_cv.fit(X_, y) diff --git a/src/hidimstat/permutation_feature_importance.py b/src/hidimstat/permutation_feature_importance.py index f24cf2a4a..c66378796 100644 --- a/src/hidimstat/permutation_feature_importance.py +++ b/src/hidimstat/permutation_feature_importance.py @@ -1,8 +1,8 @@ import numpy as np from sklearn.metrics import root_mean_squared_error -from sklearn.utils import check_random_state from hidimstat.base_perturbation import BasePerturbation +from hidimstat._utils.utils import check_random_state class PFI(BasePerturbation): diff --git a/test/test_conditional_feature_importance.py b/test/test_conditional_feature_importance.py index ea710cc03..0338fd299 100644 --- a/test/test_conditional_feature_importance.py +++ b/test/test_conditional_feature_importance.py @@ -10,6 +10,7 @@ from hidimstat import CFI, BasePerturbation from hidimstat._utils.exception import InternalError +from hidimstat._utils.scenario import multivariate_simulation def run_cfi(X, y, n_permutation, seed): @@ -568,3 +569,47 @@ def test_groups_warning(self, data_generator): " number of features for which importance is computed: 4", ): cfi.importance(X, y) + + +@pytest.fixture(scope="module") +def cfi_test_data(): + """ + Fixture to generate test data and a fitted LinearRegression model for CFI + reproducibility tests. 
+ """ + X, y, _, _ = multivariate_simulation( + n_samples=100, + n_features=5, + support_size=2, + rho=0, + value=1, + signal_noise_ratio=4, + rho_serial=0, + shuffle=False, + seed=0, + ) + X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) + model = LinearRegression() + model.fit(X_train, y_train) + return X_train, X_test, y_train, y_test, model + + +def test_cfi_multiple_calls_are_repeatibility(cfi_test_data): + """ + Test that multiple calls of .importance() when CFI is seeded provide deterministic + results. + """ + X_train, X_test, y_train, y_test, model = cfi_test_data + cfi = CFI( + estimator=model, + imputation_model_continuous=LinearRegression(), + n_permutations=20, + method="predict", + random_state=0, + n_jobs=1, + ) + cfi.fit(X_train, groups=None, var_type="auto") + vim = cfi.importance(X_test, y_test)["importance"] + print("reproduction") + vim_reproducible = cfi.importance(X_test, y_test)["importance"] + assert np.array_equal(vim, vim_reproducible) From f9df301e469ca4e1b4c1b0626254b911393a85e8 Mon Sep 17 00:00:00 2001 From: lionel kusch Date: Tue, 9 Sep 2025 13:34:37 +0200 Subject: [PATCH 27/46] Apply suggestions from code review Co-authored-by: Joseph Paillard --- src/hidimstat/base_perturbation.py | 5 +---- src/hidimstat/conditional_sampling.py | 1 - 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/src/hidimstat/base_perturbation.py b/src/hidimstat/base_perturbation.py index 717617e7e..27657bc92 100644 --- a/src/hidimstat/base_perturbation.py +++ b/src/hidimstat/base_perturbation.py @@ -94,7 +94,7 @@ def fit(self, X, y=None, groups=None): else: raise ValueError("groups needs to be a dictionnary") - def predict(self, X, random_generator): + def predict(self, X): """ Compute the predictions after perturbation of the data for each group of variables. @@ -148,9 +148,6 @@ def importance(self, X, y): - 'importance': the importance scores for each group. """ self._check_fit(X) - random_generator = ( - None if self.random_state is None else check_random_state(self.random_state) - ) out_dict = dict() diff --git a/src/hidimstat/conditional_sampling.py b/src/hidimstat/conditional_sampling.py index cabc1b099..1e0791fbf 100644 --- a/src/hidimstat/conditional_sampling.py +++ b/src/hidimstat/conditional_sampling.py @@ -138,7 +138,6 @@ def sample(self, X: np.ndarray, y: np.ndarray, n_samples: int = 1) -> np.ndarray The samples from the conditional distribution. 
""" rng = check_random_state(self.random_state) - print(rng.rand(2)) check_is_fitted(self.model) From 347a6afc1f559c98650c6cdfe4d2aafdfad1f037 Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Tue, 9 Sep 2025 13:47:57 +0200 Subject: [PATCH 28/46] change predict signature --- src/hidimstat/base_perturbation.py | 27 +++++++++---------- .../conditional_feature_importance.py | 9 ++++--- .../permutation_feature_importance.py | 9 ++++--- test/test_base_perturbation.py | 2 +- test/test_conditional_feature_importance.py | 2 +- test/test_leave_one_covariate_out.py | 2 +- 6 files changed, 27 insertions(+), 24 deletions(-) diff --git a/src/hidimstat/base_perturbation.py b/src/hidimstat/base_perturbation.py index 27657bc92..af725f33b 100644 --- a/src/hidimstat/base_perturbation.py +++ b/src/hidimstat/base_perturbation.py @@ -113,17 +113,12 @@ def predict(self, X): X_ = np.asarray(X) # Parallelize the computation of the importance scores for each group - if random_generator is None: - list_seed = [None for i in range(self.n_groups)] - else: - list_seed = random_generator.randint(np.iinfo(np.int32).max) + np.arange( - self.n_groups - ) + seed_root = check_random_state(self.random_state).randint( + np.iinfo(np.int32).max, size=1 + )[0] out_list = Parallel(n_jobs=self.n_jobs)( - delayed(self._joblib_predict_one_group)(X_, group_id, group_key, seed) - for group_id, (group_key, seed) in enumerate( - zip(self.groups.keys(), list_seed) - ) + delayed(self._joblib_predict_one_group)(X_, group_id, group_key, seed_root) + for group_id, group_key in enumerate(self.groups.keys()) ) return np.stack(out_list, axis=0) @@ -155,7 +150,7 @@ def importance(self, X, y): loss_reference = self.loss(y, y_pred) out_dict["loss_reference"] = loss_reference - y_pred = self.predict(X, random_generator=random_generator) + y_pred = self.predict(X) out_dict["loss"] = dict() for j, y_pred_j in enumerate(y_pred): list_loss = [] @@ -241,7 +236,7 @@ def _check_fit(self, X): f"{number_unique_feature_in_groups}" ) - def _joblib_predict_one_group(self, X, group_id, group_key, seed): + def _joblib_predict_one_group(self, X, group_id, group_key, seed_root): """ Compute the predictions after perturbation of the data for a given group of variables. This function is parallelized. @@ -254,7 +249,7 @@ def _joblib_predict_one_group(self, X, group_id, group_key, seed): The index of the group of variables. group_key: str, int The key of the group of variables. (parameter use for debugging) - seed: int, optional + seed_root: int, optional Random seed for reproducibility. 
""" group_ids = self._groups_ids[group_id] @@ -263,7 +258,9 @@ def _joblib_predict_one_group(self, X, group_id, group_key, seed): # where the j-th group of covariates is permuted X_perm = np.empty((self.n_permutations, X.shape[0], X.shape[1])) X_perm[:, :, non_group_ids] = np.delete(X, group_ids, axis=1) - X_perm[:, :, group_ids] = self._permutation(X, group_id=group_id, seed=seed) + X_perm[:, :, group_ids] = self._permutation( + X, group_id=group_id, seed_root=seed_root + ) # Reshape X_perm to allow for batch prediction X_perm_batch = X_perm.reshape(-1, X.shape[1]) y_pred_perm = getattr(self.estimator, self.method)(X_perm_batch) @@ -277,6 +274,6 @@ def _joblib_predict_one_group(self, X, group_id, group_key, seed): ) return y_pred_perm - def _permutation(self, X, group_id, seed): + def _permutation(self, X, group_id, seed_root): """Method for creating the permuted data for the j-th group of covariates.""" raise NotImplementedError diff --git a/src/hidimstat/conditional_feature_importance.py b/src/hidimstat/conditional_feature_importance.py index 88a63e9b7..be9f71e13 100644 --- a/src/hidimstat/conditional_feature_importance.py +++ b/src/hidimstat/conditional_feature_importance.py @@ -183,9 +183,12 @@ def _check_fit(self, X): for m in self._list_imputation_models: check_is_fitted(m.model) - def _permutation(self, X, group_id, seed): - """Sample from the conditional distribution using a permutation of the - residuals.""" + def _permutation(self, X, group_id, seed_root): + """ + Sample from the conditional distribution using a permutation of the + residuals. + Warning: This method is run in parallel. + """ X_j = X[:, self._groups_ids[group_id]].copy() X_minus_j = np.delete(X, self._groups_ids[group_id], axis=1) return self._list_imputation_models[group_id].sample( diff --git a/src/hidimstat/permutation_feature_importance.py b/src/hidimstat/permutation_feature_importance.py index 1b8c0ef30..7f54a2abe 100644 --- a/src/hidimstat/permutation_feature_importance.py +++ b/src/hidimstat/permutation_feature_importance.py @@ -55,9 +55,12 @@ def __init__( ) self.random_state = random_state - def _permutation(self, X, group_id, seed): - """Create the permuted data for the j-th group of covariates""" - rng = check_random_state(seed) + def _permutation(self, X, group_id, seed_root): + """ + Create the permuted data for the j-th group of covariates + Warning: This method is run in parallel. 
+ """ + rng = check_random_state([group_id, seed_root]) X_perm_j = np.array( [ rng.permutation(X[:, self._groups_ids[group_id]].copy()) diff --git a/test/test_base_perturbation.py b/test/test_base_perturbation.py index 835c3f12e..fb2c408fb 100644 --- a/test/test_base_perturbation.py +++ b/test/test_base_perturbation.py @@ -11,4 +11,4 @@ def test_no_implemented_methods(): estimator.fit(X[:, 0], X[:, 1]) basic_class = BasePerturbation(estimator=estimator) with pytest.raises(NotImplementedError): - basic_class._permutation(X, group_id=None, seed=0) + basic_class._permutation(X, group_id=None, seed_root=0) diff --git a/test/test_conditional_feature_importance.py b/test/test_conditional_feature_importance.py index 0338fd299..da40596e0 100644 --- a/test/test_conditional_feature_importance.py +++ b/test/test_conditional_feature_importance.py @@ -378,7 +378,7 @@ def test_unfitted_predict(self, data_generator): ) with pytest.raises(ValueError, match="The class is not fitted."): - cfi.predict(X, None) + cfi.predict(X) def test_unfitted_importance(self, data_generator): """Test importance method with unfitted model""" diff --git a/test/test_leave_one_covariate_out.py b/test/test_leave_one_covariate_out.py index c2ba266bd..d8fd2a763 100644 --- a/test/test_leave_one_covariate_out.py +++ b/test/test_leave_one_covariate_out.py @@ -119,7 +119,7 @@ def test_raises_value_error(): estimator=fitted_model, method="predict", ) - loco.predict(X, None) + loco.predict(X) with pytest.raises(ValueError, match="The class is not fitted."): fitted_model = LinearRegression().fit(X, y) loco = LOCO( From 05ca2b8c82593f712a6711c8a69efe5d9ba0364a Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Tue, 9 Sep 2025 13:49:28 +0200 Subject: [PATCH 29/46] update docstring of check_random_state --- src/hidimstat/_utils/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/hidimstat/_utils/utils.py b/src/hidimstat/_utils/utils.py index 0a7f65e25..5d6b04b83 100644 --- a/src/hidimstat/_utils/utils.py +++ b/src/hidimstat/_utils/utils.py @@ -39,9 +39,10 @@ def check_random_state(seed): Parameters ---------- - seed : None, int or instance of RandomState + seed : None, int, tuple/list of 2 ints, or instance of RandomState If seed is None, return the RandomState singleton used by np.random. If seed is an int, return a new RandomState instance seeded with seed. + If seed is a tuple/list of 2 integers, creates a new seeded RandomState. If seed is already a RandomState instance, return it. Otherwise raise ValueError. From b13f7b26027b32d32f1defbcbf4f08c230ec9365 Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Tue, 9 Sep 2025 15:40:58 +0200 Subject: [PATCH 30/46] improvement gestion of seed --- src/hidimstat/_utils/utils.py | 99 +++++++++++++++++++ .../conditional_feature_importance.py | 9 +- test/test_conditional_feature_importance.py | 98 +++++++++++++++++- 3 files changed, 200 insertions(+), 6 deletions(-) diff --git a/src/hidimstat/_utils/utils.py b/src/hidimstat/_utils/utils.py index 5d6b04b83..b56e0b1a7 100644 --- a/src/hidimstat/_utils/utils.py +++ b/src/hidimstat/_utils/utils.py @@ -73,3 +73,102 @@ def check_random_state(seed): raise ValueError( "%r cannot be used to seed a numpy.random.RandomState instance" % seed ) + + +class SeedGenerator: + """Generate seeds for parallel random number generation based on numpy guidelines. 
+
+    This class implements the recommended approach for parallel random number generation
+    from numpy's documentation: https://numpy.org/doc/stable/reference/random/parallel.html
+
+    Parameters
+    ----------
+    seed_root : int
+        The root seed value to generate worker-specific seeds.
+    """
+
+    def __init__(self, seed_root):
+        self.seed_root = seed_root
+
+    def get_seed(self, worker_id):
+        """Generate a seed pair for a specific worker.
+
+        Parameters
+        ----------
+        worker_id : int
+            Unique identifier for the worker/job.
+
+        Returns
+        -------
+        list
+            A list containing [worker_id, seed_root] for random number generation.
+        """
+        return [worker_id, self.seed_root]
+
+
+class NoneGenerator:
+    """Generator that always returns None as seed.
+
+    This is used when no specific seed is required.
+    """
+
+    def get_seed(self, worker_id):
+        """Return None regardless of worker_id.
+
+        Parameters
+        ----------
+        worker_id : int
+            Unique identifier for the worker/job (unused).
+
+        Returns
+        -------
+        None
+            Always returns None.
+        """
+        return None
+
+
+def get_seed_generator(random_state):
+    """
+    Create the appropriate seed generator based on input type.
+    WARNING: this function must keep the same branches as check_random_state.
+
+    Parameters
+    ----------
+    random_state : None, int, tuple/list of 2 ints, or numpy.random.RandomState
+        The random state to use for seed generation.
+
+    Returns
+    -------
+    SeedGenerator or NoneGenerator
+        An appropriate generator for creating seeds.
+
+    Raises
+    ------
+    ValueError
+        If random_state is not of a supported type.
+    """
+    if random_state is None:
+        return NoneGenerator()
+    elif isinstance(random_state, numbers.Integral):
+        return SeedGenerator(seed_root=random_state)
+    elif (
+        (isinstance(random_state, tuple) or isinstance(random_state, list))
+        and len(random_state) == 2
+        and isinstance(random_state[0], numbers.Integral)
+        and isinstance(random_state[1], numbers.Integral)
+    ):
+        seed_root = check_random_state(random_state).randint(
+            np.iinfo(np.int32).max, size=1
+        )[0]
+        return SeedGenerator(seed_root=seed_root)
+    elif isinstance(random_state, np.random.RandomState):
+        seed_root = check_random_state(random_state).randint(
+            np.iinfo(np.int32).max, size=1
+        )[0]
+        return SeedGenerator(seed_root=seed_root)
+    else:
+        raise ValueError(
+            "%r cannot be used to seed a numpy.random.RandomState instance"
+            % random_state
+        )
diff --git a/src/hidimstat/conditional_feature_importance.py b/src/hidimstat/conditional_feature_importance.py
index be9f71e13..78a0a6c51 100644
--- a/src/hidimstat/conditional_feature_importance.py
+++ b/src/hidimstat/conditional_feature_importance.py
@@ -5,7 +5,7 @@
 
 from hidimstat.base_perturbation import BasePerturbation
 from hidimstat.conditional_sampling import ConditionalSampler
-from hidimstat._utils.utils import check_random_state
+from hidimstat._utils.utils import get_seed_generator
 
 
 class CFI(BasePerturbation):
@@ -109,9 +109,8 @@ def fit(self, X, y=None, groups=None, var_type="auto"):
             self.var_type = [var_type for _ in range(self.n_groups)]
         else:
             self.var_type = var_type
-        seed_root = check_random_state(self.random_state).randint(
-            np.iinfo(np.int32).max, size=1
-        )[0]
+
+        seed_generator = get_seed_generator(self.random_state)
 
         self._list_imputation_models = [
             ConditionalSampler(
@@ -127,7 +126,7 @@ def fit(self, X, y=None, groups=None, var_type="auto"):
                 else clone(self.imputation_model_categorical)
             ),
             # require a RandomState due to scikit-learn check
-            random_state=[group_id, seed_root],
+            random_state=seed_generator.get_seed(group_id),
             categorical_max_cardinality=self.categorical_max_cardinality,
             )
             for group_id in range(self.n_groups)
diff --git a/test/test_conditional_feature_importance.py b/test/test_conditional_feature_importance.py
index da40596e0..fa689b84a 100644
--- a/test/test_conditional_feature_importance.py
+++ b/test/test_conditional_feature_importance.py
@@ -329,7 +329,7 @@ def test_categorical(
             estimator=fitted_model,
             imputation_model_continuous=LinearRegression(),
             imputation_model_categorical=LogisticRegression(),
-            random_state=seed + 1,
+            random_state=0,
         )
 
         var_type = ["continuous", "continuous", "categorical"]
@@ -613,3 +613,99 @@ def test_cfi_multiple_calls_are_repeatibility(cfi_test_data):
     print("reproduction")
     vim_reproducible = cfi.importance(X_test, y_test)["importance"]
     assert np.array_equal(vim, vim_reproducible)
+
+
+def test_cfi_multiple_calls_are_repeatibility_None(cfi_test_data):
+    """
+    Test that repeated calls of .importance() are deterministic when CFI is not
+    seeded, as long as it is not refitted.
+    """
+    X_train, X_test, y_train, y_test, model = cfi_test_data
+    cfi = CFI(
+        estimator=model,
+        imputation_model_continuous=LinearRegression(),
+        n_permutations=20,
+        method="predict",
+        random_state=None,
+        n_jobs=1,
+    )
+    cfi.fit(X_train, groups=None, var_type="auto")
+    vim = cfi.importance(X_test, y_test)["importance"]
+    # rerun importance
+    vim_reproducible = cfi.importance(X_test, y_test)["importance"]
+    assert np.array_equal(vim, vim_reproducible)
+
+
+def test_cfi_multiple_calls_are_not_repeatibility_None(cfi_test_data):
+    """
+    Test that refitting an unseeded CFI leads to different .importance()
+    results.
+    """
+    X_train, X_test, y_train, y_test, model = cfi_test_data
+    cfi = CFI(
+        estimator=model,
+        imputation_model_continuous=LinearRegression(),
+        n_permutations=20,
+        method="predict",
+        random_state=None,
+        n_jobs=1,
+    )
+    cfi.fit(X_train, groups=None, var_type="auto")
+    vim = cfi.importance(X_test, y_test)["importance"]
+
+    # refit
+    cfi.fit(X_train, groups=None, var_type="auto")
+    vim_reproducible = cfi.importance(X_test, y_test)["importance"]
+    np.testing.assert_raises(
+        AssertionError, np.testing.assert_array_equal, vim, vim_reproducible
+    )
+
+
+def test_cfi_multiple_calls_are_repeatibility_rng(cfi_test_data):
+    """
+    Test that repeated calls of .importance() are not deterministic when CFI is
+    seeded with a RandomState instance, whose state advances between calls.
+    """
+    X_train, X_test, y_train, y_test, model = cfi_test_data
+    rng = np.random.RandomState(0)
+    cfi = CFI(
+        estimator=model,
+        imputation_model_continuous=LinearRegression(),
+        n_permutations=20,
+        method="predict",
+        random_state=rng,
+        n_jobs=1,
+    )
+    cfi.fit(X_train, groups=None, var_type="auto")
+    vim = cfi.importance(X_test, y_test)["importance"]
+    # rerun importance
+    vim_reproducible = cfi.importance(X_test, y_test)["importance"]
+    np.testing.assert_raises(
+        AssertionError, np.testing.assert_array_equal, vim, vim_reproducible
+    )
+
+
+def test_cfi_multiple_calls_are_not_repeatibility_rng(cfi_test_data):
+    """
+    Test that refitting a CFI seeded with a RandomState instance leads to
+    different .importance() results.
+ """ + X_train, X_test, y_train, y_test, model = cfi_test_data + rng = np.random.RandomState(0) + cfi = CFI( + estimator=model, + imputation_model_continuous=LinearRegression(), + n_permutations=20, + method="predict", + random_state=rng, + n_jobs=1, + ) + cfi.fit(X_train, groups=None, var_type="auto") + vim = cfi.importance(X_test, y_test)["importance"] + + # refit + cfi.fit(X_train, groups=None, var_type="auto") + vim_reproducible = cfi.importance(X_test, y_test)["importance"] + np.testing.assert_raises( + AssertionError, np.testing.assert_array_equal, vim, vim_reproducible + ) From fad51275973c084bc5afb94db1b5d5c907bf242a Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Wed, 10 Sep 2025 12:20:43 +0200 Subject: [PATCH 31/46] remove management of seed --- src/hidimstat/_utils/utils.py | 147 ------------------ .../conditional_feature_importance.py | 8 +- test/test_conditional_feature_importance.py | 140 ----------------- 3 files changed, 3 insertions(+), 292 deletions(-) diff --git a/src/hidimstat/_utils/utils.py b/src/hidimstat/_utils/utils.py index b56e0b1a7..df9585241 100644 --- a/src/hidimstat/_utils/utils.py +++ b/src/hidimstat/_utils/utils.py @@ -1,7 +1,3 @@ -import numbers -import numpy as np - - def _check_vim_predict_method(method): """ Validates that the method is a valid scikit-learn prediction method for variable importance measures. @@ -29,146 +25,3 @@ def _check_vim_predict_method(method): "The method {} is not a valid method " "for variable importance measure prediction".format(method) ) - - -def check_random_state(seed): - """ - Turn seed into a np.random.RandomState instance. - This is based on the implementation of check_random_state of sciktilearn: - https://github.com/scikit-learn/scikit-learn/blob/25dee604bae18205b01548348388baf7a1cdfe0e/sklearn/utils/validation.py#L1488 - - Parameters - ---------- - seed : None, int, tuple/list of 2 ints, or instance of RandomState - If seed is None, return the RandomState singleton used by np.random. - If seed is an int, return a new RandomState instance seeded with seed. - If seed is a tuple/list of 2 integers, creates a new seeded RandomState. - If seed is already a RandomState instance, return it. - Otherwise raise ValueError. - - Returns - ------- - :class:`numpy:numpy.random.RandomState` - The random state object based on `seed` parameter. - - Examples - -------- - >>> from sklearn.utils.validation import check_random_state - >>> check_random_state(42) - RandomState(MT19937) at 0x... - """ - if seed is None or seed is np.random: - return np.random.mtrand._rand - if isinstance(seed, numbers.Integral): - return np.random.RandomState(seed) - if ( - (isinstance(seed, tuple) or isinstance(seed, list)) - and len(seed) == 2 - and isinstance(seed[0], numbers.Integral) - and isinstance(seed[1], numbers.Integral) - ): - return np.random.RandomState(np.random.default_rng(seed).bit_generator) - if isinstance(seed, np.random.RandomState): - return seed - raise ValueError( - "%r cannot be used to seed a numpy.random.RandomState instance" % seed - ) - - -class SeedGenerator: - """Generate seeds for parallel random number generation based on numpy guidelines. - - This class implements the recommended approach for parallel random number generation - from numpy's documentation: https://numpy.org/doc/stable/reference/random/parallel.html - - Parameters - ---------- - seed_root : int - The root seed value to generate worker-specific seeds. 
-    """
-
-    def __init__(self, seed_root):
-        self.seed_root = seed_root
-
-    def get_seed(self, worker_id):
-        """Generate a seed pair for a specific worker.
-
-        Parameters
-        ----------
-        worker_id : int
-            Unique identifier for the worker/job.
-
-        Returns
-        -------
-        list
-            A list containing [worker_id, seed_root] for random number generation.
-        """
-        return [worker_id, self.seed_root]
-
-
-class NoneGenerator:
-    """Generator that always returns None as seed.
-
-    This is used when no specific seed is required.
-    """
-
-    def get_seed(self, worker_id):
-        """Return None regardless of worker_id.
-
-        Parameters
-        ----------
-        worker_id : int
-            Unique identifier for the worker/job (unused).
-
-        Returns
-        -------
-        None
-            Always returns None.
-        """
-        return None
-
-
-def get_seed_generator(random_state):
-    """
-    Create the appropriate seed generator based on input type.
-    WARNING: this function must keep the same branches as check_random_state.
-
-    Parameters
-    ----------
-    random_state : None, int, tuple/list of 2 ints, or numpy.random.RandomState
-        The random state to use for seed generation.
-
-    Returns
-    -------
-    SeedGenerator or NoneGenerator
-        An appropriate generator for creating seeds.
-
-    Raises
-    ------
-    ValueError
-        If random_state is not of a supported type.
-    """
-    if random_state is None:
-        return NoneGenerator()
-    elif isinstance(random_state, numbers.Integral):
-        return SeedGenerator(seed_root=random_state)
-    elif (
-        (isinstance(random_state, tuple) or isinstance(random_state, list))
-        and len(random_state) == 2
-        and isinstance(random_state[0], numbers.Integral)
-        and isinstance(random_state[1], numbers.Integral)
-    ):
-        seed_root = check_random_state(random_state).randint(
-            np.iinfo(np.int32).max, size=1
-        )[0]
-        return SeedGenerator(seed_root=seed_root)
-    elif isinstance(random_state, np.random.RandomState):
-        seed_root = check_random_state(random_state).randint(
-            np.iinfo(np.int32).max, size=1
-        )[0]
-        return SeedGenerator(seed_root=seed_root)
-    else:
-        raise ValueError(
-            "%r cannot be used to seed a numpy.random.RandomState instance"
-            % random_state
-        )
diff --git a/src/hidimstat/conditional_feature_importance.py b/src/hidimstat/conditional_feature_importance.py
index 78a0a6c51..518d6049b 100644
--- a/src/hidimstat/conditional_feature_importance.py
+++ b/src/hidimstat/conditional_feature_importance.py
@@ -2,10 +2,10 @@
 from joblib import Parallel, delayed
 from sklearn.base import check_is_fitted, clone, BaseEstimator
 from sklearn.metrics import root_mean_squared_error
+from sklearn.utils.validation import check_random_state
 
 from hidimstat.base_perturbation import BasePerturbation
 from hidimstat.conditional_sampling import ConditionalSampler
-from hidimstat._utils.utils import get_seed_generator
 
 
 class CFI(BasePerturbation):
@@ -104,14 +104,13 @@ def fit(self, X, y=None, groups=None, var_type="auto"):
         self : object
             Returns the instance itself. 
""" + self.random_state = check_random_state(self.random_state) super().fit(X, None, groups=groups) if isinstance(var_type, str): self.var_type = [var_type for _ in range(self.n_groups)] else: self.var_type = var_type - seed_generator = get_seed_generator(self.random_state) - self._list_imputation_models = [ ConditionalSampler( data_type=self.var_type[group_id], @@ -125,8 +124,7 @@ def fit(self, X, y=None, groups=None, var_type="auto"): if self.imputation_model_categorical is None else clone(self.imputation_model_categorical) ), - # require a RandomState due to scikitlearn check - random_state=seed_generator.get_seed(group_id), + random_state=self.random_state, categorical_max_cardinality=self.categorical_max_cardinality, ) for group_id in range(self.n_groups) diff --git a/test/test_conditional_feature_importance.py b/test/test_conditional_feature_importance.py index fa689b84a..663d39b5e 100644 --- a/test/test_conditional_feature_importance.py +++ b/test/test_conditional_feature_importance.py @@ -569,143 +569,3 @@ def test_groups_warning(self, data_generator): " number of features for which importance is computed: 4", ): cfi.importance(X, y) - - -@pytest.fixture(scope="module") -def cfi_test_data(): - """ - Fixture to generate test data and a fitted LinearRegression model for CFI - reproducibility tests. - """ - X, y, _, _ = multivariate_simulation( - n_samples=100, - n_features=5, - support_size=2, - rho=0, - value=1, - signal_noise_ratio=4, - rho_serial=0, - shuffle=False, - seed=0, - ) - X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) - model = LinearRegression() - model.fit(X_train, y_train) - return X_train, X_test, y_train, y_test, model - - -def test_cfi_multiple_calls_are_repeatibility(cfi_test_data): - """ - Test that multiple calls of .importance() when CFI is seeded provide deterministic - results. - """ - X_train, X_test, y_train, y_test, model = cfi_test_data - cfi = CFI( - estimator=model, - imputation_model_continuous=LinearRegression(), - n_permutations=20, - method="predict", - random_state=0, - n_jobs=1, - ) - cfi.fit(X_train, groups=None, var_type="auto") - vim = cfi.importance(X_test, y_test)["importance"] - print("reproduction") - vim_reproducible = cfi.importance(X_test, y_test)["importance"] - assert np.array_equal(vim, vim_reproducible) - - -def test_cfi_multiple_calls_are_repeatibility_None(cfi_test_data): - """ - Test that multiple calls of .importance() when CFI is seeded provide deterministic - results. - """ - X_train, X_test, y_train, y_test, model = cfi_test_data - cfi = CFI( - estimator=model, - imputation_model_continuous=LinearRegression(), - n_permutations=20, - method="predict", - random_state=None, - n_jobs=1, - ) - cfi.fit(X_train, groups=None, var_type="auto") - vim = cfi.importance(X_test, y_test)["importance"] - # rerun importance - vim_reproducible = cfi.importance(X_test, y_test)["importance"] - assert np.array_equal(vim, vim_reproducible) - - -def test_cfi_multiple_calls_are_not_repeatibility_None(cfi_test_data): - """ - Test that multiple calls of .importance() when CFI is seeded provide deterministic - results. 
-    """
-    X_train, X_test, y_train, y_test, model = cfi_test_data
-    cfi = CFI(
-        estimator=model,
-        imputation_model_continuous=LinearRegression(),
-        n_permutations=20,
-        method="predict",
-        random_state=None,
-        n_jobs=1,
-    )
-    cfi.fit(X_train, groups=None, var_type="auto")
-    vim = cfi.importance(X_test, y_test)["importance"]
-
-    # refit
-    cfi.fit(X_train, groups=None, var_type="auto")
-    vim_reproducible = cfi.importance(X_test, y_test)["importance"]
-    np.testing.assert_raises(
-        AssertionError, np.testing.assert_array_equal, vim, vim_reproducible
-    )
-
-
-def test_cfi_multiple_calls_are_repeatibility_rng(cfi_test_data):
-    """
-    Test that repeated calls of .importance() are not deterministic when CFI is
-    seeded with a RandomState instance, whose state advances between calls.
-    """
-    X_train, X_test, y_train, y_test, model = cfi_test_data
-    rng = np.random.RandomState(0)
-    cfi = CFI(
-        estimator=model,
-        imputation_model_continuous=LinearRegression(),
-        n_permutations=20,
-        method="predict",
-        random_state=rng,
-        n_jobs=1,
-    )
-    cfi.fit(X_train, groups=None, var_type="auto")
-    vim = cfi.importance(X_test, y_test)["importance"]
-    # rerun importance
-    vim_reproducible = cfi.importance(X_test, y_test)["importance"]
-    np.testing.assert_raises(
-        AssertionError, np.testing.assert_array_equal, vim, vim_reproducible
-    )
-
-
-def test_cfi_multiple_calls_are_not_repeatibility_rng(cfi_test_data):
-    """
-    Test that refitting a CFI seeded with a RandomState instance leads to
-    different .importance() results.
-    """
-    X_train, X_test, y_train, y_test, model = cfi_test_data
-    rng = np.random.RandomState(0)
-    cfi = CFI(
-        estimator=model,
-        imputation_model_continuous=LinearRegression(),
-        n_permutations=20,
-        method="predict",
-        random_state=rng,
-        n_jobs=1,
-    )
-    cfi.fit(X_train, groups=None, var_type="auto")
-    vim = cfi.importance(X_test, y_test)["importance"]
-
-    # refit
-    cfi.fit(X_train, groups=None, var_type="auto")
-    vim_reproducible = cfi.importance(X_test, y_test)["importance"]
-    np.testing.assert_raises(
-        AssertionError, np.testing.assert_array_equal, vim, vim_reproducible
-    )
From 8fd630c804fb0ebc9bf0ce798e0833bf9b130a6f Mon Sep 17 00:00:00 2001
From: kusch lionel
Date: Wed, 10 Sep 2025 12:39:08 +0200
Subject: [PATCH 32/46] remove some management of seeds

---
 src/hidimstat/base_perturbation.py              | 17 ++++-------------
 src/hidimstat/conditional_sampling.py           |  2 +-
 src/hidimstat/knockoffs.py                      |  3 +--
 src/hidimstat/leave_one_covariate_out.py        |  2 +-
 src/hidimstat/permutation_feature_importance.py | 13 +++++--------
 test/test_base_perturbation.py                  |  2 +-
 test/test_conditional_feature_importance.py     |  1 -
 7 files changed, 13 insertions(+), 27 deletions(-)

diff --git a/src/hidimstat/base_perturbation.py b/src/hidimstat/base_perturbation.py
index af725f33b..ef3c58343 100644
--- a/src/hidimstat/base_perturbation.py
+++ b/src/hidimstat/base_perturbation.py
@@ -8,7 +8,6 @@
 from hidimstat._utils.utils import _check_vim_predict_method
 from hidimstat._utils.exception import InternalError
 from hidimstat.base_variable_importance import BaseVariableImportance
-from hidimstat._utils.utils import check_random_state
 
 
 class BasePerturbation(BaseVariableImportance):
@@ -54,7 +53,6 @@ def __init__(
         self.n_jobs = n_jobs
         self.n_permutations = n_permutations
         self.n_groups = None
-        self.random_state = None
 
     def fit(self, X, y=None, groups=None):
         """Base fit method for perturbation-based methods. Identifies the groups. 
@@ -113,11 +111,8 @@ def predict(self, X): X_ = np.asarray(X) # Parallelize the computation of the importance scores for each group - seed_root = check_random_state(self.random_state).randint( - np.iinfo(np.int32).max, size=1 - )[0] out_list = Parallel(n_jobs=self.n_jobs)( - delayed(self._joblib_predict_one_group)(X_, group_id, group_key, seed_root) + delayed(self._joblib_predict_one_group)(X_, group_id, group_key) for group_id, group_key in enumerate(self.groups.keys()) ) return np.stack(out_list, axis=0) @@ -236,7 +231,7 @@ def _check_fit(self, X): f"{number_unique_feature_in_groups}" ) - def _joblib_predict_one_group(self, X, group_id, group_key, seed_root): + def _joblib_predict_one_group(self, X, group_id, group_key): """ Compute the predictions after perturbation of the data for a given group of variables. This function is parallelized. @@ -249,8 +244,6 @@ def _joblib_predict_one_group(self, X, group_id, group_key, seed_root): The index of the group of variables. group_key: str, int The key of the group of variables. (parameter use for debugging) - seed_root: int, optional - Random seed for reproducibility. """ group_ids = self._groups_ids[group_id] non_group_ids = np.delete(np.arange(X.shape[1]), group_ids) @@ -258,9 +251,7 @@ def _joblib_predict_one_group(self, X, group_id, group_key, seed_root): # where the j-th group of covariates is permuted X_perm = np.empty((self.n_permutations, X.shape[0], X.shape[1])) X_perm[:, :, non_group_ids] = np.delete(X, group_ids, axis=1) - X_perm[:, :, group_ids] = self._permutation( - X, group_id=group_id, seed_root=seed_root - ) + X_perm[:, :, group_ids] = self._permutation(X, group_id=group_id) # Reshape X_perm to allow for batch prediction X_perm_batch = X_perm.reshape(-1, X.shape[1]) y_pred_perm = getattr(self.estimator, self.method)(X_perm_batch) @@ -274,6 +265,6 @@ def _joblib_predict_one_group(self, X, group_id, group_key, seed_root): ) return y_pred_perm - def _permutation(self, X, group_id, seed_root): + def _permutation(self, X, group_id): """Method for creating the permuted data for the j-th group of covariates.""" raise NotImplementedError diff --git a/src/hidimstat/conditional_sampling.py b/src/hidimstat/conditional_sampling.py index 1e0791fbf..8aaf55580 100644 --- a/src/hidimstat/conditional_sampling.py +++ b/src/hidimstat/conditional_sampling.py @@ -1,7 +1,7 @@ import numpy as np from sklearn.base import MultiOutputMixin, check_is_fitted, BaseEstimator from sklearn.multioutput import MultiOutputClassifier, MultiOutputRegressor -from hidimstat._utils.utils import check_random_state +from sklearn.utils.validation import check_random_state def _check_data_type( diff --git a/src/hidimstat/knockoffs.py b/src/hidimstat/knockoffs.py index 2b91d2109..764ad88ad 100644 --- a/src/hidimstat/knockoffs.py +++ b/src/hidimstat/knockoffs.py @@ -4,8 +4,8 @@ from sklearn.linear_model import LassoCV from sklearn.model_selection import KFold from sklearn.preprocessing import StandardScaler +from sklearn.utils import check_random_state from sklearn.utils.validation import check_memory -from sklearn.base import clone from hidimstat.gaussian_knockoff import ( gaussian_knockoff_generation, @@ -13,7 +13,6 @@ ) from hidimstat.statistical_tools.multiple_testing import fdr_threshold from hidimstat.statistical_tools.aggregation import quantile_aggregation -from hidimstat._utils.utils import check_random_state def preconfigure_estimator_LassoCV(estimator, X, X_tilde, y, n_alphas=20): diff --git a/src/hidimstat/leave_one_covariate_out.py 
b/src/hidimstat/leave_one_covariate_out.py index eec99d8cb..27bd29cbb 100644 --- a/src/hidimstat/leave_one_covariate_out.py +++ b/src/hidimstat/leave_one_covariate_out.py @@ -93,7 +93,7 @@ def _joblib_fit_one_group(self, estimator, X, y, key_groups): estimator.fit(X_minus_j, y) return estimator - def _joblib_predict_one_group(self, X, group_id, key_groups, seed): + def _joblib_predict_one_group(self, X, group_id, key_groups): """Predict the target variable after removing a group of covariates. Used in parallel.""" X_minus_j = np.delete(X, self._groups_ids[group_id], axis=1) diff --git a/src/hidimstat/permutation_feature_importance.py b/src/hidimstat/permutation_feature_importance.py index 7f54a2abe..7fe72a78a 100644 --- a/src/hidimstat/permutation_feature_importance.py +++ b/src/hidimstat/permutation_feature_importance.py @@ -1,8 +1,8 @@ import numpy as np from sklearn.metrics import root_mean_squared_error +from sklearn.utils import check_random_state from hidimstat.base_perturbation import BasePerturbation -from hidimstat._utils.utils import check_random_state class PFI(BasePerturbation): @@ -55,15 +55,12 @@ def __init__( ) self.random_state = random_state - def _permutation(self, X, group_id, seed_root): - """ - Create the permuted data for the j-th group of covariates - Warning: This method is run in parallel. - """ - rng = check_random_state([group_id, seed_root]) + def _permutation(self, X, group_id): + """Create the permuted data for the j-th group of covariates""" + self.random_state = check_random_state(self.random_state) X_perm_j = np.array( [ - rng.permutation(X[:, self._groups_ids[group_id]].copy()) + self.random_state.permutation(X[:, self._groups_ids[group_id]].copy()) for _ in range(self.n_permutations) ] ) diff --git a/test/test_base_perturbation.py b/test/test_base_perturbation.py index fb2c408fb..dd3ff6d6c 100644 --- a/test/test_base_perturbation.py +++ b/test/test_base_perturbation.py @@ -11,4 +11,4 @@ def test_no_implemented_methods(): estimator.fit(X[:, 0], X[:, 1]) basic_class = BasePerturbation(estimator=estimator) with pytest.raises(NotImplementedError): - basic_class._permutation(X, group_id=None, seed_root=0) + basic_class._permutation(X, group_id=None) diff --git a/test/test_conditional_feature_importance.py b/test/test_conditional_feature_importance.py index 663d39b5e..4ca706bb3 100644 --- a/test/test_conditional_feature_importance.py +++ b/test/test_conditional_feature_importance.py @@ -10,7 +10,6 @@ from hidimstat import CFI, BasePerturbation from hidimstat._utils.exception import InternalError -from hidimstat._utils.scenario import multivariate_simulation def run_cfi(X, y, n_permutation, seed): From 7b501038d40e94ab5c24b0c5fae2749fd8e42e61 Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Wed, 10 Sep 2025 12:40:18 +0200 Subject: [PATCH 33/46] fix missing revert --- src/hidimstat/conditional_feature_importance.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/hidimstat/conditional_feature_importance.py b/src/hidimstat/conditional_feature_importance.py index 518d6049b..c43b3a176 100644 --- a/src/hidimstat/conditional_feature_importance.py +++ b/src/hidimstat/conditional_feature_importance.py @@ -180,12 +180,9 @@ def _check_fit(self, X): for m in self._list_imputation_models: check_is_fitted(m.model) - def _permutation(self, X, group_id, seed_root): - """ - Sample from the conditional distribution using a permutation of the - residuals. - Warning: This method is run in parallel. 
- """ + def _permutation(self, X, group_id): + """Sample from the conditional distribution using a permutation of the + residuals.""" X_j = X[:, self._groups_ids[group_id]].copy() X_minus_j = np.delete(X, self._groups_ids[group_id], axis=1) return self._list_imputation_models[group_id].sample( From bf4a90920bbf0530e032231df317fea38581f12a Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Wed, 10 Sep 2025 16:13:04 +0200 Subject: [PATCH 34/46] fix clone --- src/hidimstat/knockoffs.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/hidimstat/knockoffs.py b/src/hidimstat/knockoffs.py index 764ad88ad..fbf6d2b43 100644 --- a/src/hidimstat/knockoffs.py +++ b/src/hidimstat/knockoffs.py @@ -1,5 +1,6 @@ import numpy as np from joblib import Parallel, delayed +from sklearn.base import clone from sklearn.covariance import LedoitWolf from sklearn.linear_model import LassoCV from sklearn.model_selection import KFold From 999cf8f2d1db81331d776196b6af41ae17bbb033 Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Wed, 10 Sep 2025 16:18:14 +0200 Subject: [PATCH 35/46] fix tests --- test/test_conditional_feature_importance.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_conditional_feature_importance.py b/test/test_conditional_feature_importance.py index bacdf4493..ce87b326a 100644 --- a/test/test_conditional_feature_importance.py +++ b/test/test_conditional_feature_importance.py @@ -133,8 +133,8 @@ def test_linear_data_partial(data_generator, n_permutation, cfi_seed): rank = np.where(importance_sort == index)[0] if rank > min_rank: min_rank = rank - # accept missing ranking of 5 elements - assert min_rank < 20 + # accept missing ranking of 15 elements + assert min_rank < 25 @pytest.mark.parametrize( From 3531d0632cbaf4981e56f0561e00780e65ccd38d Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Wed, 10 Sep 2025 16:37:42 +0200 Subject: [PATCH 36/46] fix tests --- test/test_conditional_feature_importance.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_conditional_feature_importance.py b/test/test_conditional_feature_importance.py index ce87b326a..9e08945f5 100644 --- a/test/test_conditional_feature_importance.py +++ b/test/test_conditional_feature_importance.py @@ -78,7 +78,6 @@ def run_cfi(X, y, n_permutation, seed): ("HiDim", 150, 200, 10, 0.0, 42, 1.0, np.inf, 0.0), ("HiDim with noise", 150, 200, 10, 0.0, 42, 1.0, 10.0, 0.0), ("HiDim with correlated noise", 150, 200, 10, 0.0, 42, 1.0, 10.0, 0.2), - ("HiDim with correlated features", 150, 200, 10, 0.2, 42, 1.0, np.inf, 0.0), ] @@ -99,6 +98,7 @@ def test_linear_data_exact(data_generator, n_permutation, cfi_seed): parameter_partial = [ + ("HiDim with correlated features", 150, 200, 10, 0.2, 42, 1.0, np.inf, 0.0), ("HiDim with correlated features and noise", 150, 200, 10, 0.2, 42, 1, 10, 0), ( "HiDim with correlated features and correlated noise", From f1faa3c4e489cff9f0008a4243a8c534bfd4c798 Mon Sep 17 00:00:00 2001 From: lionel kusch Date: Thu, 11 Sep 2025 10:31:35 +0200 Subject: [PATCH 37/46] Apply suggestions from code review Co-authored-by: bthirion --- examples/plot_dcrt_example.py | 2 +- examples/plot_fmri_data_example.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/plot_dcrt_example.py b/examples/plot_dcrt_example.py index c09a5785f..1a60ed1d8 100644 --- a/examples/plot_dcrt_example.py +++ b/examples/plot_dcrt_example.py @@ -50,7 +50,7 @@ rho=rho, signal_noise_ratio=signal_noise_ratio, shuffle=True, - seed=sim_ind + 10, + seed=sim_ind, ) # 
Applying a reLu function on the outcome y to get non-linear relationships diff --git a/examples/plot_fmri_data_example.py b/examples/plot_fmri_data_example.py index 8f2a3bb19..3b43a7b10 100644 --- a/examples/plot_fmri_data_example.py +++ b/examples/plot_fmri_data_example.py @@ -68,7 +68,7 @@ # Limit the ressoruce use for the example to 5 G. resource.setrlimit(resource.RLIMIT_AS, (int(5 * 1e9), int(5 * 1e9))) -# Set the number of job for the methods +# Set the number of jobs for parallel computations n_job = 1 @@ -173,7 +173,7 @@ def preprocess_haxby(subject=2, memory=None): scaler_sampling=StandardScaler(), tolerance=1e-2, seed=1, - n_jobs=n_job, + n_jobs=n_jobs, ) beta_hat, pval_cdl, _, one_minus_pval_cdl, _ = clustered_inference_pvalue( X.shape[0], None, ward_, beta_hat, theta_hat, omega_diag @@ -199,7 +199,7 @@ def preprocess_haxby(subject=2, memory=None): max_iteration=6000, tolerance=1e-2, seed=2, - n_jobs=n_job, + n_jobs=n_jobs, ) ) beta_hat, selected = ensemble_clustered_inference_pvalue( From 8ff07ea516a86dc9b2bc38191030852013ac834d Mon Sep 17 00:00:00 2001 From: lionel kusch Date: Thu, 11 Sep 2025 10:33:19 +0200 Subject: [PATCH 38/46] Apply suggestions from code review Co-authored-by: bthirion --- examples/plot_fmri_data_example.py | 4 ++-- examples/plot_pitfalls_permutation_importance.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/plot_fmri_data_example.py b/examples/plot_fmri_data_example.py index 3b43a7b10..62ba660e7 100644 --- a/examples/plot_fmri_data_example.py +++ b/examples/plot_fmri_data_example.py @@ -69,7 +69,7 @@ # Limit the ressoruce use for the example to 5 G. resource.setrlimit(resource.RLIMIT_AS, (int(5 * 1e9), int(5 * 1e9))) # Set the number of jobs for parallel computations -n_job = 1 +n_jobs = 1 ############################################################################# @@ -152,7 +152,7 @@ def preprocess_haxby(subject=2, memory=None): # feature aggregation methods. 
try: beta_hat, sigma_hat, precision_diagonal = desparsified_lasso( - X, y, noise_method="median", max_iteration=1000, seed=0, n_jobs=n_job + X, y, noise_method="median", max_iteration=1000, seed=0, n_jobs=n_jobs ) pval_dl, _, one_minus_pval_dl, _, cb_min, cb_max = desparsified_lasso_pvalue( X.shape[0], beta_hat, sigma_hat, precision_diagonal diff --git a/examples/plot_pitfalls_permutation_importance.py b/examples/plot_pitfalls_permutation_importance.py index 4563cc700..99fcf3ecb 100644 --- a/examples/plot_pitfalls_permutation_importance.py +++ b/examples/plot_pitfalls_permutation_importance.py @@ -195,7 +195,7 @@ model_c, imputation_model_continuous=RidgeCV( alphas=np.logspace(-3, 3, 5), - cv=KFold(shuffle=True, random_state=6), + cv=KFold(n_splits=3), ), random_state=7, n_jobs=5, @@ -258,7 +258,7 @@ conditional_sampler = ConditionalSampler( model_regression=RidgeCV( - alphas=np.logspace(-3, 3, 5), cv=KFold(shuffle=True, random_state=9) + alphas=np.logspace(-3, 3, 5), cv=KFold(n_splits=3) ), random_state=10, ) From dfc1c9bd5872c762d10e6a66bcf5b3affdd24d09 Mon Sep 17 00:00:00 2001 From: lionel kusch Date: Thu, 11 Sep 2025 10:34:03 +0200 Subject: [PATCH 39/46] Apply suggestions from code review Co-authored-by: bthirion --- examples/plot_pitfalls_permutation_importance.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/plot_pitfalls_permutation_importance.py b/examples/plot_pitfalls_permutation_importance.py index 99fcf3ecb..2bf66a429 100644 --- a/examples/plot_pitfalls_permutation_importance.py +++ b/examples/plot_pitfalls_permutation_importance.py @@ -119,7 +119,7 @@ # testing conditional importance, as it identifies the spurious feature as important. permutation_importances = [] conditional_permutation_importances = [] -kf = KFold(n_splits=5, shuffle=True, random_state=3) +kf = KFold(n_splits=5) for i, (train_index, test_index) in enumerate(kf.split(X)): X_train, X_test = X[train_index], X[test_index] y_train, y_test = y[train_index], y[test_index] @@ -183,7 +183,7 @@ # explained by the other features unchanged. This method is valid for testing conditional # importance. As shown below, it does not identify the spurious feature as important. 
conditional_importances = [] -kf = KFold(n_splits=5, shuffle=True, random_state=3) +kf = KFold(n_splits=5) for i, (train_index, test_index) in enumerate(kf.split(X)): X_train, X_test = X[train_index], X[test_index] y_train, y_test = y[train_index], y[test_index] From a232fa62e136cc35a262fc5f7f0ffb96b8ce6445 Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Thu, 11 Sep 2025 10:35:12 +0200 Subject: [PATCH 40/46] format document --- examples/plot_pitfalls_permutation_importance.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/examples/plot_pitfalls_permutation_importance.py b/examples/plot_pitfalls_permutation_importance.py index 2bf66a429..d408ac472 100644 --- a/examples/plot_pitfalls_permutation_importance.py +++ b/examples/plot_pitfalls_permutation_importance.py @@ -257,9 +257,7 @@ ) conditional_sampler = ConditionalSampler( - model_regression=RidgeCV( - alphas=np.logspace(-3, 3, 5), cv=KFold(n_splits=3) - ), + model_regression=RidgeCV(alphas=np.logspace(-3, 3, 5), cv=KFold(n_splits=3)), random_state=10, ) From 31ba5f5840ab57e4f68f6ae4fdd967063a44abfb Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Tue, 23 Sep 2025 10:49:22 +0200 Subject: [PATCH 41/46] fix example --- .../plot_conditional_vs_marginal_xor_data.py | 2 +- examples/plot_dcrt_example.py | 21 ++----------------- examples/plot_fmri_data_example.py | 2 +- 3 files changed, 4 insertions(+), 21 deletions(-) diff --git a/examples/plot_conditional_vs_marginal_xor_data.py b/examples/plot_conditional_vs_marginal_xor_data.py index 3a2c6a804..1351c80c6 100644 --- a/examples/plot_conditional_vs_marginal_xor_data.py +++ b/examples/plot_conditional_vs_marginal_xor_data.py @@ -77,7 +77,7 @@ # The decision function of the SVC shows that the model is able to learn the # non-linear decision boundary of the XOR problem. It also highlights that knowing # the value of both features is necessary to classify each sample correctly. 
-examples / plot_2D_simulation_example.py
+

 # %%
 # Computing the conditional and marginal importance
diff --git a/examples/plot_dcrt_example.py b/examples/plot_dcrt_example.py
index bcb8e1aaf..0a8b67241 100644
--- a/examples/plot_dcrt_example.py
+++ b/examples/plot_dcrt_example.py
@@ -54,15 +54,9 @@
 ## dcrt Lasso ##
 d0crt_lasso = D0CRT(
-<<<<<<< HEAD
     estimator=LassoCV(random_state=sim_ind, n_jobs=1),
-    screening=False,
+    screening_threshold=None,
     random_state=sim_ind,
-||||||| 57d40de4
-    d0crt_lasso = D0CRT(estimator=LassoCV(random_state=42, n_jobs=1), screening=False)
-=======
-    estimator=LassoCV(random_state=42, n_jobs=1), screening_threshold=None
->>>>>>> main
 )
 d0crt_lasso.fit_importance(X, y)
 pvals_lasso = d0crt_lasso.pvalues_
@@ -78,21 +72,10 @@
 ## dcrt Random Forest ##
 d0crt_random_forest = D0CRT(
     estimator=RandomForestRegressor(
-<<<<<<< HEAD
         n_estimators=100, random_state=sim_ind, n_jobs=1
     ),
-    screening=False,
-    random_state=sim_ind,
-||||||| 57d40de4
-    estimator=RandomForestRegressor(n_estimators=100, random_state=42, n_jobs=1),
-    screening=False,
-=======
-    n_estimators=100,
-    random_state=42,
-    n_jobs=1,
-    ),
     screening_threshold=None,
->>>>>>> main
+    random_state=sim_ind,
 )
 d0crt_random_forest.fit_importance(X, y)
 pvals_forest = d0crt_random_forest.pvalues_
diff --git a/examples/plot_fmri_data_example.py b/examples/plot_fmri_data_example.py
index d7ef14398..09c5ec738 100644
--- a/examples/plot_fmri_data_example.py
+++ b/examples/plot_fmri_data_example.py
@@ -64,7 +64,7 @@
 new_soft_limit = limit_5G if soft < 0 else min(limit_5G, soft)
 new_hard_limit = limit_5G if hard < 0 else min(limit_5G, hard)
 resource.setrlimit(resource.RLIMIT_AS, (new_soft_limit, new_hard_limit))
-n_job = 1
+n_jobs = 1
 
 
 # %%
From 08fa0d4bf0ae61cd1dc37787ebb385173b3057a9 Mon Sep 17 00:00:00 2001
From: kusch lionel
Date: Tue, 23 Sep 2025 11:20:28 +0200
Subject: [PATCH 42/46] fix language

---
 tools/examples/debugger_script/try_reproducibility.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/examples/debugger_script/try_reproducibility.py b/tools/examples/debugger_script/try_reproducibility.py
index 95d3c804a..462a0ddf1 100644
--- a/tools/examples/debugger_script/try_reproducibility.py
+++ b/tools/examples/debugger_script/try_reproducibility.py
@@ -12,6 +12,6 @@ def run_joblib(i):
 import plot_knockoff_aggregation  # include the example to test
 
-# run in parralel the same example for compare result
-parrallel = Parallel(n_jobs=4)
-parrallel(delayed(run_joblib)(i) for i in range(6))
+# run the same example in parallel to compare results
+parallel = Parallel(n_jobs=4)
+parallel(delayed(run_joblib)(i) for i in range(6))
From aba9798b501e42b75434ad6b670643b332aa0fd4 Mon Sep 17 00:00:00 2001
From: kusch lionel
Date: Tue, 23 Sep 2025 11:21:59 +0200
Subject: [PATCH 43/46] fix import order

---
 examples/plot_knockoffs_wisconsin.py                  | 1 -
 tools/examples/debugger_script/try_reproducibility.py | 1 -
 2 files changed, 2 deletions(-)

diff --git a/examples/plot_knockoffs_wisconsin.py b/examples/plot_knockoffs_wisconsin.py
index f028d1a02..c6ee46928 100644
--- a/examples/plot_knockoffs_wisconsin.py
+++ b/examples/plot_knockoffs_wisconsin.py
@@ -12,7 +12,6 @@
 import numpy as np
 import pandas as pd
 
-
 # Define the seeds for the reproducibility of the example
 rng = np.random.RandomState(43)
 
diff --git a/tools/examples/debugger_script/try_reproducibility.py b/tools/examples/debugger_script/try_reproducibility.py
index 462a0ddf1..070376b03 100644
--- a/tools/examples/debugger_script/try_reproducibility.py 
+++ b/tools/examples/debugger_script/try_reproducibility.py
@@ -1,7 +1,6 @@
 import os
 import sys
 
-
 from joblib import Parallel, delayed

 # add the example for import them
From ce15242d1fcbee2ec977bab5c1358305af62f311 Mon Sep 17 00:00:00 2001
From: lionel kusch
Date: Tue, 23 Sep 2025 16:48:17 +0200
Subject: [PATCH 44/46] Apply suggestions from code review

Co-authored-by: Joseph Paillard
---
 examples/plot_importance_classification_iris.py | 2 +-
 examples/plot_knockoff_aggregation.py           | 2 +-
 examples/plot_knockoffs_wisconsin.py            | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/plot_importance_classification_iris.py b/examples/plot_importance_classification_iris.py
index acf790f49..3d33fc2e5 100644
--- a/examples/plot_importance_classification_iris.py
+++ b/examples/plot_importance_classification_iris.py
@@ -37,7 +37,7 @@
 from hidimstat import CFI, PFI
 
 # Define the seeds for the reproducibility of the example
-rng = np.random.RandomState(0)
+rng = np.random.default_rng(0)
 # %%
 # Load the iris dataset and add a spurious feature
 # ------------------------------------------------
diff --git a/examples/plot_knockoff_aggregation.py b/examples/plot_knockoff_aggregation.py
index 1423715d4..8794cba82 100644
--- a/examples/plot_knockoff_aggregation.py
+++ b/examples/plot_knockoff_aggregation.py
@@ -58,7 +58,7 @@
 # verbosity of the joblib
 joblib_verbose = 0
 # Define the seeds for the reproducibility of the example
-rng = np.random.RandomState(42)
+rng = np.random.default_rng(42)
 
 
 # %%
diff --git a/examples/plot_knockoffs_wisconsin.py b/examples/plot_knockoffs_wisconsin.py
index c6ee46928..070196f64 100644
--- a/examples/plot_knockoffs_wisconsin.py
+++ b/examples/plot_knockoffs_wisconsin.py
@@ -13,7 +13,7 @@
 import pandas as pd
 
 # Define the seeds for the reproducibility of the example
-rng = np.random.RandomState(43)
+rng = np.random.default_rng(43)
 
 
 # %%
From 368c0d87c036dd32766e0b7e980d6f8ef6ef11d2 Mon Sep 17 00:00:00 2001
From: kusch lionel
Date: Tue, 23 Sep 2025 16:55:24 +0200
Subject: [PATCH 45/46] change random state to generator

---
 examples/plot_conditional_vs_marginal_xor_data.py     | 2 +-
 examples/plot_diabetes_variable_importance_example.py | 4 ++--
 examples/plot_model_agnostic_importance.py            | 2 +-
 examples/plot_pitfalls_permutation_importance.py      | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/examples/plot_conditional_vs_marginal_xor_data.py b/examples/plot_conditional_vs_marginal_xor_data.py
index 1351c80c6..34fa662b9 100644
--- a/examples/plot_conditional_vs_marginal_xor_data.py
+++ b/examples/plot_conditional_vs_marginal_xor_data.py
@@ -21,7 +21,7 @@
 # %%
 # To solve the XOR problem, we will use a Support Vector Classier (SVC) with Radial Basis Function (RBF) kernel.
 #
-rng = np.random.RandomState(0)
+rng = np.random.default_rng(0)
 X = rng.randn(400, 2)
 Y = np.logical_xor(X[:, 0] > 0, X[:, 1] > 0).astype(int)
diff --git a/examples/plot_diabetes_variable_importance_example.py b/examples/plot_diabetes_variable_importance_example.py
index b1dfd940f..603df59ec 100644
--- a/examples/plot_diabetes_variable_importance_example.py
+++ b/examples/plot_diabetes_variable_importance_example.py
@@ -89,7 +89,7 @@
 print(f"Fold {i}: {mse=}")
 
 # %%
-# Fit a baselien model on the diabetes dataset
+# Fit a baseline model on the diabetes dataset
 # --------------------------------------------
 # We use a Ridge regression model with a 10-fold cross-validation to fit the
 # diabetes dataset. 
@@ -97,7 +97,7 @@ n_folds = 10 regressor = RidgeCV(alphas=np.logspace(-3, 3, 10)) regressor_list = [clone(regressor) for _ in range(n_folds)] -kf = KFold(n_splits=n_folds, shuffle=True, random_state=0) +kf = KFold(n_splits=n_folds, shuffle=True, random_state=21) for i, (train_index, test_index) in enumerate(kf.split(X)): regressor_list[i].fit(X[train_index], y[train_index]) score = r2_score( diff --git a/examples/plot_model_agnostic_importance.py b/examples/plot_model_agnostic_importance.py index 140067eae..5bfdb90e9 100644 --- a/examples/plot_model_agnostic_importance.py +++ b/examples/plot_model_agnostic_importance.py @@ -40,7 +40,7 @@ # %% # Generate data where classes are not linearly separable # ------------------------------------------------------ -rng = np.random.RandomState(0) +rng = np.random.default_rng(0) X, y = make_circles(n_samples=500, noise=0.1, factor=0.6, random_state=rng) diff --git a/examples/plot_pitfalls_permutation_importance.py b/examples/plot_pitfalls_permutation_importance.py index 0eaf090b7..20b2cfaf0 100644 --- a/examples/plot_pitfalls_permutation_importance.py +++ b/examples/plot_pitfalls_permutation_importance.py @@ -29,7 +29,7 @@ from hidimstat.conditional_sampling import ConditionalSampler # Define the seeds for the reproducibility of the example -rng = np.random.RandomState(0) +rng = np.random.default_rng(0) # %% # Load the California housing dataset and add a spurious feature From c8734d1cda42b5a14271bb8afe48a30f8b7192f4 Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Tue, 23 Sep 2025 17:03:53 +0200 Subject: [PATCH 46/46] fix example --- examples/plot_conditional_vs_marginal_xor_data.py | 2 +- examples/plot_knockoffs_wisconsin.py | 4 ++-- examples/plot_model_agnostic_importance.py | 7 ++++++- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/examples/plot_conditional_vs_marginal_xor_data.py b/examples/plot_conditional_vs_marginal_xor_data.py index 34fa662b9..0c4f90cbb 100644 --- a/examples/plot_conditional_vs_marginal_xor_data.py +++ b/examples/plot_conditional_vs_marginal_xor_data.py @@ -22,7 +22,7 @@ # To solve the XOR problem, we will use a Support Vector Classier (SVC) with Radial Basis Function (RBF) kernel. 
# rng = np.random.default_rng(0) -X = rng.randn(400, 2) +X = rng.standard_normal((400, 2)) Y = np.logical_xor(X[:, 0] > 0, X[:, 1] > 0).astype(int) xx, yy = np.meshgrid( diff --git a/examples/plot_knockoffs_wisconsin.py b/examples/plot_knockoffs_wisconsin.py index 070196f64..6f03ecb71 100644 --- a/examples/plot_knockoffs_wisconsin.py +++ b/examples/plot_knockoffs_wisconsin.py @@ -84,8 +84,8 @@ for k in range(repeats_noise): X_train_c = X_train.copy() X_test_c = X_test.copy() - noises_train.append(X_train_c + 2 * rng.randn(n_train, p)) - noises_test.append(X_test_c + 2 * rng.randn(n_test, p)) + noises_train.append(X_train_c + 2 * rng.standard_normal((n_train, p))) + noises_test.append(X_test_c + 2 * rng.standard_normal((n_test, p))) feature_names_noise += [f"spurious #{k*p+i}" for i in range(p)] noisy_train = np.concatenate(noises_train, axis=1) diff --git a/examples/plot_model_agnostic_importance.py b/examples/plot_model_agnostic_importance.py index 5bfdb90e9..6ac08751e 100644 --- a/examples/plot_model_agnostic_importance.py +++ b/examples/plot_model_agnostic_importance.py @@ -41,7 +41,12 @@ # Generate data where classes are not linearly separable # ------------------------------------------------------ rng = np.random.default_rng(0) -X, y = make_circles(n_samples=500, noise=0.1, factor=0.6, random_state=rng) +X, y = make_circles( + n_samples=500, + noise=0.1, + factor=0.6, + random_state=np.random.RandomState(rng.bit_generator), +) fig, ax = plt.subplots()
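
For reference, two seeding patterns recur throughout this series. Patches 30-31
experiment with per-worker seeds derived from a root seed, following numpy's
parallel random-number guidelines, and patches 44-46 migrate the examples from
the legacy np.random.RandomState API to np.random.default_rng, bridging back to
a RandomState only where a downstream API still requires one. A minimal sketch
of both patterns (illustrative snippet only, not part of any commit above):

    import numpy as np

    # Per-worker seeding recommended by
    # https://numpy.org/doc/stable/reference/random/parallel.html
    # (the pattern that SeedGenerator in patch 30 approximates): each worker
    # gets an independent stream spawned from a single root seed.
    root = np.random.SeedSequence(42)
    workers = [np.random.default_rng(s) for s in root.spawn(4)]

    # RandomState -> Generator migration (patches 44-46):
    rng = np.random.default_rng(0)          # modern Generator API
    x = rng.standard_normal((400, 2))       # replaces RandomState.randn(400, 2)
    # Bridge for APIs that still expect a RandomState (e.g. scikit-learn's
    # random_state checks): wrap the Generator's bit generator so both objects
    # draw from the same underlying stream.
    legacy = np.random.RandomState(rng.bit_generator)

This keeps a single seed at the top of each example while remaining compatible
with estimators that have not yet adopted the Generator API.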