From 190300dbfea2ea34caf1598a17e9b08213da2963 Mon Sep 17 00:00:00 2001
From: kusch lionel <lionel.a.kusch@inria.fr>
Date: Fri, 29 Aug 2025 13:16:37 +0200
Subject: [PATCH 01/44] gaussian distribution reformat

---
 src/hidimstat/gaussian_knockoff.py            | 191 -----------------
 .../gaussian_distribution.py                  | 194 ++++++++++++++++++
 .../test_gaussian_distribution.py             |  85 ++++++++
 3 files changed, 279 insertions(+), 191 deletions(-)
 delete mode 100644 src/hidimstat/gaussian_knockoff.py
 create mode 100644 src/hidimstat/statistical_tools/gaussian_distribution.py
 create mode 100644 test/statistical_tools/test_gaussian_distribution.py

diff --git a/src/hidimstat/gaussian_knockoff.py b/src/hidimstat/gaussian_knockoff.py
deleted file mode 100644
index 886312c7e..000000000
--- a/src/hidimstat/gaussian_knockoff.py
+++ /dev/null
@@ -1,191 +0,0 @@
-import warnings
-
-import numpy as np
-
-
-def gaussian_knockoff_generation(X, mu, sigma, seed=None, tol=1e-14):
-    """
-    Generate second-order knockoff variables using the equi-correlated method.
-
-    This function generates knockoff variables for a given design matrix X,
-    using the equi-correlated method described in :footcite:t:`barber2015controlling`
-    and :footcite:t:`candes2018panning`. The function takes as input the design matrix
-    X, the vector of empirical mean values mu, and the empirical covariance
-    matrix sigma. It returns the knockoff variables X_tilde.
-
-    The original implementation can be found at
-    https://github.com/msesia/knockoff-filter/blob/master/R/knockoff/R/create_gaussian.R
-
-    Parameters
-    ----------
-    X: 2D ndarray (n_samples, n_features)
-        The original design matrix.
-
-    mu : 1D ndarray (n_features, )
-        A vector of empirical mean values.
-
-    sigma : 2D ndarray (n_samples, n_features)
-        The empirical covariance matrix.
-
-    seed : int, optional
-        A random seed for generating the uniform noise used to create
-        the knockoff variables.
-
-    tol : float, default=1.e-14
-        A tolerance value used for numerical stability in the calculation
-        of the Cholesky decomposition.
-
-    Returns
-    -------
-    X_tilde : 2D ndarray (n_samples, n_features)
-        The knockoff variables.
-
-    mu_tilde : 2D ndarray (n_samples, n_features)
-        The mean matrix used for generating knockoffs.
-
-    sigma_tilde_decompose : 2D ndarray (n_features, n_features)
-        The Cholesky decomposition of the covariance matrix.
-
-    References
-    ----------
-    .. footbibliography::
-    """
-    n_samples, n_features = X.shape
-
-    # create a uniform noise for all the data
-    rng = np.random.RandomState(seed)
-    u_tilde = rng.randn(n_samples, n_features)
-
-    diag_s = np.diag(_s_equi(sigma, tol=tol))
-
-    sigma_inv_s = np.linalg.solve(sigma, diag_s)
-
-    # First part on the RHS of equation 1.4 in barber2015controlling
-    mu_tilde = X - np.dot(X - mu, sigma_inv_s)
-    # To calculate the Cholesky decomposition later on
-    sigma_tilde = 2 * diag_s - diag_s.dot(sigma_inv_s)
-    # test is the matrix is positive definite
-    while not np.all(np.linalg.eigvalsh(sigma_tilde) > tol):
-        sigma_tilde += 1e-10 * np.eye(n_features)
-        warnings.warn(
-            "The conditional covariance matrix for knockoffs is not positive "
-            "definite. Adding minor positive value to the matrix.",
-            UserWarning,
-        )
-
-    # Equation 1.4 in barber2015controlling
-    sigma_tilde_decompose = np.linalg.cholesky(sigma_tilde)
-    X_tilde = mu_tilde + np.dot(u_tilde, sigma_tilde_decompose)
-
-    return X_tilde, mu_tilde, sigma_tilde_decompose
-
-
-def repeat_gaussian_knockoff_generation(mu_tilde, sigma_tilde_decompose, seed):
-    """
-    Generate additional knockoff variables using pre-computed values.
-
-    This function generates additional knockoff variables using pre-computed
-    values returned by the gaussian_knockoff_generation function
-    with repeat=True. It takes as input mu_tilde and sigma_tilde_decompose,
-    which were returned by gaussian_knockoff_generation, and a random seed.
-    It returns the new knockoff variables X_tilde.
-
-    Parameters
-    ----------
-    mu_tilde : 2D ndarray (n_samples, n_features)
-        The matrix of means used to generate the knockoff variables,
-        returned by gaussian_knockoff_generation.
-
-    sigma_tilde_decompose : 2D ndarray (n_features, n_features)
-        The Cholesky decomposition of the covariance matrix used
-        to generate the knockoff variables,returned by
-        gaussian_knockoff_generation.
-
-    seed : int
-        A random seed for generating the uniform noise used to create
-        the knockoff variables.
-
-    Returns
-    -------
-    X_tilde : 2D ndarray (n_samples, n_features)
-        The knockoff variables.
-    """
-    n_samples, n_features = mu_tilde.shape
-
-    # create a uniform noise for all the data
-    rng = np.random.RandomState(seed)
-    u_tilde = rng.randn(n_samples, n_features)
-
-    X_tilde = mu_tilde + np.dot(u_tilde, sigma_tilde_decompose)
-    return X_tilde
-
-
-def _s_equi(sigma, tol=1e-14):
-    """
-    Estimate the diagonal matrix of correlation between real
-    and knockoff variables using the equi-correlated equation.
-
-    This function estimates the diagonal matrix of correlation
-    between real and knockoff variables using the equi-correlated
-    equation described in :footcite:t:`barber2015controlling` and
-    :footcite:t:`candes2018panning`. It takes as input the empirical
-    covariance matrix sigma and a tolerance value tol,
-    and returns a vector of diagonal values of the estimated
-    matrix diag{s}.
-
-    Parameters
-    ----------
-    sigma : 2D ndarray (n_features, n_features)
-        The empirical covariance matrix calculated from
-        the original design matrix.
-
-    tol : float, optional
-        A tolerance value used for numerical stability in the calculation
-        of the eigenvalues of the correlation matrix.
-
-    Returns
-    -------
-    1D ndarray (n_features, )
-        A vector of diagonal values of the estimated matrix diag{s}.
-
-    Raises
-    ------
-    Exception
-        If the covariance matrix is not positive-definite.
-    """
-    n_features = sigma.shape[0]
-
-    # Convert covariance matrix to correlation matrix
-    # as example see cov2corr from statsmodels
-    features_std = np.sqrt(np.diag(sigma))
-    scale = np.outer(features_std, features_std)
-    corr_matrix = sigma / scale
-
-    eig_value = np.linalg.eigvalsh(corr_matrix)
-    lambda_min = np.min(eig_value[0])
-    # check if the matrix is positive-defined
-    if lambda_min <= 0:
-        raise Exception("The covariance matrix is not positive-definite.")
-
-    s = np.ones(n_features) * min(2 * lambda_min, 1)
-
-    psd = np.all(np.linalg.eigvalsh(2 * corr_matrix - np.diag(s)) > tol)
-    s_eps = 0
-    while not psd:
-        if s_eps == 0:
-            s_eps = np.finfo(type(s[0])).eps  # avoid zeros
-        else:
-            s_eps *= 10
-        # if all eigval > 0 then the matrix is positive define
-        psd = np.all(
-            np.linalg.eigvalsh(2 * corr_matrix - np.diag(s * (1 - s_eps))) > tol
-        )
-        warnings.warn(
-            "The equi-correlated matrix for knockoffs is not positive "
-            f"definite. Reduce the value of distance by {s_eps}.",
-            UserWarning,
-        )
-
-    s = s * (1 - s_eps)
-
-    return s * np.diag(sigma)
diff --git a/src/hidimstat/statistical_tools/gaussian_distribution.py b/src/hidimstat/statistical_tools/gaussian_distribution.py
new file mode 100644
index 000000000..207f988be
--- /dev/null
+++ b/src/hidimstat/statistical_tools/gaussian_distribution.py
@@ -0,0 +1,194 @@
+import warnings
+
+import numpy as np
+from sklearn.preprocessing import StandardScaler
+from sklearn.utils import check_random_state
+
+
+class GaussianDistribution:
+    """
+    Generator for second-order Gaussian variables using the equi-correlated method.
+    Creates synthetic variables that preserve the covariance structure of the original
+    variables while ensuring conditional independence between the original and synthetic data.
+    Parameters
+    ----------
+    cov_estimator : object
+        Estimator for computing the covariance matrix. Must implement fit and
+        have a covariance_ attribute after fitting.
+    random_state : int or None, default=None
+        Random seed for generating synthetic data.
+    centered : bool, default=False
+        Whether to center and scale the input data before generating synthetic variables.
+    tol : float, default=1e-14
+        Tolerance for numerical stability in matrix computations.
+    Attributes
+    ----------
+    mu_tilde_ : ndarray of shape (n_samples, n_features)
+        Mean matrix for generating synthetic variables.
+    sigma_tilde_decompose_ : ndarray of shape (n_features, n_features)
+        Cholesky decomposition of the synthetic covariance matrix.
+    References
+    ----------
+    .. footbibliography::
+    """
+
+    def __init__(self, cov_estimator, random_state=None, centered=False, tol=1e-14):
+        self.cov_estimator = cov_estimator
+        self.centered = centered
+        self.tol = tol
+        self.rng = check_random_state(random_state)
+
+    def fit(self, X):
+        """
+        Fit the Gaussian synthetic variable generator.
+        This method estimates the parameters needed to generate Gaussian synthetic variables
+        based on the input data. It follows a methodology for creating second-order
+        synthetic variables that preserve the covariance structure.
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            The input samples used to estimate the parameters for synthetic variable generation.
+            The data is assumed to follow a Gaussian distribution.
+        Returns
+        -------
+        self : object
+            Returns the instance itself.
+        Notes
+        -----
+        The method implements the following steps:
+        1. Centers and scales the data if specified
+        2. Estimates mean and covariance of input data
+        3. Computes parameters for synthetic variable generation
+        """
+        _, n_features = X.shape
+        if self.centered:
+            X_ = StandardScaler().fit_transform(X)
+        else:
+            X_ = X
+
+        # estimation of X distribution
+        # original implementation:
+        # https://github.com/msesia/knockoff-filter/blob/master/R/knockoff/R/create_second_order.R
+        mu = X_.mean(axis=0)
+        sigma = self.cov_estimator.fit(X_).covariance_
+
+        diag_s = np.diag(_s_equi(sigma, tol=self.tol))
+
+        sigma_inv_s = np.linalg.solve(sigma, diag_s)
+
+        # First part on the RHS of equation 1.4 in barber2015controlling
+        self.mu_tilde_ = X - np.dot(X - mu, sigma_inv_s)
+        # To calculate the Cholesky decomposition later on
+        sigma_tilde = 2 * diag_s - diag_s.dot(sigma_inv_s)
+        # test is the matrix is positive definite
+        while not np.all(np.linalg.eigvalsh(sigma_tilde) > self.tol):
+            sigma_tilde += 1e-10 * np.eye(n_features)
+            warnings.warn(
+                "The conditional covariance matrix for knockoffs is not positive "
+                "definite. Adding minor positive value to the matrix.",
+                UserWarning,
+            )
+
+        self.sigma_tilde_decompose_ = np.linalg.cholesky(sigma_tilde)
+
+        return self
+
+    def _check_fit(self):
+        """
+        Check if the model has been fit before performing analysis.
+        Raises
+        ------
+        ValueError
+            If any of the required attributes are missing, indicating the model
+            hasn't been fit before generating synthetic variables.
+        """
+        if not hasattr(self, "mu_tilde_") or not hasattr(
+            self, "sigma_tilde_decompose_"
+        ):
+            raise ValueError("The GaussianGenerator requires to be fit before simulate")
+
+    def sample(self):
+        """
+        Generate synthetic variables.
+        This function generates synthetic variables that preserve the covariance structure
+        of the original data while ensuring conditional independence.
+        Returns
+        -------
+        X_tilde : 2D ndarray (n_samples, n_features)
+            The synthetic variables.
+        """
+        self._check_fit()
+        n_samples, n_features = self.mu_tilde_.shape
+
+        # create a uniform noise for all the data
+        u_tilde = self.rng.randn(n_samples, n_features)
+
+        # Equation 1.4 in barber2015controlling
+        X_tilde = self.mu_tilde_ + np.dot(u_tilde, self.sigma_tilde_decompose_)
+        return X_tilde
+
+
+def _s_equi(sigma, tol=1e-14):
+    """
+    Estimate the diagonal matrix of correlation between real
+    and knockoff variables using the equi-correlated equation.
+    This function estimates the diagonal matrix of correlation
+    between real and knockoff variables using the equi-correlated
+    equation described in :footcite:t:`barber2015controlling` and
+    :footcite:t:`candes2018panning`. It takes as input the empirical
+    covariance matrix sigma and a tolerance value tol,
+    and returns a vector of diagonal values of the estimated
+    matrix diag{s}.
+    Parameters
+    ----------
+    sigma : 2D ndarray (n_features, n_features)
+        The empirical covariance matrix calculated from
+        the original design matrix.
+    tol : float, optional
+        A tolerance value used for numerical stability in the calculation
+        of the eigenvalues of the correlation matrix.
+    Returns
+    -------
+    1D ndarray (n_features, )
+        A vector of diagonal values of the estimated matrix diag{s}.
+    Raises
+    ------
+    Exception
+        If the covariance matrix is not positive-definite.
+    """
+    n_features = sigma.shape[0]
+
+    # Convert covariance matrix to correlation matrix
+    # as example see cov2corr from statsmodels
+    features_std = np.sqrt(np.diag(sigma))
+    scale = np.outer(features_std, features_std)
+    corr_matrix = sigma / scale
+
+    eig_value = np.linalg.eigvalsh(corr_matrix)
+    lambda_min = np.min(eig_value[0])
+    # check if the matrix is positive-defined
+    if lambda_min <= 0:
+        raise Exception("The covariance matrix is not positive-definite.")
+
+    s = np.ones(n_features) * min(2 * lambda_min, 1)
+
+    psd = np.all(np.linalg.eigvalsh(2 * corr_matrix - np.diag(s)) > tol)
+    s_eps = 0
+    while not psd:
+        if s_eps == 0:
+            s_eps = np.finfo(type(s[0])).eps  # avoid zeros
+        else:
+            s_eps *= 10
+        # if all eigval > 0 then the matrix is positive define
+        psd = np.all(
+            np.linalg.eigvalsh(2 * corr_matrix - np.diag(s * (1 - s_eps))) > tol
+        )
+        warnings.warn(
+            "The equi-correlated matrix for knockoffs is not positive "
+            f"definite. Reduce the value of distance by {s_eps}.",
+            UserWarning,
+        )
+
+    s = s * (1 - s_eps)
+
+    return s * np.diag(sigma)
diff --git a/test/statistical_tools/test_gaussian_distribution.py b/test/statistical_tools/test_gaussian_distribution.py
new file mode 100644
index 000000000..28c138114
--- /dev/null
+++ b/test/statistical_tools/test_gaussian_distribution.py
@@ -0,0 +1,85 @@
+import numpy as np
+import pytest
+from hidimstat.statistical_tools.gaussian_distribution import (
+    _s_equi,
+    GaussianDistribution,
+)
+from hidimstat._utils.scenario import multivariate_simulation
+from sklearn.covariance import LedoitWolf
+
+
+def test_gaussian_equi():
+    """test function of gaussian"""
+    seed = 42
+    n = 100
+    p = 50
+    X, y, beta, noise = multivariate_simulation(n, p, seed=seed)
+    generator = GaussianDistribution(
+        cov_estimator=LedoitWolf(assume_centered=True),
+        random_state=seed * 2,
+        centered=False,
+    )
+    generator.fit(X=X)
+    X_tilde = generator.sample()
+    assert X_tilde.shape == (n, p)
+
+
+def test_gaussian_center():
+    """test function of gaussian"""
+    seed = 42
+    n = 100
+    p = 50
+    X, y, beta, noise = multivariate_simulation(n, p, seed=seed)
+    generator = GaussianDistribution(
+        cov_estimator=LedoitWolf(assume_centered=True),
+        random_state=seed * 2,
+        centered=True,
+    )
+    generator.fit(X=X)
+    X_tilde = generator.sample()
+    assert X_tilde.shape == (n, p)
+
+
+def test_gaussian_error():
+    """test function error"""
+    seed = 42
+    n = 100
+    p = 50
+    X, y, beta, noise = multivariate_simulation(n, p, seed=seed)
+    generator = GaussianDistribution(
+        cov_estimator=LedoitWolf(assume_centered=True),
+        random_state=seed * 2,
+        centered=True,
+    )
+    with pytest.raises(
+        ValueError, match="The GaussianGenerator requires to be fit before simulate"
+    ):
+        generator.sample()
+
+
+def test_s_equi_not_define_positive():
+    """test the warning and error of s_equi function"""
+    n = 10
+    tol = 1e-7
+    seed = 42
+
+    # random positive matrix
+    rgn = np.random.RandomState(seed)
+    a = rgn.randn(n, n)
+    a -= np.min(a)
+    with pytest.raises(
+        Exception, match="The covariance matrix is not positive-definite."
+    ):
+        _s_equi(a)
+
+    # matrix with positive eigenvalues, positive diagonal
+    while not np.all(np.linalg.eigvalsh(a) > tol):
+        a += 0.1 * np.eye(n)
+    with pytest.warns(UserWarning, match="The equi-correlated matrix"):
+        _s_equi(a)
+
+    # positive definite matrix
+    u, s, vh = np.linalg.svd(a)
+    d = np.eye(n)
+    sigma = u * d * u.T
+    _s_equi(sigma)

From b576522e33ba743c794ed35d6c35c5f1e352516f Mon Sep 17 00:00:00 2001
From: kusch lionel <lionel.a.kusch@inria.fr>
Date: Fri, 29 Aug 2025 13:30:42 +0200
Subject: [PATCH 02/44] fix knockoff

---
 src/hidimstat/knockoffs.py | 35 ++++++-----------------------------
 1 file changed, 6 insertions(+), 29 deletions(-)

diff --git a/src/hidimstat/knockoffs.py b/src/hidimstat/knockoffs.py
index 7dab07aeb..7d7609b44 100644
--- a/src/hidimstat/knockoffs.py
+++ b/src/hidimstat/knockoffs.py
@@ -7,10 +7,7 @@
 from sklearn.utils import check_random_state
 from sklearn.utils.validation import check_memory
 
-from hidimstat.gaussian_knockoff import (
-    gaussian_knockoff_generation,
-    repeat_gaussian_knockoff_generation,
-)
+from hidimstat.statistical_tools.gaussian_distribution import GaussianDistribution
 from hidimstat.statistical_tools.multiple_testing import fdr_threshold
 from hidimstat.statistical_tools.aggregation import quantile_aggregation
 
@@ -196,32 +193,12 @@ def model_x_knockoff(
         raise TypeError("Wrong type for random_state")
     seed_list = rng.randint(1, np.iinfo(np.int32).max, n_bootstraps)
 
-    if centered:
-        X = StandardScaler().fit_transform(X)
-
-    # estimation of X distribution
-    # original implementation:
-    # https://github.com/msesia/knockoff-filter/blob/master/R/knockoff/R/create_second_order.R
-    mu = X.mean(axis=0)
-    sigma = cov_estimator.fit(X).covariance_
-
     # Create knockoff variables
-    X_tilde, mu_tilde, sigma_tilde_decompose = memory.cache(
-        gaussian_knockoff_generation
-    )(X, mu, sigma, seed=seed_list[0], tol=tol_gauss)
-
-    if n_bootstraps == 1:
-        X_tildes = [X_tilde]
-    else:
-        X_tildes = parallel(
-            delayed(repeat_gaussian_knockoff_generation)(
-                mu_tilde,
-                sigma_tilde_decompose,
-                seed=seed,
-            )
-            for seed in seed_list[1:]
-        )
-        X_tildes.insert(0, X_tilde)
+    conditionnal_sampler = GaussianDistribution(
+        cov_estimator, random_state=seed_list[0], centered=centered, tol=tol_gauss
+    )
+    conditionnal_sampler.fit(X)
+    X_tildes = [conditionnal_sampler.sample() for i in range(n_bootstraps)]
 
     results = parallel(
         delayed(memory.cache(_stat_coefficient_diff))(

From 8f4e03236600b255e28cee36557e6f7bc1b028c6 Mon Sep 17 00:00:00 2001
From: kusch lionel <lionel.a.kusch@inria.fr>
Date: Fri, 29 Aug 2025 13:33:29 +0200
Subject: [PATCH 03/44] move conditional sampler in statistical tools

---
 examples/plot_pitfalls_permutation_importance.py              | 2 +-
 src/hidimstat/conditional_feature_importance.py               | 2 +-
 src/hidimstat/{ => statistical_tools}/conditional_sampling.py | 0
 test/test_conditional_sampling.py                             | 2 +-
 4 files changed, 3 insertions(+), 3 deletions(-)
 rename src/hidimstat/{ => statistical_tools}/conditional_sampling.py (100%)

diff --git a/examples/plot_pitfalls_permutation_importance.py b/examples/plot_pitfalls_permutation_importance.py
index af4deb83e..adc144a62 100644
--- a/examples/plot_pitfalls_permutation_importance.py
+++ b/examples/plot_pitfalls_permutation_importance.py
@@ -26,7 +26,7 @@
 from sklearn.preprocessing import StandardScaler
 
 from hidimstat import CFI, PFI
-from hidimstat.conditional_sampling import ConditionalSampler
+from hidimstat.statistical_tools.conditional_sampling import ConditionalSampler
 
 rng = np.random.RandomState(0)
 
diff --git a/src/hidimstat/conditional_feature_importance.py b/src/hidimstat/conditional_feature_importance.py
index 9b0e7905f..8c9686334 100644
--- a/src/hidimstat/conditional_feature_importance.py
+++ b/src/hidimstat/conditional_feature_importance.py
@@ -5,7 +5,7 @@
 from sklearn.utils.validation import check_random_state
 
 from hidimstat.base_perturbation import BasePerturbation
-from hidimstat.conditional_sampling import ConditionalSampler
+from hidimstat.statistical_tools.conditional_sampling import ConditionalSampler
 
 
 class CFI(BasePerturbation):
diff --git a/src/hidimstat/conditional_sampling.py b/src/hidimstat/statistical_tools/conditional_sampling.py
similarity index 100%
rename from src/hidimstat/conditional_sampling.py
rename to src/hidimstat/statistical_tools/conditional_sampling.py
diff --git a/test/test_conditional_sampling.py b/test/test_conditional_sampling.py
index d42f1f11c..7c347ff50 100644
--- a/test/test_conditional_sampling.py
+++ b/test/test_conditional_sampling.py
@@ -11,7 +11,7 @@
 from sklearn.multiclass import OneVsRestClassifier
 from sklearn.preprocessing import StandardScaler
 
-from hidimstat.conditional_sampling import ConditionalSampler
+from hidimstat.statistical_tools.conditional_sampling import ConditionalSampler
 
 
 def test_continuous_case():

From 942941893329553504f5af81cf31e2e1c3bbce51 Mon Sep 17 00:00:00 2001
From: kusch lionel <lionel.a.kusch@inria.fr>
Date: Fri, 29 Aug 2025 13:41:06 +0200
Subject: [PATCH 04/44] move test from knockoff

---
 test/test_knockoff.py | 70 -------------------------------------------
 1 file changed, 70 deletions(-)

diff --git a/test/test_knockoff.py b/test/test_knockoff.py
index 3b0e86aa1..8ec615772 100644
--- a/test/test_knockoff.py
+++ b/test/test_knockoff.py
@@ -4,7 +4,6 @@
     model_x_knockoff_bootstrap_e_value,
     model_x_knockoff_bootstrap_quantile,
 )
-from hidimstat.gaussian_knockoff import gaussian_knockoff_generation, _s_equi
 from hidimstat._utils.scenario import multivariate_simulation
 from hidimstat.statistical_tools.multiple_testing import fdp_power
 import numpy as np
@@ -235,72 +234,3 @@ def test_estimate_distribution():
     )
     for i in selected:
         assert np.any(i == non_zero)
-
-
-def test_gaussian_knockoff_equi():
-    """test function of gaussian knockoff"""
-    seed = 42
-    n = 100
-    p = 50
-    X, y, beta, noise = multivariate_simulation(n, p, seed=seed)
-    non_zero = np.where(beta)[0]
-    mu = X.mean(axis=0)
-    sigma = LedoitWolf(assume_centered=True).fit(X).covariance_
-
-    X_tilde, mu_tilde, sigma_tilde_decompose = gaussian_knockoff_generation(
-        X, mu, sigma, seed=seed * 2
-    )
-
-    assert X_tilde.shape == (n, p)
-
-
-def test_gaussian_knockoff_equi_warning():
-    "test warning in guassian knockoff"
-    seed = 42
-    n = 100
-    p = 50
-    tol = 1e-14
-    rgn = np.random.RandomState(seed)
-    X = rgn.randn(n, p)
-    mu = X.mean(axis=0)
-    # create a positive definite matrix
-    u, s, vh = np.linalg.svd(rgn.randn(p, p))
-    d = np.eye(p) * tol / 10
-    sigma = u * d * u.T
-    with pytest.warns(
-        UserWarning,
-        match="The conditional covariance matrix for knockoffs is not positive",
-    ):
-        X_tilde, mu_tilde, sigma_tilde_decompose = gaussian_knockoff_generation(
-            X, mu, sigma, seed=seed * 2, tol=tol
-        )
-
-    assert X_tilde.shape == (n, p)
-
-
-def test_s_equi_not_define_positive():
-    """test the warning and error of s_equi function"""
-    n = 10
-    tol = 1e-7
-    seed = 42
-
-    # random positive matrix
-    rgn = np.random.RandomState(seed)
-    a = rgn.randn(n, n)
-    a -= np.min(a)
-    with pytest.raises(
-        Exception, match="The covariance matrix is not positive-definite."
-    ):
-        _s_equi(a)
-
-    # matrix with positive eigenvalues, positive diagonal
-    while not np.all(np.linalg.eigvalsh(a) > tol):
-        a += 0.1 * np.eye(n)
-    with pytest.warns(UserWarning, match="The equi-correlated matrix"):
-        _s_equi(a)
-
-    # positive definite matrix
-    u, s, vh = np.linalg.svd(a)
-    d = np.eye(n)
-    sigma = u * d * u.T
-    _s_equi(sigma)

From 81f2fb06d70f5e8f0b836b7b97bb20c0d524d522 Mon Sep 17 00:00:00 2001
From: kusch lionel <lionel.a.kusch@inria.fr>
Date: Fri, 29 Aug 2025 13:43:51 +0200
Subject: [PATCH 05/44] move test in right folder

---
 test/{ => statistical_tools}/test_conditional_sampling.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename test/{ => statistical_tools}/test_conditional_sampling.py (100%)

diff --git a/test/test_conditional_sampling.py b/test/statistical_tools/test_conditional_sampling.py
similarity index 100%
rename from test/test_conditional_sampling.py
rename to test/statistical_tools/test_conditional_sampling.py

From 17eabee45e0795c731abfdd0de84d5a01d60fee4 Mon Sep 17 00:00:00 2001
From: kusch lionel <lionel.a.kusch@inria.fr>
Date: Fri, 29 Aug 2025 18:51:21 +0200
Subject: [PATCH 06/44] remove gaussian

---
 .../statistical_tools/gaussian_distribution.py      | 13 +++----------
 .../statistical_tools/test_gaussian_distribution.py |  3 ---
 2 files changed, 3 insertions(+), 13 deletions(-)

diff --git a/src/hidimstat/statistical_tools/gaussian_distribution.py b/src/hidimstat/statistical_tools/gaussian_distribution.py
index 207f988be..7aad0ba68 100644
--- a/src/hidimstat/statistical_tools/gaussian_distribution.py
+++ b/src/hidimstat/statistical_tools/gaussian_distribution.py
@@ -17,8 +17,6 @@ class GaussianDistribution:
         have a covariance_ attribute after fitting.
     random_state : int or None, default=None
         Random seed for generating synthetic data.
-    centered : bool, default=False
-        Whether to center and scale the input data before generating synthetic variables.
     tol : float, default=1e-14
         Tolerance for numerical stability in matrix computations.
     Attributes
@@ -32,9 +30,8 @@ class GaussianDistribution:
     .. footbibliography::
     """
 
-    def __init__(self, cov_estimator, random_state=None, centered=False, tol=1e-14):
+    def __init__(self, cov_estimator, random_state=None, tol=1e-14):
         self.cov_estimator = cov_estimator
-        self.centered = centered
         self.tol = tol
         self.rng = check_random_state(random_state)
 
@@ -61,16 +58,12 @@ def fit(self, X):
         3. Computes parameters for synthetic variable generation
         """
         _, n_features = X.shape
-        if self.centered:
-            X_ = StandardScaler().fit_transform(X)
-        else:
-            X_ = X
 
         # estimation of X distribution
         # original implementation:
         # https://github.com/msesia/knockoff-filter/blob/master/R/knockoff/R/create_second_order.R
-        mu = X_.mean(axis=0)
-        sigma = self.cov_estimator.fit(X_).covariance_
+        mu = X.mean(axis=0)
+        sigma = self.cov_estimator.fit(X).covariance_
 
         diag_s = np.diag(_s_equi(sigma, tol=self.tol))
 
diff --git a/test/statistical_tools/test_gaussian_distribution.py b/test/statistical_tools/test_gaussian_distribution.py
index 28c138114..ba586dc1b 100644
--- a/test/statistical_tools/test_gaussian_distribution.py
+++ b/test/statistical_tools/test_gaussian_distribution.py
@@ -17,7 +17,6 @@ def test_gaussian_equi():
     generator = GaussianDistribution(
         cov_estimator=LedoitWolf(assume_centered=True),
         random_state=seed * 2,
-        centered=False,
     )
     generator.fit(X=X)
     X_tilde = generator.sample()
@@ -33,7 +32,6 @@ def test_gaussian_center():
     generator = GaussianDistribution(
         cov_estimator=LedoitWolf(assume_centered=True),
         random_state=seed * 2,
-        centered=True,
     )
     generator.fit(X=X)
     X_tilde = generator.sample()
@@ -49,7 +47,6 @@ def test_gaussian_error():
     generator = GaussianDistribution(
         cov_estimator=LedoitWolf(assume_centered=True),
         random_state=seed * 2,
-        centered=True,
     )
     with pytest.raises(
         ValueError, match="The GaussianGenerator requires to be fit before simulate"

From 0bac34a15f89b356536e27a3758514c74b61a19a Mon Sep 17 00:00:00 2001
From: kusch lionel <lionel.a.kusch@inria.fr>
Date: Fri, 29 Aug 2025 18:53:43 +0200
Subject: [PATCH 07/44] remove unsed import

---
 src/hidimstat/statistical_tools/gaussian_distribution.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/hidimstat/statistical_tools/gaussian_distribution.py b/src/hidimstat/statistical_tools/gaussian_distribution.py
index 7aad0ba68..e0fafeb76 100644
--- a/src/hidimstat/statistical_tools/gaussian_distribution.py
+++ b/src/hidimstat/statistical_tools/gaussian_distribution.py
@@ -1,7 +1,6 @@
 import warnings
 
 import numpy as np
-from sklearn.preprocessing import StandardScaler
 from sklearn.utils import check_random_state
 
 
From b965f13ee086d6600dfbb5255f040c02f5ec2e79 Mon Sep 17 00:00:00 2001
From: kusch lionel <lionel.a.kusch@inria.fr>
Date: Fri, 29 Aug 2025 18:56:41 +0200
Subject: [PATCH 08/44] fix center to knockoff

---
 src/hidimstat/knockoffs.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/hidimstat/knockoffs.py b/src/hidimstat/knockoffs.py
index 7d7609b44..e7461e8b9 100644
--- a/src/hidimstat/knockoffs.py
+++ b/src/hidimstat/knockoffs.py
@@ -193,9 +193,12 @@ def model_x_knockoff(
         raise TypeError("Wrong type for random_state")
     seed_list = rng.randint(1, np.iinfo(np.int32).max, n_bootstraps)
 
+    if centered:
+        X = StandardScaler().fit_transform(X)
+
     # Create knockoff variables
     conditionnal_sampler = GaussianDistribution(
-        cov_estimator, random_state=seed_list[0], centered=centered, tol=tol_gauss
+        cov_estimator, random_state=seed_list[0], tol=tol_gauss
     )
     conditionnal_sampler.fit(X)
     X_tildes = [conditionnal_sampler.sample() for i in range(n_bootstraps)]

From 7e29c970c0c6bb4895701c313d09140969ab2724 Mon Sep 17 00:00:00 2001
From: kusch lionel <lionel.a.kusch@inria.fr>
Date: Mon, 1 Sep 2025 13:33:11 +0200
Subject: [PATCH 09/44] Change name of GaussianDistribution

---
 src/hidimstat/knockoffs.py                             |  4 ++--
 ...{gaussian_distribution.py => gaussian_knockoffs.py} |  2 +-
 ...sian_distribution.py => test_gaussian_knockoffs.py} | 10 +++++-----
 3 files changed, 8 insertions(+), 8 deletions(-)
 rename src/hidimstat/statistical_tools/{gaussian_distribution.py => gaussian_knockoffs.py} (99%)
 rename test/statistical_tools/{test_gaussian_distribution.py => test_gaussian_knockoffs.py} (90%)

diff --git a/src/hidimstat/knockoffs.py b/src/hidimstat/knockoffs.py
index e7461e8b9..ecf6e4ba3 100644
--- a/src/hidimstat/knockoffs.py
+++ b/src/hidimstat/knockoffs.py
@@ -7,7 +7,7 @@
 from sklearn.utils import check_random_state
 from sklearn.utils.validation import check_memory
 
-from hidimstat.statistical_tools.gaussian_distribution import GaussianDistribution
+from hidimstat.statistical_tools.gaussian_knockoffs import GaussianKnockoffs
 from hidimstat.statistical_tools.multiple_testing import fdr_threshold
 from hidimstat.statistical_tools.aggregation import quantile_aggregation
 
@@ -197,7 +197,7 @@ def model_x_knockoff(
         X = StandardScaler().fit_transform(X)
 
     # Create knockoff variables
-    conditionnal_sampler = GaussianDistribution(
+    conditionnal_sampler = GaussianKnockoffs(
         cov_estimator, random_state=seed_list[0], tol=tol_gauss
     )
     conditionnal_sampler.fit(X)
diff --git a/src/hidimstat/statistical_tools/gaussian_distribution.py b/src/hidimstat/statistical_tools/gaussian_knockoffs.py
similarity index 99%
rename from src/hidimstat/statistical_tools/gaussian_distribution.py
rename to src/hidimstat/statistical_tools/gaussian_knockoffs.py
index e0fafeb76..7f0387b03 100644
--- a/src/hidimstat/statistical_tools/gaussian_distribution.py
+++ b/src/hidimstat/statistical_tools/gaussian_knockoffs.py
@@ -4,7 +4,7 @@
 from sklearn.utils import check_random_state
 
 
-class GaussianDistribution:
+class GaussianKnockoffs:
     """
     Generator for second-order Gaussian variables using the equi-correlated method.
     Creates synthetic variables that preserve the covariance structure of the original
diff --git a/test/statistical_tools/test_gaussian_distribution.py b/test/statistical_tools/test_gaussian_knockoffs.py
similarity index 90%
rename from test/statistical_tools/test_gaussian_distribution.py
rename to test/statistical_tools/test_gaussian_knockoffs.py
index ba586dc1b..936d19c53 100644
--- a/test/statistical_tools/test_gaussian_distribution.py
+++ b/test/statistical_tools/test_gaussian_knockoffs.py
@@ -1,8 +1,8 @@
 import numpy as np
 import pytest
-from hidimstat.statistical_tools.gaussian_distribution import (
+from hidimstat.statistical_tools.gaussian_knockoffs import (
     _s_equi,
-    GaussianDistribution,
+    GaussianKnockoffs,
 )
 from hidimstat._utils.scenario import multivariate_simulation
 from sklearn.covariance import LedoitWolf
@@ -14,7 +14,7 @@ def test_gaussian_equi():
     n = 100
     p = 50
     X, y, beta, noise = multivariate_simulation(n, p, seed=seed)
-    generator = GaussianDistribution(
+    generator = GaussianKnockoffs(
         cov_estimator=LedoitWolf(assume_centered=True),
         random_state=seed * 2,
     )
@@ -29,7 +29,7 @@ def test_gaussian_center():
     n = 100
     p = 50
     X, y, beta, noise = multivariate_simulation(n, p, seed=seed)
-    generator = GaussianDistribution(
+    generator = GaussianKnockoffs(
         cov_estimator=LedoitWolf(assume_centered=True),
         random_state=seed * 2,
     )
@@ -44,7 +44,7 @@ def test_gaussian_error():
     n = 100
     p = 50
     X, y, beta, noise = multivariate_simulation(n, p, seed=seed)
-    generator = GaussianDistribution(
+    generator = GaussianKnockoffs(
         cov_estimator=LedoitWolf(assume_centered=True),
         random_state=seed * 2,
     )

From 86542699d60122225e7c7af8612962b8bbd2e854 Mon Sep 17 00:00:00 2001
From: kusch lionel <lionel.a.kusch@inria.fr>
Date: Mon, 1 Sep 2025 13:35:57 +0200
Subject: [PATCH 10/44] remove parameter assumed_centerd of tests

---
 test/statistical_tools/test_gaussian_knockoffs.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/test/statistical_tools/test_gaussian_knockoffs.py b/test/statistical_tools/test_gaussian_knockoffs.py
index 936d19c53..ce3b269a3 100644
--- a/test/statistical_tools/test_gaussian_knockoffs.py
+++ b/test/statistical_tools/test_gaussian_knockoffs.py
@@ -15,7 +15,7 @@ def test_gaussian_equi():
     p = 50
     X, y, beta, noise = multivariate_simulation(n, p, seed=seed)
     generator = GaussianKnockoffs(
-        cov_estimator=LedoitWolf(assume_centered=True),
+        cov_estimator=LedoitWolf(),
         random_state=seed * 2,
     )
     generator.fit(X=X)
@@ -30,7 +30,7 @@ def test_gaussian_center():
     p = 50
     X, y, beta, noise = multivariate_simulation(n, p, seed=seed)
     generator = GaussianKnockoffs(
-        cov_estimator=LedoitWolf(assume_centered=True),
+        cov_estimator=LedoitWolf(),
         random_state=seed * 2,
     )
     generator.fit(X=X)
@@ -45,7 +45,7 @@ def test_gaussian_error():
     p = 50
     X, y, beta, noise = multivariate_simulation(n, p, seed=seed)
     generator = GaussianKnockoffs(
-        cov_estimator=LedoitWolf(assume_centered=True),
+        cov_estimator=LedoitWolf(),
         random_state=seed * 2,
     )
     with pytest.raises(

From afe9915e06f609fff0821648899ad33c1960b057 Mon Sep 17 00:00:00 2001
From: kusch lionel <lionel.a.kusch@inria.fr>
Date: Wed, 3 Sep 2025 17:07:19 +0200
Subject: [PATCH 11/44] change the way of using the randomgenerator

---
 src/hidimstat/knockoffs.py                        | 14 ++++----------
 .../statistical_tools/gaussian_knockoffs.py       |  5 +++--
 test/statistical_tools/test_gaussian_knockoffs.py | 15 ---------------
 3 files changed, 7 insertions(+), 27 deletions(-)

diff --git a/src/hidimstat/knockoffs.py b/src/hidimstat/knockoffs.py
index ecf6e4ba3..86ae4d133 100644
--- a/src/hidimstat/knockoffs.py
+++ b/src/hidimstat/knockoffs.py
@@ -180,25 +180,19 @@ def model_x_knockoff(
     """
     assert n_bootstraps > 0, "the number of bootstraps should at least higher than 1"
     memory = check_memory(memory)
+    rng = check_random_state(random_state)
     # unnecessary to have n_jobs > number of bootstraps
     n_jobs = min(n_bootstraps, n_jobs)
     parallel = Parallel(n_jobs, verbose=joblib_verbose)
 
-    # get the seed for the different run
-    if isinstance(random_state, (int, np.int32, np.int64)):
-        rng = check_random_state(random_state)
-    elif random_state is None:
-        rng = check_random_state(0)
-    else:
-        raise TypeError("Wrong type for random_state")
-    seed_list = rng.randint(1, np.iinfo(np.int32).max, n_bootstraps)
-
     if centered:
         X = StandardScaler().fit_transform(X)
 
     # Create knockoff variables
     conditionnal_sampler = GaussianKnockoffs(
-        cov_estimator, random_state=seed_list[0], tol=tol_gauss
+        cov_estimator,
+        random_state=rng,
+        tol=tol_gauss,
     )
     conditionnal_sampler.fit(X)
     X_tildes = [conditionnal_sampler.sample() for i in range(n_bootstraps)]
diff --git a/src/hidimstat/statistical_tools/gaussian_knockoffs.py b/src/hidimstat/statistical_tools/gaussian_knockoffs.py
index 7f0387b03..8b88f6463 100644
--- a/src/hidimstat/statistical_tools/gaussian_knockoffs.py
+++ b/src/hidimstat/statistical_tools/gaussian_knockoffs.py
@@ -32,7 +32,7 @@ class GaussianKnockoffs:
     def __init__(self, cov_estimator, random_state=None, tol=1e-14):
         self.cov_estimator = cov_estimator
         self.tol = tol
-        self.rng = check_random_state(random_state)
+        self.random_state = random_state
 
     def fit(self, X):
         """
@@ -110,10 +110,11 @@ def sample(self):
             The synthetic variables.
         """
         self._check_fit()
+        rng = check_random_state(self.random_state)
         n_samples, n_features = self.mu_tilde_.shape
 
         # create a uniform noise for all the data
-        u_tilde = self.rng.randn(n_samples, n_features)
+        u_tilde = rng.randn(n_samples, n_features)
 
         # Equation 1.4 in barber2015controlling
         X_tilde = self.mu_tilde_ + np.dot(u_tilde, self.sigma_tilde_decompose_)
diff --git a/test/statistical_tools/test_gaussian_knockoffs.py b/test/statistical_tools/test_gaussian_knockoffs.py
index ce3b269a3..d3801872f 100644
--- a/test/statistical_tools/test_gaussian_knockoffs.py
+++ b/test/statistical_tools/test_gaussian_knockoffs.py
@@ -23,21 +23,6 @@ def test_gaussian_equi():
     assert X_tilde.shape == (n, p)
 
 
-def test_gaussian_center():
-    """test function of gaussian"""
-    seed = 42
-    n = 100
-    p = 50
-    X, y, beta, noise = multivariate_simulation(n, p, seed=seed)
-    generator = GaussianKnockoffs(
-        cov_estimator=LedoitWolf(),
-        random_state=seed * 2,
-    )
-    generator.fit(X=X)
-    X_tilde = generator.sample()
-    assert X_tilde.shape == (n, p)
-
-
 def test_gaussian_error():
     """test function error"""
     seed = 42

From bd3b77af428391fa8590c68d29b6e1065dfe99ce Mon Sep 17 00:00:00 2001
From: lionel kusch <lionel.a.kusch@inria.fr>
Date: Thu, 4 Sep 2025 11:23:53 +0200
Subject: [PATCH 12/44] Apply suggestions from code review

Co-authored-by: Joseph Paillard <joseph.paillard@inria.fr>
---
 src/hidimstat/statistical_tools/gaussian_knockoffs.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/hidimstat/statistical_tools/gaussian_knockoffs.py b/src/hidimstat/statistical_tools/gaussian_knockoffs.py
index 8b88f6463..6eae8a3ad 100644
--- a/src/hidimstat/statistical_tools/gaussian_knockoffs.py
+++ b/src/hidimstat/statistical_tools/gaussian_knockoffs.py
@@ -9,6 +9,7 @@ class GaussianKnockoffs:
     Generator for second-order Gaussian variables using the equi-correlated method.
     Creates synthetic variables that preserve the covariance structure of the original
     variables while ensuring conditional independence between the original and synthetic data.
+    
     Parameters
     ----------
     cov_estimator : object
@@ -40,6 +41,7 @@ def fit(self, X):
         This method estimates the parameters needed to generate Gaussian synthetic variables
         based on the input data. It follows a methodology for creating second-order
         synthetic variables that preserve the covariance structure.
+        
         Parameters
         ----------
         X : array-like of shape (n_samples, n_features)

From d8e9f5c81ac100a5a9c5e1769ee8c2af524d3e84 Mon Sep 17 00:00:00 2001
From: kusch lionel <lionel.a.kusch@inria.fr>
Date: Thu, 4 Sep 2025 11:25:04 +0200
Subject: [PATCH 13/44] reformat docstring

---
 .../statistical_tools/gaussian_knockoffs.py         | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/hidimstat/statistical_tools/gaussian_knockoffs.py b/src/hidimstat/statistical_tools/gaussian_knockoffs.py
index 6eae8a3ad..9d4932cc2 100644
--- a/src/hidimstat/statistical_tools/gaussian_knockoffs.py
+++ b/src/hidimstat/statistical_tools/gaussian_knockoffs.py
@@ -9,7 +9,7 @@ class GaussianKnockoffs:
     Generator for second-order Gaussian variables using the equi-correlated method.
     Creates synthetic variables that preserve the covariance structure of the original
     variables while ensuring conditional independence between the original and synthetic data.
-    
+
     Parameters
     ----------
     cov_estimator : object
@@ -19,12 +19,14 @@ class GaussianKnockoffs:
         Random seed for generating synthetic data.
     tol : float, default=1e-14
         Tolerance for numerical stability in matrix computations.
+
     Attributes
     ----------
     mu_tilde_ : ndarray of shape (n_samples, n_features)
         Mean matrix for generating synthetic variables.
     sigma_tilde_decompose_ : ndarray of shape (n_features, n_features)
         Cholesky decomposition of the synthetic covariance matrix.
+
     References
     ----------
     .. footbibliography::
@@ -41,16 +43,18 @@ def fit(self, X):
         This method estimates the parameters needed to generate Gaussian synthetic variables
         based on the input data. It follows a methodology for creating second-order
         synthetic variables that preserve the covariance structure.
-        
+
         Parameters
         ----------
         X : array-like of shape (n_samples, n_features)
             The input samples used to estimate the parameters for synthetic variable generation.
             The data is assumed to follow a Gaussian distribution.
+
         Returns
         -------
         self : object
             Returns the instance itself.
+
         Notes
         -----
         The method implements the following steps:
@@ -90,6 +94,7 @@ def fit(self, X):
     def _check_fit(self):
         """
         Check if the model has been fit before performing analysis.
+
         Raises
         ------
         ValueError
@@ -106,6 +111,7 @@ def sample(self):
         Generate synthetic variables.
         This function generates synthetic variables that preserve the covariance structure
         of the original data while ensuring conditional independence.
+
         Returns
         -------
         X_tilde : 2D ndarray (n_samples, n_features)
@@ -134,6 +140,7 @@ def _s_equi(sigma, tol=1e-14):
     covariance matrix sigma and a tolerance value tol,
     and returns a vector of diagonal values of the estimated
     matrix diag{s}.
+
     Parameters
     ----------
     sigma : 2D ndarray (n_features, n_features)
@@ -142,10 +149,12 @@ def _s_equi(sigma, tol=1e-14):
     tol : float, optional
         A tolerance value used for numerical stability in the calculation
         of the eigenvalues of the correlation matrix.
+
     Returns
     -------
     1D ndarray (n_features, )
         A vector of diagonal values of the estimated matrix diag{s}.
+
     Raises
     ------
     Exception

From 013bd636f127da32749e23719694975e76774463 Mon Sep 17 00:00:00 2001
From: lionel kusch <lionel.a.kusch@inria.fr>
Date: Thu, 4 Sep 2025 11:27:11 +0200
Subject: [PATCH 14/44] Update
 test/statistical_tools/test_gaussian_knockoffs.py

Co-authored-by: Joseph Paillard <joseph.paillard@inria.fr>
---
 .../test_gaussian_knockoffs.py                | 26 +++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/test/statistical_tools/test_gaussian_knockoffs.py b/test/statistical_tools/test_gaussian_knockoffs.py
index d3801872f..e59811f8b 100644
--- a/test/statistical_tools/test_gaussian_knockoffs.py
+++ b/test/statistical_tools/test_gaussian_knockoffs.py
@@ -65,3 +65,29 @@ def test_s_equi_not_define_positive():
     d = np.eye(n)
     sigma = u * d * u.T
     _s_equi(sigma)
+
+
+def test_reproducibility_sample():
+    X, _, _, _ = multivariate_simulation(100, 10, seed=0)
+    gaussian_sampler = GaussianKnockoffs(cov_estimator=LedoitWolf(), random_state=0)
+    gaussian_sampler.fit(X=X)
+    X_tilde_1 = gaussian_sampler.sample()
+    X_tilde_2 = gaussian_sampler.sample()
+    assert np.array_equal(X_tilde_1, X_tilde_2)
+
+
+def test_randomness_sample():
+    X, _, _, _ = multivariate_simulation(100, 10, seed=0)
+    gaussian_sampler = GaussianKnockoffs(cov_estimator=LedoitWolf(), random_state=None)
+    gaussian_sampler.fit(X=X)
+    X_tilde_1 = gaussian_sampler.sample()
+    X_tilde_2 = gaussian_sampler.sample()
+    assert not np.array_equal(X_tilde_1, X_tilde_2)
+
+    gaussian_sampler_rng = GaussianKnockoffs(
+        cov_estimator=LedoitWolf(), random_state=np.random.RandomState(0)
+    )
+    gaussian_sampler_rng.fit(X=X)
+    X_tilde_1 = gaussian_sampler_rng.sample()
+    X_tilde_2 = gaussian_sampler_rng.sample()
+    assert not np.array_equal(X_tilde_1, X_tilde_2)

From 140dc6ecbb697a8a5974f8c8f6b04d593bdb7554 Mon Sep 17 00:00:00 2001
From: kusch lionel <lionel.a.kusch@inria.fr>
Date: Thu, 4 Sep 2025 11:29:48 +0200
Subject: [PATCH 15/44] improve tests

---
 test/statistical_tools/test_gaussian_knockoffs.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/test/statistical_tools/test_gaussian_knockoffs.py b/test/statistical_tools/test_gaussian_knockoffs.py
index e59811f8b..f0500d25f 100644
--- a/test/statistical_tools/test_gaussian_knockoffs.py
+++ b/test/statistical_tools/test_gaussian_knockoffs.py
@@ -68,6 +68,7 @@ def test_s_equi_not_define_positive():
 
 
 def test_reproducibility_sample():
+    """Test the repeatability of the samples"""
     X, _, _, _ = multivariate_simulation(100, 10, seed=0)
     gaussian_sampler = GaussianKnockoffs(cov_estimator=LedoitWolf(), random_state=0)
     gaussian_sampler.fit(X=X)
@@ -76,7 +77,8 @@ def test_reproducibility_sample():
     assert np.array_equal(X_tilde_1, X_tilde_2)
 
 
-def test_randomness_sample():
+def test_randomness_sample_no_seed():
+    """Test the non repeatability of the samples when no seed"""
     X, _, _, _ = multivariate_simulation(100, 10, seed=0)
     gaussian_sampler = GaussianKnockoffs(cov_estimator=LedoitWolf(), random_state=None)
     gaussian_sampler.fit(X=X)
@@ -84,6 +86,10 @@ def test_randomness_sample():
     X_tilde_2 = gaussian_sampler.sample()
     assert not np.array_equal(X_tilde_1, X_tilde_2)
 
+
+def test_randomness_sample_rgn():
+    """Test the non repeatability of the samples when the usage of random generator"""
+    X, _, _, _ = multivariate_simulation(100, 10, seed=0)
     gaussian_sampler_rng = GaussianKnockoffs(
         cov_estimator=LedoitWolf(), random_state=np.random.RandomState(0)
     )

From d26ce917c6e44d757592603376b93e683967bdb9 Mon Sep 17 00:00:00 2001
From: lionel kusch <lionel.a.kusch@inria.fr>
Date: Fri, 5 Sep 2025 11:16:22 +0200
Subject: [PATCH 16/44] Apply suggestions from code review

Co-authored-by: bthirion <bertrand.thirion@inria.fr>
---
 src/hidimstat/statistical_tools/gaussian_knockoffs.py | 4 ++--
 test/statistical_tools/test_gaussian_knockoffs.py     | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/hidimstat/statistical_tools/gaussian_knockoffs.py b/src/hidimstat/statistical_tools/gaussian_knockoffs.py
index 9d4932cc2..74b5ffa61 100644
--- a/src/hidimstat/statistical_tools/gaussian_knockoffs.py
+++ b/src/hidimstat/statistical_tools/gaussian_knockoffs.py
@@ -93,7 +93,7 @@ def fit(self, X):
 
     def _check_fit(self):
         """
-        Check if the model has been fit before performing analysis.
+        Check if the model has been fit before sampling knockoffs.
 
         Raises
         ------
@@ -104,7 +104,7 @@ def _check_fit(self):
         if not hasattr(self, "mu_tilde_") or not hasattr(
             self, "sigma_tilde_decompose_"
         ):
-            raise ValueError("The GaussianGenerator requires to be fit before simulate")
+            raise ValueError("The GaussianGenerator requires to be fit before sampling")
 
     def sample(self):
         """
diff --git a/test/statistical_tools/test_gaussian_knockoffs.py b/test/statistical_tools/test_gaussian_knockoffs.py
index f0500d25f..4dfddf163 100644
--- a/test/statistical_tools/test_gaussian_knockoffs.py
+++ b/test/statistical_tools/test_gaussian_knockoffs.py
@@ -13,7 +13,7 @@ def test_gaussian_equi():
     seed = 42
     n = 100
     p = 50
-    X, y, beta, noise = multivariate_simulation(n, p, seed=seed)
+    X, _, _, _ = multivariate_simulation(n, p, seed=seed)
     generator = GaussianKnockoffs(
         cov_estimator=LedoitWolf(),
         random_state=seed * 2,

From 87c4e6efb91af77a869b2e4bf66a8426a128318d Mon Sep 17 00:00:00 2001
From: kusch lionel <lionel.a.kusch@inria.fr>
Date: Fri, 5 Sep 2025 11:24:43 +0200
Subject: [PATCH 17/44] add assert

---
 test/statistical_tools/test_gaussian_knockoffs.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/test/statistical_tools/test_gaussian_knockoffs.py b/test/statistical_tools/test_gaussian_knockoffs.py
index 4dfddf163..593c661de 100644
--- a/test/statistical_tools/test_gaussian_knockoffs.py
+++ b/test/statistical_tools/test_gaussian_knockoffs.py
@@ -64,7 +64,8 @@ def test_s_equi_not_define_positive():
     u, s, vh = np.linalg.svd(a)
     d = np.eye(n)
     sigma = u * d * u.T
-    _s_equi(sigma)
+    res = _s_equi(sigma)
+    np.testing.assert_equal(res / np.diag(sigma), np.ones_like(res))
 
 
 def test_reproducibility_sample():

From 90f8455e851e4f3e6974e649cd45c7a04a457d06 Mon Sep 17 00:00:00 2001
From: kusch lionel <lionel.a.kusch@inria.fr>
Date: Fri, 5 Sep 2025 11:31:53 +0200
Subject: [PATCH 18/44] fix tests

---
 test/statistical_tools/test_gaussian_knockoffs.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/statistical_tools/test_gaussian_knockoffs.py b/test/statistical_tools/test_gaussian_knockoffs.py
index 593c661de..0ee606dae 100644
--- a/test/statistical_tools/test_gaussian_knockoffs.py
+++ b/test/statistical_tools/test_gaussian_knockoffs.py
@@ -34,7 +34,7 @@ def test_gaussian_error():
         random_state=seed * 2,
     )
     with pytest.raises(
-        ValueError, match="The GaussianGenerator requires to be fit before simulate"
+        ValueError, match="The GaussianGenerator requires to be fit before sampling"
     ):
         generator.sample()
 

From 785eebc109f9c427ade2c748b26a7a400bffeb4c Mon Sep 17 00:00:00 2001
From: kusch lionel <lionel.a.kusch@inria.fr>
Date: Mon, 8 Sep 2025 11:21:39 +0200
Subject: [PATCH 19/44] Improve test

---
 .../test_gaussian_knockoffs.py                | 38 ++++++++++++++++++-
 1 file changed, 37 insertions(+), 1 deletion(-)

diff --git a/test/statistical_tools/test_gaussian_knockoffs.py b/test/statistical_tools/test_gaussian_knockoffs.py
index 0ee606dae..93b217958 100644
--- a/test/statistical_tools/test_gaussian_knockoffs.py
+++ b/test/statistical_tools/test_gaussian_knockoffs.py
@@ -13,7 +13,8 @@ def test_gaussian_equi():
     seed = 42
     n = 100
     p = 50
-    X, _, _, _ = multivariate_simulation(n, p, seed=seed)
+    rho = 1.0
+    X, _, _, _ = multivariate_simulation(n, p, rho=rho, seed=seed)
     generator = GaussianKnockoffs(
         cov_estimator=LedoitWolf(),
         random_state=seed * 2,
@@ -21,6 +22,41 @@ def test_gaussian_equi():
     generator.fit(X=X)
     X_tilde = generator.sample()
     assert X_tilde.shape == (n, p)
+    assert np.linalg.norm(X - X_tilde) < np.linalg.norm(X)
+
+
+def test_gaussian_equi_2():
+    """test function of gaussian"""
+    seed = 42
+    n = 100
+    p = 50
+    rho = 0.5
+    X, _, _, _ = multivariate_simulation(n, p, rho=rho, seed=seed)
+    generator = GaussianKnockoffs(
+        cov_estimator=LedoitWolf(),
+        random_state=seed * 2,
+    )
+    generator.fit(X=X)
+    X_tilde = generator.sample()
+    assert X_tilde.shape == (n, p)
+    assert np.linalg.norm(X - X_tilde) < np.sqrt(2) * np.linalg.norm(X)
+
+
+def test_gaussian_equi_3():
+    """test function of gaussian"""
+    seed = 42
+    n = 100
+    p = 50
+    rho = 0.0
+    X, _, _, _ = multivariate_simulation(n, p, rho=rho, seed=seed)
+    generator = GaussianKnockoffs(
+        cov_estimator=LedoitWolf(),
+        random_state=seed * 2,
+    )
+    generator.fit(X=X)
+    X_tilde = generator.sample()
+    assert X_tilde.shape == (n, p)
+    assert np.linalg.norm(X - X_tilde) < 5 / 3 * np.linalg.norm(X)
 
 
 def test_gaussian_error():

From 22bdd4bca3cc92118d47933abbcd172f65289536 Mon Sep 17 00:00:00 2001
From: kusch lionel <lionel.a.kusch@inria.fr>
Date: Tue, 9 Sep 2025 10:19:38 +0200
Subject: [PATCH 20/44] remove not necessary test

---
 .../test_gaussian_knockoffs.py                | 34 -------------------
 1 file changed, 34 deletions(-)

diff --git a/test/statistical_tools/test_gaussian_knockoffs.py b/test/statistical_tools/test_gaussian_knockoffs.py
index 93b217958..bd2a8f637 100644
--- a/test/statistical_tools/test_gaussian_knockoffs.py
+++ b/test/statistical_tools/test_gaussian_knockoffs.py
@@ -9,23 +9,6 @@
 
 
 def test_gaussian_equi():
-    """test function of gaussian"""
-    seed = 42
-    n = 100
-    p = 50
-    rho = 1.0
-    X, _, _, _ = multivariate_simulation(n, p, rho=rho, seed=seed)
-    generator = GaussianKnockoffs(
-        cov_estimator=LedoitWolf(),
-        random_state=seed * 2,
-    )
-    generator.fit(X=X)
-    X_tilde = generator.sample()
-    assert X_tilde.shape == (n, p)
-    assert np.linalg.norm(X - X_tilde) < np.linalg.norm(X)
-
-
-def test_gaussian_equi_2():
     """test function of gaussian"""
     seed = 42
     n = 100
@@ -42,23 +25,6 @@ def test_gaussian_equi_2():
     assert np.linalg.norm(X - X_tilde) < np.sqrt(2) * np.linalg.norm(X)
 
 
-def test_gaussian_equi_3():
-    """test function of gaussian"""
-    seed = 42
-    n = 100
-    p = 50
-    rho = 0.0
-    X, _, _, _ = multivariate_simulation(n, p, rho=rho, seed=seed)
-    generator = GaussianKnockoffs(
-        cov_estimator=LedoitWolf(),
-        random_state=seed * 2,
-    )
-    generator.fit(X=X)
-    X_tilde = generator.sample()
-    assert X_tilde.shape == (n, p)
-    assert np.linalg.norm(X - X_tilde) < 5 / 3 * np.linalg.norm(X)
-
-
 def test_gaussian_error():
     """test function error"""
     seed = 42

From 9429c2acede08b7dc49f603b6a013bf1e4a86e61 Mon Sep 17 00:00:00 2001
From: kusch lionel <lionel.a.kusch@inria.fr>
Date: Wed, 24 Sep 2025 15:43:18 +0200
Subject: [PATCH 21/44] Update the conditioal sampler for new fit signature

---
 src/hidimstat/knockoffs.py                    |  8 +---
 .../statistical_tools/gaussian_knockoffs.py   | 44 ++++++++++++++-----
 .../test_gaussian_knockoffs.py                | 35 ++++++---------
 3 files changed, 49 insertions(+), 38 deletions(-)

diff --git a/src/hidimstat/knockoffs.py b/src/hidimstat/knockoffs.py
index 86ae4d133..bbc8b4ea9 100644
--- a/src/hidimstat/knockoffs.py
+++ b/src/hidimstat/knockoffs.py
@@ -189,13 +189,9 @@ def model_x_knockoff(
         X = StandardScaler().fit_transform(X)
 
     # Create knockoff variables
-    conditionnal_sampler = GaussianKnockoffs(
-        cov_estimator,
-        random_state=rng,
-        tol=tol_gauss,
-    )
+    conditionnal_sampler = GaussianKnockoffs(cov_estimator, tol=tol_gauss)
     conditionnal_sampler.fit(X)
-    X_tildes = [conditionnal_sampler.sample() for i in range(n_bootstraps)]
+    X_tildes = conditionnal_sampler.sample(n_samples=n_bootstraps, random_state=rng)
 
     results = parallel(
         delayed(memory.cache(_stat_coefficient_diff))(
diff --git a/src/hidimstat/statistical_tools/gaussian_knockoffs.py b/src/hidimstat/statistical_tools/gaussian_knockoffs.py
index 74b5ffa61..9b6b66fdc 100644
--- a/src/hidimstat/statistical_tools/gaussian_knockoffs.py
+++ b/src/hidimstat/statistical_tools/gaussian_knockoffs.py
@@ -15,8 +15,6 @@ class GaussianKnockoffs:
     cov_estimator : object
         Estimator for computing the covariance matrix. Must implement fit and
         have a covariance_ attribute after fitting.
-    random_state : int or None, default=None
-        Random seed for generating synthetic data.
     tol : float, default=1e-14
         Tolerance for numerical stability in matrix computations.
 
@@ -32,10 +30,9 @@ class GaussianKnockoffs:
     .. footbibliography::
     """
 
-    def __init__(self, cov_estimator, random_state=None, tol=1e-14):
+    def __init__(self, cov_estimator, tol=1e-14):
         self.cov_estimator = cov_estimator
         self.tol = tol
-        self.random_state = random_state
 
     def fit(self, X):
         """
@@ -106,27 +103,52 @@ def _check_fit(self):
         ):
             raise ValueError("The GaussianGenerator requires to be fit before sampling")
 
-    def sample(self):
+    def sample(
+        self,
+        X: np.ndarray = None,
+        y: np.ndarray = None,
+        n_samples: int = 1,
+        random_state=None,
+    ):
         """
         Generate synthetic variables.
         This function generates synthetic variables that preserve the covariance structure
         of the original data while ensuring conditional independence.
 
+        Parameters
+        ----------
+        X : ndarray, default = None
+            The complementary of the considered set of variables, $X^{-j}$.
+        y : ndarray, default = None
+            The group of variables to sample, $X^j$.
+        n_samples : int, default=1
+            The number of samples to draw.
+        random_state : int, default=None
+            The random state to use for sampling.
+
         Returns
         -------
         X_tilde : 2D ndarray (n_samples, n_features)
             The synthetic variables.
         """
+        if X is not None:
+            warnings.warn("this new X won't be used for sample the distribution")
+        if y is not None:
+            warnings.warn("this new y won't be used for sample the distribution")
         self._check_fit()
-        rng = check_random_state(self.random_state)
+        rng = check_random_state(random_state)
         n_samples, n_features = self.mu_tilde_.shape
 
-        # create a uniform noise for all the data
-        u_tilde = rng.randn(n_samples, n_features)
+        X_tildes = []
+        for i in range(n_samples):
+            # create a uniform noise for all the data
+            u_tilde = rng.randn(n_samples, n_features)
 
-        # Equation 1.4 in barber2015controlling
-        X_tilde = self.mu_tilde_ + np.dot(u_tilde, self.sigma_tilde_decompose_)
-        return X_tilde
+            # Equation 1.4 in barber2015controlling
+            X_tildes.append(
+                self.mu_tilde_ + np.dot(u_tilde, self.sigma_tilde_decompose_)
+            )
+        return X_tildes
 
 
 def _s_equi(sigma, tol=1e-14):
diff --git a/test/statistical_tools/test_gaussian_knockoffs.py b/test/statistical_tools/test_gaussian_knockoffs.py
index bd2a8f637..721054e6b 100644
--- a/test/statistical_tools/test_gaussian_knockoffs.py
+++ b/test/statistical_tools/test_gaussian_knockoffs.py
@@ -15,12 +15,9 @@ def test_gaussian_equi():
     p = 50
     rho = 0.5
     X, _, _, _ = multivariate_simulation(n, p, rho=rho, seed=seed)
-    generator = GaussianKnockoffs(
-        cov_estimator=LedoitWolf(),
-        random_state=seed * 2,
-    )
+    generator = GaussianKnockoffs(cov_estimator=LedoitWolf())
     generator.fit(X=X)
-    X_tilde = generator.sample()
+    X_tilde = generator.sample(random_state=seed * 2)[0]
     assert X_tilde.shape == (n, p)
     assert np.linalg.norm(X - X_tilde) < np.sqrt(2) * np.linalg.norm(X)
 
@@ -31,14 +28,11 @@ def test_gaussian_error():
     n = 100
     p = 50
     X, y, beta, noise = multivariate_simulation(n, p, seed=seed)
-    generator = GaussianKnockoffs(
-        cov_estimator=LedoitWolf(),
-        random_state=seed * 2,
-    )
+    generator = GaussianKnockoffs(cov_estimator=LedoitWolf())
     with pytest.raises(
         ValueError, match="The GaussianGenerator requires to be fit before sampling"
     ):
-        generator.sample()
+        generator.sample(random_state=seed * 2)
 
 
 def test_s_equi_not_define_positive():
@@ -73,30 +67,29 @@ def test_s_equi_not_define_positive():
 def test_reproducibility_sample():
     """Test the repeatability of the samples"""
     X, _, _, _ = multivariate_simulation(100, 10, seed=0)
-    gaussian_sampler = GaussianKnockoffs(cov_estimator=LedoitWolf(), random_state=0)
+    gaussian_sampler = GaussianKnockoffs(cov_estimator=LedoitWolf())
     gaussian_sampler.fit(X=X)
-    X_tilde_1 = gaussian_sampler.sample()
-    X_tilde_2 = gaussian_sampler.sample()
+    X_tilde_1 = gaussian_sampler.sample(random_state=0)
+    X_tilde_2 = gaussian_sampler.sample(random_state=0)
     assert np.array_equal(X_tilde_1, X_tilde_2)
 
 
 def test_randomness_sample_no_seed():
     """Test the non repeatability of the samples when no seed"""
     X, _, _, _ = multivariate_simulation(100, 10, seed=0)
-    gaussian_sampler = GaussianKnockoffs(cov_estimator=LedoitWolf(), random_state=None)
+    gaussian_sampler = GaussianKnockoffs(cov_estimator=LedoitWolf())
     gaussian_sampler.fit(X=X)
-    X_tilde_1 = gaussian_sampler.sample()
-    X_tilde_2 = gaussian_sampler.sample()
+    X_tilde_1 = gaussian_sampler.sample(random_state=None)
+    X_tilde_2 = gaussian_sampler.sample(random_state=None)
     assert not np.array_equal(X_tilde_1, X_tilde_2)
 
 
 def test_randomness_sample_rgn():
     """Test the non repeatability of the samples when the usage of random generator"""
     X, _, _, _ = multivariate_simulation(100, 10, seed=0)
-    gaussian_sampler_rng = GaussianKnockoffs(
-        cov_estimator=LedoitWolf(), random_state=np.random.RandomState(0)
-    )
+    rng = np.random.RandomState(0)
+    gaussian_sampler_rng = GaussianKnockoffs(cov_estimator=LedoitWolf())
     gaussian_sampler_rng.fit(X=X)
-    X_tilde_1 = gaussian_sampler_rng.sample()
-    X_tilde_2 = gaussian_sampler_rng.sample()
+    X_tilde_1 = gaussian_sampler_rng.sample(random_state=rng)
+    X_tilde_2 = gaussian_sampler_rng.sample(random_state=rng)
     assert not np.array_equal(X_tilde_1, X_tilde_2)

From 4dfd67753fd7e031f65b2ac15d025ae06023602b Mon Sep 17 00:00:00 2001
From: kusch lionel <lionel.a.kusch@inria.fr>
Date: Fri, 10 Oct 2025 15:27:46 +0200
Subject: [PATCH 22/44] fix test

---
 src/hidimstat/statistical_tools/gaussian_knockoffs.py | 2 +-
 test/statistical_tools/test_gaussian_knockoffs.py     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/hidimstat/statistical_tools/gaussian_knockoffs.py b/src/hidimstat/statistical_tools/gaussian_knockoffs.py
index d664f6f63..c5f0f98cc 100644
--- a/src/hidimstat/statistical_tools/gaussian_knockoffs.py
+++ b/src/hidimstat/statistical_tools/gaussian_knockoffs.py
@@ -144,7 +144,7 @@ def sample(
         X_tildes = []
         for i in range(n_samples):
             # create a uniform noise for all the data
-            u_tilde = rng.randn(n_samples, n_features)
+            u_tilde = rng.standard_normal([n_samples, n_features])
 
             # Equation 1.4 in barber2015controlling
             X_tildes.append(
diff --git a/test/statistical_tools/test_gaussian_knockoffs.py b/test/statistical_tools/test_gaussian_knockoffs.py
index 721054e6b..12dfe883b 100644
--- a/test/statistical_tools/test_gaussian_knockoffs.py
+++ b/test/statistical_tools/test_gaussian_knockoffs.py
@@ -87,7 +87,7 @@ def test_randomness_sample_no_seed():
 def test_randomness_sample_rgn():
     """Test the non repeatability of the samples when the usage of random generator"""
     X, _, _, _ = multivariate_simulation(100, 10, seed=0)
-    rng = np.random.RandomState(0)
+    rng = np.random.default_rng(0)
     gaussian_sampler_rng = GaussianKnockoffs(cov_estimator=LedoitWolf())
     gaussian_sampler_rng.fit(X=X)
     X_tilde_1 = gaussian_sampler_rng.sample(random_state=rng)

From c5e1e76fae853e1782b66b5915256f102afaa93e Mon Sep 17 00:00:00 2001
From: kusch lionel <lionel.a.kusch@inria.fr>
Date: Fri, 10 Oct 2025 15:29:59 +0200
Subject: [PATCH 23/44] fix isort

---
 test/statistical_tools/test_gaussian_knockoffs.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/test/statistical_tools/test_gaussian_knockoffs.py b/test/statistical_tools/test_gaussian_knockoffs.py
index 12dfe883b..58e4cb571 100644
--- a/test/statistical_tools/test_gaussian_knockoffs.py
+++ b/test/statistical_tools/test_gaussian_knockoffs.py
@@ -1,11 +1,12 @@
 import numpy as np
 import pytest
+from sklearn.covariance import LedoitWolf
+
+from hidimstat._utils.scenario import multivariate_simulation
 from hidimstat.statistical_tools.gaussian_knockoffs import (
-    _s_equi,
     GaussianKnockoffs,
+    _s_equi,
 )
-from hidimstat._utils.scenario import multivariate_simulation
-from sklearn.covariance import LedoitWolf
 
 
 def test_gaussian_equi():

From 45bb053d5c8d806fd1dcda3c373a677ccb7cc4e2 Mon Sep 17 00:00:00 2001
From: kusch lionel <lionel.a.kusch@inria.fr>
Date: Fri, 10 Oct 2025 15:35:37 +0200
Subject: [PATCH 24/44] fix examples

---
 examples/plot_pitfalls_permutation_importance.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/examples/plot_pitfalls_permutation_importance.py b/examples/plot_pitfalls_permutation_importance.py
index 7534865f8..a4e0ed310 100644
--- a/examples/plot_pitfalls_permutation_importance.py
+++ b/examples/plot_pitfalls_permutation_importance.py
@@ -14,9 +14,6 @@
 
 import numpy as np
 
-from hidimstat import CFI, PFI
-from hidimstat.statistical_tools.conditional_sampling import ConditionalSampler
-
 rng = np.random.default_rng(0)
 
 # %%
@@ -270,7 +267,7 @@
 
 from matplotlib.lines import Line2D
 
-from hidimstat.conditional_sampling import ConditionalSampler
+from hidimstat.statistical_tools.conditional_sampling import ConditionalSampler
 
 X_train, X_test = train_test_split(
     X,

From 7e73c2aed8beddedbf9dd50dfc4d5ef53129e8ed Mon Sep 17 00:00:00 2001
From: kusch lionel <lionel.a.kusch@inria.fr>
Date: Fri, 10 Oct 2025 15:45:15 +0200
Subject: [PATCH 25/44] fix documentation

---
 docs/src/api.rst | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/src/api.rst b/docs/src/api.rst
index f11165742..f17b4ff76 100644
--- a/docs/src/api.rst
+++ b/docs/src/api.rst
@@ -52,7 +52,8 @@ Samplers
    :toctree: ./generated/api/class/
    :template: class.rst
 
-   conditional_sampling.ConditionalSampler
+   statistical_tools.conditional_sampling.ConditionalSampler
+   statistical_tools.gaussian_knockoffs.GaussianKnockoffs
 
 Helper Functions
 ================

From 893670384407b85aae60c2844dd393b966ed0fd8 Mon Sep 17 00:00:00 2001
From: kusch lionel <lionel.a.kusch@inria.fr>
Date: Fri, 10 Oct 2025 15:55:54 +0200
Subject: [PATCH 26/44] fix documentation

---
 src/hidimstat/statistical_tools/gaussian_knockoffs.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/hidimstat/statistical_tools/gaussian_knockoffs.py b/src/hidimstat/statistical_tools/gaussian_knockoffs.py
index c5f0f98cc..53228fa83 100644
--- a/src/hidimstat/statistical_tools/gaussian_knockoffs.py
+++ b/src/hidimstat/statistical_tools/gaussian_knockoffs.py
@@ -16,7 +16,7 @@ class GaussianKnockoffs:
     ----------
     cov_estimator : object
         Estimator for computing the covariance matrix. Must implement fit and
-        have a covariance_ attribute after fitting.
+        have a `covariance_` attribute after fitting.
     tol : float, default=1e-14
         Tolerance for numerical stability in matrix computations.
 

From 03f914ccdb3e380b682b0237768f9eb118772b93 Mon Sep 17 00:00:00 2001
From: kusch lionel <lionel.a.kusch@inria.fr>
Date: Fri, 10 Oct 2025 16:48:51 +0200
Subject: [PATCH 27/44] update the user guide.

---
 docs/src/helper_methods.rst | 16 ++++++++++++++++
 docs/src/user_guide.rst     |  2 +-
 2 files changed, 17 insertions(+), 1 deletion(-)
 create mode 100644 docs/src/helper_methods.rst

diff --git a/docs/src/helper_methods.rst b/docs/src/helper_methods.rst
new file mode 100644
index 000000000..05e01d8fc
--- /dev/null
+++ b/docs/src/helper_methods.rst
@@ -0,0 +1,16 @@
+.. _helper_methods:
+
+
+==============
+Helper Methods
+==============
+
+Statistical Tools
+-----------------
+
+The statistical tool box contains the statistical methods and functions required
+by the variable importance methods but is not specific to one method or more general.
+Actually, there are the functions of aggregation pvalues, conditional samplers and 
+the management of multiple testing functions.
+
+TODO: need to be completed.
diff --git a/docs/src/user_guide.rst b/docs/src/user_guide.rst
index 48bbe5e66..43cc76267 100644
--- a/docs/src/user_guide.rst
+++ b/docs/src/user_guide.rst
@@ -21,4 +21,4 @@ Table of contents
    visualization.rst
    grouping.rst
    high_dimension.rst
-   
\ No newline at end of file
+   helper_methods.rst
\ No newline at end of file

From a18fa8fd9fa06e6cf68069504068d0e0c873427e Mon Sep 17 00:00:00 2001
From: kusch lionel <lionel.a.kusch@inria.fr>
Date: Fri, 10 Oct 2025 17:01:21 +0200
Subject: [PATCH 28/44] try to fix the end of file

---
 docs/src/user_guide.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/src/user_guide.rst b/docs/src/user_guide.rst
index 43cc76267..e490bf5b6 100644
--- a/docs/src/user_guide.rst
+++ b/docs/src/user_guide.rst
@@ -21,4 +21,4 @@ Table of contents
    visualization.rst
    grouping.rst
    high_dimension.rst
-   helper_methods.rst
\ No newline at end of file
+   helper_methods.rst

From 8ef5c063e7906d71f6dee5a9071dc889ad2920f2 Mon Sep 17 00:00:00 2001
From: lionel kusch <lionel.a.kusch@inria.fr>
Date: Mon, 13 Oct 2025 10:13:02 +0200
Subject: [PATCH 29/44] Apply suggestion from @bthirion

Co-authored-by: bthirion <bertrand.thirion@inria.fr>
---
 test/statistical_tools/test_gaussian_knockoffs.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/statistical_tools/test_gaussian_knockoffs.py b/test/statistical_tools/test_gaussian_knockoffs.py
index 58e4cb571..769a36ce5 100644
--- a/test/statistical_tools/test_gaussian_knockoffs.py
+++ b/test/statistical_tools/test_gaussian_knockoffs.py
@@ -36,7 +36,7 @@ def test_gaussian_error():
         generator.sample(random_state=seed * 2)
 
 
-def test_s_equi_not_define_positive():
+def test_s_equi_not_definite_positive():
     """test the warning and error of s_equi function"""
     n = 10
     tol = 1e-7

From 720cf4418642d62259421ea4f206439af3c7acdc Mon Sep 17 00:00:00 2001
From: kusch lionel <lionel.a.kusch@inria.fr>
Date: Mon, 13 Oct 2025 11:36:42 +0200
Subject: [PATCH 30/44] update makefile

---
 docs/Makefile | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/docs/Makefile b/docs/Makefile
index 711611b0d..cf76bb912 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -2,13 +2,14 @@
 #
 
 # You can set these variables from the command line.
-SPHINXOPTS    = -j auto -W --keep-going -n 
+N_JOB         = auto
+SPHINXOPTS    = -j ${N_JOB} -W --keep-going -n 
 # -j/--jobs:		    N processes in parallel (auto=number of CPUs)
 # -W/--fail-on-warning: Turn warnings into errors.
 # --keep-going:         Runs sphinx-build to completion and exits  
 #			    		with exit status 1 if errors are encountered.
 # -n/--nitpicky:	    This generates warnings for all missing references.
-SPHINXBUILD   = sphinx-build
+SPHINXBUILD   = "LANG=C python -X faulthandler -m sphinx"
 PAPER         =
 BUILDDIR      = ./_build
 

From 410cefcfbb1a282d9e7c7a692e5b1dce623617ee Mon Sep 17 00:00:00 2001
From: kusch lionel <lionel.a.kusch@inria.fr>
Date: Mon, 13 Oct 2025 11:37:41 +0200
Subject: [PATCH 31/44] Add comment

[skip tests]
---
 docs/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/Makefile b/docs/Makefile
index cf76bb912..f07ec09a1 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -9,7 +9,7 @@ SPHINXOPTS    = -j ${N_JOB} -W --keep-going -n
 # --keep-going:         Runs sphinx-build to completion and exits  
 #			    		with exit status 1 if errors are encountered.
 # -n/--nitpicky:	    This generates warnings for all missing references.
-SPHINXBUILD   = "LANG=C python -X faulthandler -m sphinx"
+SPHINXBUILD   = "LANG=C python -X faulthandler -m sphinx" # https://github.com/sphinx-doc/sphinx/issues/11449
 PAPER         =
 BUILDDIR      = ./_build
 

From 8b9619439d668b555a5592ed63f28825cae4e9f7 Mon Sep 17 00:00:00 2001
From: kusch lionel <lionel.a.kusch@inria.fr>
Date: Mon, 13 Oct 2025 11:50:46 +0200
Subject: [PATCH 32/44] update command

---
 tools/documentation/circleci/build_doc.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/documentation/circleci/build_doc.sh b/tools/documentation/circleci/build_doc.sh
index a7e99abf0..5dd7c2041 100755
--- a/tools/documentation/circleci/build_doc.sh
+++ b/tools/documentation/circleci/build_doc.sh
@@ -114,7 +114,7 @@ else
 fi
 
 # The pipefail is requested to propagate exit code
-set -o pipefail && cd docs && make $make_args 2>&1 | tee ~/output_sphinx.txt
+set -o pipefail && cd docs && make N_JOB=1 $make_args 2>&1 | tee ~/output_sphinx.txt
 cd -
 
 set +o pipefail

From dde7b47b578644384a9b74bfd0666adf349dc86d Mon Sep 17 00:00:00 2001
From: kusch lionel <lionel.a.kusch@inria.fr>
Date: Mon, 13 Oct 2025 11:51:20 +0200
Subject: [PATCH 33/44] [skip tests]


From 611d0ecc42290cf275b9fbe5b9636c4b8e99a634 Mon Sep 17 00:00:00 2001
From: kusch lionel <lionel.a.kusch@inria.fr>
Date: Mon, 13 Oct 2025 11:55:08 +0200
Subject: [PATCH 34/44] fix makefile

---
 docs/Makefile                             | 9 +++++----
 tools/documentation/circleci/build_doc.sh | 2 +-
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/docs/Makefile b/docs/Makefile
index f07ec09a1..75d54320f 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -9,7 +9,8 @@ SPHINXOPTS    = -j ${N_JOB} -W --keep-going -n
 # --keep-going:         Runs sphinx-build to completion and exits  
 #			    		with exit status 1 if errors are encountered.
 # -n/--nitpicky:	    This generates warnings for all missing references.
-SPHINXBUILD   = "LANG=C python -X faulthandler -m sphinx" # https://github.com/sphinx-doc/sphinx/issues/11449
+SPHINXBUILD   = LANG=C python -X faulthandler -m sphinx
+# https://github.com/sphinx-doc/sphinx/issues/11449
 PAPER         =
 BUILDDIR      = ./_build
 
@@ -25,9 +26,9 @@ ifneq ($(EXAMPLES_PATTERN),)
 endif
 
 # User-friendly check for sphinx-build
-ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
-$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
-endif
+# ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
+# $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
+# endif
 
 # Internal variables.
 PAPEROPT_a4     = -D latex_paper_size=a4
diff --git a/tools/documentation/circleci/build_doc.sh b/tools/documentation/circleci/build_doc.sh
index 5dd7c2041..a7e99abf0 100755
--- a/tools/documentation/circleci/build_doc.sh
+++ b/tools/documentation/circleci/build_doc.sh
@@ -114,7 +114,7 @@ else
 fi
 
 # The pipefail is requested to propagate exit code
-set -o pipefail && cd docs && make N_JOB=1 $make_args 2>&1 | tee ~/output_sphinx.txt
+set -o pipefail && cd docs && make $make_args 2>&1 | tee ~/output_sphinx.txt
 cd -
 
 set +o pipefail

From c92911f8e41b39700908271c703dca044d501968 Mon Sep 17 00:00:00 2001
From: kusch lionel <lionel.a.kusch@inria.fr>
Date: Mon, 13 Oct 2025 11:57:03 +0200
Subject: [PATCH 35/44] [skip tests]


From 46b835862df81a36ae45dfdfbafa8a7fc3558bf1 Mon Sep 17 00:00:00 2001
From: kusch lionel <lionel.a.kusch@inria.fr>
Date: Mon, 13 Oct 2025 12:08:55 +0200
Subject: [PATCH 36/44] change signature of sample

---
 src/hidimstat/knockoffs.py                    |  2 +-
 .../statistical_tools/gaussian_knockoffs.py   | 20 +++++--------------
 2 files changed, 6 insertions(+), 16 deletions(-)

diff --git a/src/hidimstat/knockoffs.py b/src/hidimstat/knockoffs.py
index be70a0fba..fd2d967ff 100644
--- a/src/hidimstat/knockoffs.py
+++ b/src/hidimstat/knockoffs.py
@@ -191,7 +191,7 @@ def model_x_knockoff(
     conditionnal_sampler = GaussianKnockoffs(cov_estimator, tol=tol_gauss)
     conditionnal_sampler.fit(X)
     X_tildes = conditionnal_sampler.sample(
-        n_samples=n_bootstraps, random_state=random_state
+        n_repeats=n_bootstraps, random_state=random_state
     )
 
     results = parallel(
diff --git a/src/hidimstat/statistical_tools/gaussian_knockoffs.py b/src/hidimstat/statistical_tools/gaussian_knockoffs.py
index 53228fa83..59d0fe6ee 100644
--- a/src/hidimstat/statistical_tools/gaussian_knockoffs.py
+++ b/src/hidimstat/statistical_tools/gaussian_knockoffs.py
@@ -107,9 +107,7 @@ def _check_fit(self):
 
     def sample(
         self,
-        X: np.ndarray = None,
-        y: np.ndarray = None,
-        n_samples: int = 1,
+        n_repeats: int = 1,
         random_state=None,
     ):
         """
@@ -119,11 +117,7 @@ def sample(
 
         Parameters
         ----------
-        X : ndarray, default = None
-            The complementary of the considered set of variables, $X^{-j}$.
-        y : ndarray, default = None
-            The group of variables to sample, $X^j$.
-        n_samples : int, default=1
+        n_repeats : int, default=1
             The number of samples to draw.
         random_state : int or None, default=None
             The random state to use for sampling.
@@ -133,18 +127,14 @@ def sample(
         X_tilde : 2D ndarray (n_samples, n_features)
             The synthetic variables.
         """
-        if X is not None:
-            warnings.warn("this new X won't be used for sample the distribution")
-        if y is not None:
-            warnings.warn("this new y won't be used for sample the distribution")
         self._check_fit()
         rng = check_random_state(random_state)
-        n_samples, n_features = self.mu_tilde_.shape
+        n_repeats, n_features = self.mu_tilde_.shape
 
         X_tildes = []
-        for i in range(n_samples):
+        for i in range(n_repeats):
             # create a uniform noise for all the data
-            u_tilde = rng.standard_normal([n_samples, n_features])
+            u_tilde = rng.standard_normal([n_repeats, n_features])
 
             # Equation 1.4 in barber2015controlling
             X_tildes.append(

From bd7ace1c78fdce9fd81b7fccd007d089a0cc96ac Mon Sep 17 00:00:00 2001
From: kusch lionel <lionel.a.kusch@inria.fr>
Date: Mon, 13 Oct 2025 13:57:35 +0200
Subject: [PATCH 37/44] remove fix

---
 docs/Makefile | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/docs/Makefile b/docs/Makefile
index 75d54320f..711611b0d 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -2,15 +2,13 @@
 #
 
 # You can set these variables from the command line.
-N_JOB         = auto
-SPHINXOPTS    = -j ${N_JOB} -W --keep-going -n 
+SPHINXOPTS    = -j auto -W --keep-going -n 
 # -j/--jobs:		    N processes in parallel (auto=number of CPUs)
 # -W/--fail-on-warning: Turn warnings into errors.
 # --keep-going:         Runs sphinx-build to completion and exits  
 #			    		with exit status 1 if errors are encountered.
 # -n/--nitpicky:	    This generates warnings for all missing references.
-SPHINXBUILD   = LANG=C python -X faulthandler -m sphinx
-# https://github.com/sphinx-doc/sphinx/issues/11449
+SPHINXBUILD   = sphinx-build
 PAPER         =
 BUILDDIR      = ./_build
 
@@ -26,9 +24,9 @@ ifneq ($(EXAMPLES_PATTERN),)
 endif
 
 # User-friendly check for sphinx-build
-# ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
-# $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
-# endif
+ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
+$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
+endif
 
 # Internal variables.
 PAPEROPT_a4     = -D latex_paper_size=a4

From 5cd94f38ebb56f53ba0da7ed72a13372498cf6b0 Mon Sep 17 00:00:00 2001
From: kusch lionel <lionel.a.kusch@inria.fr>
Date: Tue, 14 Oct 2025 12:04:15 +0200
Subject: [PATCH 38/44] remove documentation

---
 docs/src/helper_methods.rst | 16 ----------------
 docs/src/user_guide.rst     |  1 -
 2 files changed, 17 deletions(-)
 delete mode 100644 docs/src/helper_methods.rst

diff --git a/docs/src/helper_methods.rst b/docs/src/helper_methods.rst
deleted file mode 100644
index 05e01d8fc..000000000
--- a/docs/src/helper_methods.rst
+++ /dev/null
@@ -1,16 +0,0 @@
-.. _helper_methods:
-
-
-==============
-Helper Methods
-==============
-
-Statistical Tools
------------------
-
-The statistical tool box contains the statistical methods and functions required
-by the variable importance methods but is not specific to one method or more general.
-Actually, there are the functions of aggregation pvalues, conditional samplers and 
-the management of multiple testing functions.
-
-TODO: need to be completed.
diff --git a/docs/src/user_guide.rst b/docs/src/user_guide.rst
index e490bf5b6..ef084b7c1 100644
--- a/docs/src/user_guide.rst
+++ b/docs/src/user_guide.rst
@@ -21,4 +21,3 @@ Table of contents
    visualization.rst
    grouping.rst
    high_dimension.rst
-   helper_methods.rst

From e3450ec439d39b499ac9e91db635428b3e540c42 Mon Sep 17 00:00:00 2001
From: lionel kusch <lionel.a.kusch@inria.fr>
Date: Wed, 15 Oct 2025 11:07:05 +0200
Subject: [PATCH 39/44] Update
 src/hidimstat/statistical_tools/gaussian_knockoffs.py

Co-authored-by: Joseph Paillard <joseph.paillard@inria.fr>
---
 src/hidimstat/statistical_tools/gaussian_knockoffs.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/hidimstat/statistical_tools/gaussian_knockoffs.py b/src/hidimstat/statistical_tools/gaussian_knockoffs.py
index 59d0fe6ee..03ef19fed 100644
--- a/src/hidimstat/statistical_tools/gaussian_knockoffs.py
+++ b/src/hidimstat/statistical_tools/gaussian_knockoffs.py
@@ -124,7 +124,7 @@ def sample(
 
         Returns
         -------
-        X_tilde : 2D ndarray (n_samples, n_features)
+        X_tilde : 3D ndarray (n_repeats, n_samples, n_features)
             The synthetic variables.
         """
         self._check_fit()

From b5847a190003c79fd0ddec8ad10439875d3db9e6 Mon Sep 17 00:00:00 2001
From: lionel kusch <lionel.a.kusch@inria.fr>
Date: Wed, 15 Oct 2025 11:07:16 +0200
Subject: [PATCH 40/44] Update
 src/hidimstat/statistical_tools/gaussian_knockoffs.py

Co-authored-by: Joseph Paillard <joseph.paillard@inria.fr>
---
 src/hidimstat/statistical_tools/gaussian_knockoffs.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/hidimstat/statistical_tools/gaussian_knockoffs.py b/src/hidimstat/statistical_tools/gaussian_knockoffs.py
index 03ef19fed..b6633dcb2 100644
--- a/src/hidimstat/statistical_tools/gaussian_knockoffs.py
+++ b/src/hidimstat/statistical_tools/gaussian_knockoffs.py
@@ -140,7 +140,7 @@ def sample(
             X_tildes.append(
                 self.mu_tilde_ + np.dot(u_tilde, self.sigma_tilde_decompose_)
             )
-        return X_tildes
+        return np.stack(X_tildes)
 
 
 def _s_equi(sigma, tol=1e-14):

From 9f1ca5a946a6d697a67ef2084070c009d86a1b23 Mon Sep 17 00:00:00 2001
From: lionel kusch <lionel.a.kusch@inria.fr>
Date: Wed, 15 Oct 2025 11:07:29 +0200
Subject: [PATCH 41/44] Update
 src/hidimstat/statistical_tools/gaussian_knockoffs.py

Co-authored-by: Joseph Paillard <joseph.paillard@inria.fr>
---
 src/hidimstat/statistical_tools/gaussian_knockoffs.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/hidimstat/statistical_tools/gaussian_knockoffs.py b/src/hidimstat/statistical_tools/gaussian_knockoffs.py
index b6633dcb2..f1b846c20 100644
--- a/src/hidimstat/statistical_tools/gaussian_knockoffs.py
+++ b/src/hidimstat/statistical_tools/gaussian_knockoffs.py
@@ -18,7 +18,8 @@ class GaussianKnockoffs:
         Estimator for computing the covariance matrix. Must implement fit and
         have a `covariance_` attribute after fitting.
     tol : float, default=1e-14
-        Tolerance for numerical stability in matrix computations.
+        Tolerance threshold. While the smallest eigenvalue of :math:`2\Sigma - diag(S)` 
+        is smaller than this threshold, S is incrementally increased. 
 
     Attributes
     ----------

From 285690c45e31b37bdb833fad036c2d2129601976 Mon Sep 17 00:00:00 2001
From: lionel kusch <lionel.a.kusch@inria.fr>
Date: Wed, 15 Oct 2025 11:07:38 +0200
Subject: [PATCH 42/44] Update
 src/hidimstat/statistical_tools/gaussian_knockoffs.py

Co-authored-by: Joseph Paillard <joseph.paillard@inria.fr>
---
 src/hidimstat/statistical_tools/gaussian_knockoffs.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/hidimstat/statistical_tools/gaussian_knockoffs.py b/src/hidimstat/statistical_tools/gaussian_knockoffs.py
index f1b846c20..b38bddc8f 100644
--- a/src/hidimstat/statistical_tools/gaussian_knockoffs.py
+++ b/src/hidimstat/statistical_tools/gaussian_knockoffs.py
@@ -119,7 +119,7 @@ def sample(
         Parameters
         ----------
         n_repeats : int, default=1
-            The number of samples to draw.
+            The number of sets of Gaussian knockoff variables
         random_state : int or None, default=None
             The random state to use for sampling.
 

From 2f8b0ba1f656b6f1e3cef8f09fee5cc55eccb247 Mon Sep 17 00:00:00 2001
From: kusch lionel <lionel.a.kusch@inria.fr>
Date: Wed, 15 Oct 2025 11:12:34 +0200
Subject: [PATCH 43/44] format file

---
 src/hidimstat/statistical_tools/gaussian_knockoffs.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/hidimstat/statistical_tools/gaussian_knockoffs.py b/src/hidimstat/statistical_tools/gaussian_knockoffs.py
index b38bddc8f..7f9c85ea9 100644
--- a/src/hidimstat/statistical_tools/gaussian_knockoffs.py
+++ b/src/hidimstat/statistical_tools/gaussian_knockoffs.py
@@ -18,8 +18,8 @@ class GaussianKnockoffs:
         Estimator for computing the covariance matrix. Must implement fit and
         have a `covariance_` attribute after fitting.
     tol : float, default=1e-14
-        Tolerance threshold. While the smallest eigenvalue of :math:`2\Sigma - diag(S)` 
-        is smaller than this threshold, S is incrementally increased. 
+        Tolerance threshold. While the smallest eigenvalue of :math:`2\Sigma - diag(S)`
+        is smaller than this threshold, S is incrementally increased.
 
     Attributes
     ----------

From 43be1835bbbbc57b51a7e53ade2558a2c485f146 Mon Sep 17 00:00:00 2001
From: kusch lionel <lionel.a.kusch@inria.fr>
Date: Wed, 15 Oct 2025 11:16:42 +0200
Subject: [PATCH 44/44] add test with repeat

---
 .../test_gaussian_knockoffs.py                | 31 +++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/test/statistical_tools/test_gaussian_knockoffs.py b/test/statistical_tools/test_gaussian_knockoffs.py
index 769a36ce5..db07ff4e6 100644
--- a/test/statistical_tools/test_gaussian_knockoffs.py
+++ b/test/statistical_tools/test_gaussian_knockoffs.py
@@ -75,6 +75,16 @@ def test_reproducibility_sample():
     assert np.array_equal(X_tilde_1, X_tilde_2)
 
 
+def test_reproducibility_sample_repeat():
+    """Test the repeatability of the samples"""
+    X, _, _, _ = multivariate_simulation(100, 10, seed=0)
+    gaussian_sampler = GaussianKnockoffs(cov_estimator=LedoitWolf())
+    gaussian_sampler.fit(X=X)
+    X_tilde_1 = gaussian_sampler.sample(n_repeats=3, random_state=0)
+    X_tilde_2 = gaussian_sampler.sample(n_repeats=3, random_state=0)
+    assert np.array_equal(X_tilde_1, X_tilde_2)
+
+
 def test_randomness_sample_no_seed():
     """Test the non repeatability of the samples when no seed"""
     X, _, _, _ = multivariate_simulation(100, 10, seed=0)
@@ -85,6 +95,16 @@ def test_randomness_sample_no_seed():
     assert not np.array_equal(X_tilde_1, X_tilde_2)
 
 
+def test_randomness_sample_no_seed_repeat():
+    """Test the non repeatability of the samples when no seed"""
+    X, _, _, _ = multivariate_simulation(100, 10, seed=0)
+    gaussian_sampler = GaussianKnockoffs(cov_estimator=LedoitWolf())
+    gaussian_sampler.fit(X=X)
+    X_tilde_1 = gaussian_sampler.sample(n_repeats=3, random_state=None)
+    X_tilde_2 = gaussian_sampler.sample(n_repeats=3, random_state=None)
+    assert not np.array_equal(X_tilde_1, X_tilde_2)
+
+
 def test_randomness_sample_rgn():
     """Test the non repeatability of the samples when the usage of random generator"""
     X, _, _, _ = multivariate_simulation(100, 10, seed=0)
@@ -94,3 +114,14 @@ def test_randomness_sample_rgn():
     X_tilde_1 = gaussian_sampler_rng.sample(random_state=rng)
     X_tilde_2 = gaussian_sampler_rng.sample(random_state=rng)
     assert not np.array_equal(X_tilde_1, X_tilde_2)
+
+
+def test_randomness_sample_rgn_repeat():
+    """Test the non repeatability of the samples when the usage of random generator"""
+    X, _, _, _ = multivariate_simulation(100, 10, seed=0)
+    rng = np.random.default_rng(0)
+    gaussian_sampler_rng = GaussianKnockoffs(cov_estimator=LedoitWolf())
+    gaussian_sampler_rng.fit(X=X)
+    X_tilde_1 = gaussian_sampler_rng.sample(n_repeats=3, random_state=rng)
+    X_tilde_2 = gaussian_sampler_rng.sample(n_repeats=3, random_state=rng)
+    assert not np.array_equal(X_tilde_1, X_tilde_2)