Skip to content

Commit 7d642a4

Browse files
authored
[Tests] fix unstable tests for desparsified Lasso and knockoffs (#474)
* fix unstable tests * add tolerance to test * use confidence as threshold * remove seed + check fdp and power * same for group desparsified * test interval coverage good 70% of the time * update docstring * typo
1 parent ae16933 commit 7d642a4

File tree

2 files changed

+95
-61
lines changed

2 files changed

+95
-61
lines changed

test/test_desparsified_lasso.py

Lines changed: 69 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,23 @@
1616

1717

1818
def test_desparsified_lasso():
19-
"""Testing the procedure on a simulation with no structure and
20-
a support of size 1. Computing 99% confidence bounds and checking
21-
that they contain the true parameter vector."""
22-
23-
n_samples, n_features = 52, 50
24-
support_size = 1
25-
signal_noise_ratio = 50
19+
"""
20+
Test desparsified lasso on a simple simulation with no structure and
21+
a support of size 5.
22+
- Test that the confidence intervals contain the true beta 70% of the time. This
23+
threshold is arbitrary.
24+
- Test that the empirical false discovery proportion is below the target FDR.
25+
Although this is not guaranteed (control is only in expectation), the scenario
26+
is simple enough for the test to pass
27+
- Test that the true discovery proportion is above 80%; this threshold is arbitrary
28+
"""
29+
30+
n_samples, n_features = 400, 40
31+
support_size = 5
32+
signal_noise_ratio = 32
2633
rho = 0.0
34+
confidence = 0.9
35+
alpha = 1 - confidence
2736

2837
X, y, beta, noise = multivariate_simulation(
2938
n_samples=n_samples,
@@ -32,78 +41,93 @@ def test_desparsified_lasso():
3241
signal_noise_ratio=signal_noise_ratio,
3342
rho=rho,
3443
shuffle=False,
35-
seed=10,
3644
)
37-
expected_pval_corr = np.ones_like(beta) * 0.5
38-
expected_pval_corr[beta != 0] = 0.0
3945

4046
beta_hat, sigma_hat, precision_diag = desparsified_lasso(X, y)
41-
pval, pval_corr, one_minus_pval, one_minus_pval_corr, cb_min, cb_max = (
42-
desparsified_lasso_pvalue(
43-
X.shape[0], beta_hat, sigma_hat, precision_diag, confidence=0.99
44-
)
47+
_, pval_corr, _, _, cb_min, cb_max = desparsified_lasso_pvalue(
48+
X.shape[0], beta_hat, sigma_hat, precision_diag, confidence=confidence
4549
)
46-
assert_almost_equal(beta_hat, beta, decimal=1)
47-
assert_equal(cb_min < beta, True)
48-
assert_equal(cb_max > beta, True)
49-
assert_almost_equal(pval_corr, expected_pval_corr, decimal=1)
50+
# Check that beta is within the confidence intervals
51+
correct_interval = np.sum((beta >= cb_min) & (beta <= cb_max))
52+
assert correct_interval >= int(0.7 * n_features)
53+
54+
# Check p-values for important and non-important features
55+
important = beta != 0
56+
non_important = beta == 0
57+
tp = np.sum(pval_corr[important] < alpha)
58+
fp = np.sum(pval_corr[non_important] < alpha)
59+
assert fp / np.sum(non_important) <= alpha
60+
assert tp / np.sum(important) >= 0.8
5061

5162
beta_hat, sigma_hat, precision_diag = desparsified_lasso(X, y, dof_ajdustement=True)
52-
pval, pval_corr, one_minus_pval, one_minus_pval_corr, cb_min, cb_max = (
53-
desparsified_lasso_pvalue(
54-
X.shape[0], beta_hat, sigma_hat, precision_diag, confidence=0.99
55-
)
63+
_, pval_corr, _, _, cb_min, cb_max = desparsified_lasso_pvalue(
64+
X.shape[0], beta_hat, sigma_hat, precision_diag, confidence=confidence
5665
)
57-
assert_almost_equal(beta_hat, beta, decimal=1)
58-
assert_equal(cb_min < beta, True)
59-
assert_equal(cb_max > beta, True)
60-
assert_almost_equal(pval_corr, expected_pval_corr, decimal=1)
66+
# Check that beta is within the confidence intervals
67+
correct_interval = np.sum((beta >= cb_min) & (beta <= cb_max))
68+
assert correct_interval >= int(0.7 * n_features)
6169

70+
# Check p-values for important and non-important features
71+
tp = np.sum(pval_corr[important] < alpha)
72+
fp = np.sum(pval_corr[non_important] < alpha)
73+
assert fp / np.sum(non_important) <= alpha
74+
assert tp / np.sum(important) >= 0.8
6275

63-
def test_desparsified_group_lasso():
64-
"""Testing the procedure on a simulation with no structure and
65-
a support of size 2. Computing one-sided p-values, we want
66-
low p-values for the features of the support and p-values
67-
close to 0.5 for the others."""
6876

69-
n_samples = 50
70-
n_features = 100
77+
def test_desparsified_group_lasso():
78+
"""
79+
Testing the procedure on a simulation with no structure and a support of size 2.
80+
- Test that the empirical false discovery proportion is below the target FDR.
81+
Although this is not guaranteed (control is only in expectation), the scenario
82+
is simple enough for the test to pass.
83+
- Test that the true discovery proportion is above 80%; this threshold is arbitrary
84+
"""
85+
86+
n_samples = 400
87+
n_features = 40
7188
n_target = 10
72-
support_size = 2
73-
signal_noise_ratio = 5000
89+
support_size = 5
90+
signal_noise_ratio = 32
7491
rho_serial = 0.9
92+
alpha = 0.1
93+
7594
corr = toeplitz(np.geomspace(1, rho_serial ** (n_target - 1), n_target))
7695

77-
X, Y, beta, noise = multivariate_simulation(
96+
X, Y, beta, _ = multivariate_simulation(
7897
n_samples=n_samples,
7998
n_features=n_features,
8099
n_targets=n_target,
81100
support_size=support_size,
82101
rho_serial=rho_serial,
83102
signal_noise_ratio=signal_noise_ratio,
84-
seed=10,
85103
)
86104

87105
beta_hat, theta_hat, precision_diag = desparsified_lasso(
88106
X, Y, multioutput=True, covariance=corr
89107
)
90-
pval, pval_corr, one_minus_pval, one_minus_pval_corr = (
91-
desparsified_group_lasso_pvalue(beta_hat, theta_hat, precision_diag)
108+
_, pval_corr, _, _ = desparsified_group_lasso_pvalue(
109+
beta_hat, theta_hat, precision_diag
92110
)
93111

94-
expected_pval_corr = np.ones_like(beta[:, 0]) * 0.5
95-
expected_pval_corr[beta[:, 0] != 0] = 0.0
112+
important = beta[:, 0] != 0
113+
non_important = beta[:, 0] == 0
96114

97115
assert_almost_equal(beta_hat, beta, decimal=1)
98-
assert_almost_equal(pval_corr, expected_pval_corr, decimal=1)
116+
tp = np.sum(pval_corr[important] < alpha)
117+
fp = np.sum(pval_corr[non_important] < alpha)
118+
assert fp / np.sum(non_important) <= alpha
119+
assert tp / np.sum(important) >= 0.8
99120

100121
beta_hat, theta_hat, precision_diag = desparsified_lasso(X, Y, multioutput=True)
101-
pval, pval_corr, one_minus_pval, one_minus_pval_corr = (
102-
desparsified_group_lasso_pvalue(beta_hat, theta_hat, precision_diag, test="F")
122+
_, pval_corr, _, _ = desparsified_group_lasso_pvalue(
123+
beta_hat, theta_hat, precision_diag
103124
)
104125

105126
assert_almost_equal(beta_hat, beta, decimal=1)
106-
assert_almost_equal(pval_corr, expected_pval_corr, decimal=1)
127+
tp = np.sum(pval_corr[important] < alpha)
128+
fp = np.sum(pval_corr[non_important] < alpha)
129+
assert fp / np.sum(non_important) <= alpha
130+
assert tp / np.sum(important) >= 0.8
107131

108132
# Testing error is raised when the covariance matrix has wrong shape
109133
bad_cov = np.delete(corr, 0, axis=1)

test/test_knockoff.py

Lines changed: 26 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -205,38 +205,48 @@ def test_model_x_knockoff_exception():
205205
def test_estimate_distribution():
    """
    Test the different estimations of the covariance used to build knockoffs.

    - Test that the empirical false discovery proportion is below the target FDR.
      Although this is not guaranteed (control only holds in expectation), the
      scenario is simple enough for the test to pass.
    - Test that the true discovery proportion is above 80%; this threshold is
      arbitrary.
    """
    fdr = 0.2
    n = 400
    p = 100
    signal_noise_ratio = 32
    support_size = 5

    X, y, beta, noise = multivariate_simulation(
        n, p, support_size=support_size, signal_noise_ratio=signal_noise_ratio
    )
    non_zero = np.where(beta)[0]

    # Knockoffs built from a Ledoit-Wolf shrinkage estimate of the covariance.
    selected, _, _, _ = model_x_knockoff(
        X,
        y,
        cov_estimator=LedoitWolf(assume_centered=True),
        n_bootstraps=1,
        fdr=fdr,
    )
    tp = len(set(selected) & set(non_zero))
    fp = len(set(selected) - set(non_zero))
    assert fp / (p - len(non_zero)) <= fdr
    assert tp / len(non_zero) >= 0.8

    # Knockoffs built from a cross-validated graphical-lasso covariance estimate.
    selected, _, _, _ = model_x_knockoff(
        X,
        y,
        cov_estimator=GraphicalLassoCV(
            alphas=[1e-3, 1e-2, 1e-1, 1],
            cv=KFold(n_splits=5, shuffle=True),
        ),
        n_bootstraps=1,
        fdr=fdr,
    )
    tp = len(set(selected) & set(non_zero))
    fp = len(set(selected) - set(non_zero))
    assert fp / (p - len(non_zero)) <= fdr
    assert tp / len(non_zero) >= 0.8
240250

241251

242252
def test_gaussian_knockoff_equi():

0 commit comments

Comments
 (0)