2 changes: 1 addition & 1 deletion docs/src/api.rst
@@ -24,7 +24,6 @@ Functions
desparsified_group_lasso_pvalue
ensemble_clustered_inference
ensemble_clustered_inference_pvalue
model_x_knockoff
reid

Classes
@@ -36,6 +35,7 @@ Classes

BaseVariableImportance
BasePerturbation
ModelXKnockoff
LOCO
CFI
PFI
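The api.rst change above tracks the library's move from the model_x_knockoff function to the ModelXKnockoff class. As a minimal sketch of the new call pattern used throughout the updated examples below (the make_regression data is only a stand-in for illustration):

import numpy as np
from sklearn.datasets import make_regression

from hidimstat import ModelXKnockoff

# toy regression data, for illustration only
X, y = make_regression(n_samples=200, n_features=50, n_informative=10, random_state=0)

# single knockoff draw, mirroring the n_repeat=1 usage in the examples
model_x_knockoff = ModelXKnockoff(n_repeat=1)
model_x_knockoff.fit_importance(X, y)
selection = model_x_knockoff.selection_fdr(fdr=0.2)  # boolean mask over the features
print(np.where(selection)[0])  # indices of the selected features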
55 changes: 15 additions & 40 deletions examples/plot_knockoff_aggregation.py
@@ -20,16 +20,9 @@
from joblib import Parallel, delayed
import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import LassoCV
from sklearn.model_selection import KFold
from sklearn.utils import check_random_state

from hidimstat.knockoffs import (
model_x_knockoff,
model_x_knockoff_bootstrap_e_value,
model_x_knockoff_bootstrap_quantile,
model_x_knockoff_pvalue,
)
from hidimstat.knockoffs import ModelXKnockoff
from hidimstat.statistical_tools.multiple_testing import fdp_power
from hidimstat._utils.scenario import multivariate_simulation

@@ -95,46 +88,28 @@ def single_run(
non_zero_index = np.where(beta_true)[0]

# Use model-X Knockoffs [1]
selected, test_scores, threshold, X_tildes = model_x_knockoff(
X,
y,
estimator=LassoCV(
n_jobs=1,
cv=KFold(n_splits=5, shuffle=True, random_state=0),
),
n_bootstraps=1,
random_state=seed,
)
mx_selection, _ = model_x_knockoff_pvalue(test_scores, fdr=fdr)
fdp_mx, power_mx = fdp_power(mx_selection, non_zero_index)
model_x_knockoff = ModelXKnockoff(n_repeat=1)
model_x_knockoff.fit_importance(X, y)
mx_selection = model_x_knockoff.selection_fdr(fdr=fdr)
fdp_mx, power_mx = fdp_power(np.where(mx_selection)[0], non_zero_index)

# Use aggregation model-X Knockoffs [2]
selected, test_scores, threshold, X_tildes = model_x_knockoff(
X,
y,
estimator=LassoCV(
n_jobs=1,
cv=KFold(n_splits=5, shuffle=True, random_state=0),
),
n_bootstraps=n_bootstraps,
n_jobs=1,
random_state=seed,
)
model_x_knockoff_repeat = ModelXKnockoff(n_repeat=n_bootstraps)
model_x_knockoff_repeat.fit_importance(X, y)

# Use p-values aggregation [2]
aggregated_ko_selection, _, _ = model_x_knockoff_bootstrap_quantile(
test_scores, fdr=fdr, adaptive_aggregation=True
aggregated_ko_selection = model_x_knockoff_repeat.selection_fdr(
fdr=fdr, adaptive_aggregation=True
)
fdp_pval, power_pval = fdp_power(
np.where(aggregated_ko_selection)[0], non_zero_index
)

fdp_pval, power_pval = fdp_power(aggregated_ko_selection, non_zero_index)

# Use e-values aggregation [3]
eval_selection, _, _ = model_x_knockoff_bootstrap_e_value(
test_scores, threshold, fdr=fdr
eval_selection = model_x_knockoff_repeat.selection_fdr(
fdr=fdr, fdr_control="ebh", evalues=True
)

fdp_eval, power_eval = fdp_power(eval_selection, non_zero_index)

fdp_eval, power_eval = fdp_power(np.where(eval_selection)[0], non_zero_index)
return fdp_mx, fdp_pval, fdp_eval, power_mx, power_pval, power_eval


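Note that the new fdp_power calls above wrap the selection in np.where(...)[0]: judging from this diff, selection_fdr returns a boolean mask over features while fdp_power expects integer indices. A tiny illustration of that conversion, with a made-up mask and support:

import numpy as np

from hidimstat.statistical_tools.multiple_testing import fdp_power

# made-up selection mask and true support, for illustration only
selection_mask = np.array([True, False, True, False, False, True])
non_zero_index = np.array([0, 2, 4])

fdp, power = fdp_power(np.where(selection_mask)[0], non_zero_index)
print(fdp, power)  # 1 of 3 selections is false (fdp ~ 0.33), 2 of 3 true features found (power ~ 0.67)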
62 changes: 39 additions & 23 deletions examples/plot_knockoffs_wisconsin.py
@@ -139,30 +139,44 @@
# We use the Model-X Knockoff procedure to control the FDR (False Discovery Rate). The
# selection of variables is based on the Lasso Coefficient Difference (LCD) statistic
# :footcite:t:`candes2018panning`.
from hidimstat import model_x_knockoff

fdr = 0.2

selected, test_scores, threshold, X_tildes = model_x_knockoff(
from sklearn.covariance import LedoitWolf
from hidimstat import ModelXKnockoff
from hidimstat.statistical_tools.lasso_test import lasso_statistic_with_sampling
from hidimstat.statistical_tools.gaussian_distribution import GaussianDistribution


def logistic_test(X, X_tilde, y):
return lasso_statistic_with_sampling(
X,
X_tilde,
y,
lasso=LogisticRegressionCV(
solver="liblinear",
penalty="l1",
Cs=np.logspace(-3, 3, 10),
random_state=rng,
tol=1e-3,
max_iter=1000,
),
preconfigure_lasso=None,
)


model_x_knockoff = ModelXKnockoff(
generator=GaussianDistribution(
cov_estimator=LedoitWolf(assume_centered=True), random_state=0, tol=1e-15
),
statistical_test=logistic_test,
n_repeat=1,
)
importance = model_x_knockoff.fit_importance(
noisy_train,
y_train,
estimator=LogisticRegressionCV(
solver="liblinear",
penalty="l1",
Cs=np.logspace(-3, 3, 10),
random_state=rng,
tol=1e-3,
max_iter=1000,
),
n_bootstraps=1,
random_state=0,
tol_gauss=1e-15,
preconfigure_estimator=None,
fdr=fdr,
)
selection = model_x_knockoff.selection_fdr(fdr=0.2)

# Count how many selected features are actually noise
num_false_discoveries = np.sum(selected >= p)
num_false_discoveries = np.sum(selection[p:])
print(f"Knockoffs make at least {num_false_discoveries} False Discoveries")


@@ -177,11 +191,11 @@
import matplotlib.pyplot as plt
import seaborn as sns

selected_mask = np.array(["not selected"] * len(test_scores))
selected_mask[selected] = "selected"
selected_mask = np.array(["not selected"] * len(importance))
selected_mask[selection] = "selected"
df_ko = pd.DataFrame(
{
"score": test_scores,
"score": importance,
"variable": feature_names_noise,
"selected": selected_mask,
}
Expand All @@ -202,7 +216,9 @@
ax=ax,
palette={"selected": "tab:red", "not selected": "tab:gray"},
)
ax.axvline(x=threshold, color="k", linestyle="--", label="Threshold")
ax.axvline(
x=model_x_knockoff.threshold_fdr_, color="k", linestyle="--", label="Threshold"
)
ax.legend()
ax.set_xlabel("KO statistic (LCD)")
ax.set_ylabel("")
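The Wisconsin example above swaps the default statistic for a logistic-regression Lasso Coefficient Difference (LCD), the statistic of Candès et al. (2018) cited in the comment. As a reminder of the underlying idea, a hand-rolled sketch (not hidimstat's lasso_statistic_with_sampling; the lcd_statistic helper is hypothetical):

import numpy as np
from sklearn.linear_model import LogisticRegressionCV


def lcd_statistic(X, X_tilde, y):
    """Lasso Coefficient Difference W_j = |b_j| - |b_{j+p}| from an
    L1-penalised logistic fit on the concatenated matrix [X, X_tilde]."""
    p = X.shape[1]
    clf = LogisticRegressionCV(
        penalty="l1", solver="liblinear", Cs=np.logspace(-3, 3, 10), max_iter=1000
    )
    clf.fit(np.hstack([X, X_tilde]), y)
    coef = np.abs(clf.coef_.ravel())
    # a large positive W_j means the original feature beats its knockoff copy
    return coef[:p] - coef[p:]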
2 changes: 1 addition & 1 deletion examples/plot_pitfalls_permutation_importance.py
@@ -26,7 +26,7 @@
from sklearn.preprocessing import StandardScaler

from hidimstat import CFI, PFI
from hidimstat.conditional_sampling import ConditionalSampler
from hidimstat.statistical_tools.conditional_sampling import ConditionalSampler

rng = np.random.RandomState(0)

12 changes: 2 additions & 10 deletions src/hidimstat/__init__.py
@@ -15,12 +15,7 @@
)
from .distilled_conditional_randomization_test import d0crt, D0CRT
from .conditional_feature_importance import CFI
from .knockoffs import (
model_x_knockoff,
model_x_knockoff_pvalue,
model_x_knockoff_bootstrap_quantile,
model_x_knockoff_bootstrap_e_value,
)
from .knockoffs import ModelXKnockoff
from .leave_one_covariate_out import LOCO
from .noise_std import reid
from .permutation_feature_importance import PFI
@@ -44,10 +39,7 @@
"desparsified_lasso_pvalue",
"desparsified_group_lasso_pvalue",
"reid",
"model_x_knockoff",
"model_x_knockoff_pvalue",
"model_x_knockoff_bootstrap_quantile",
"model_x_knockoff_bootstrap_e_value",
"ModelXKnockoff",
"CFI",
"LOCO",
"PFI",