Merged

Commits (56)
72094ff
Improve reproducibility example
lionelkusch Aug 22, 2025
fb8acdb
Improve reproducibility in code base
lionelkusch Aug 22, 2025
93fd34f
Tests for reproducibility
lionelkusch Aug 22, 2025
4261f07
fix error
lionelkusch Aug 22, 2025
7ea339f
Merge branch 'main' into PR_remove_randomize
lionelkusch Aug 25, 2025
46e9961
homogenize management of seeds in examples
lionelkusch Aug 25, 2025
b5a4b08
Merge branch 'main' into PR_remove_randomize
lionelkusch Aug 25, 2025
8085706
fix randomize in plot_knockoff_aggregation
lionelkusch Aug 25, 2025
e68a9a3
fix error in plot
lionelkusch Aug 25, 2025
0b90779
fix path
lionelkusch Aug 25, 2025
8f4011f
fix seed for seaborn
lionelkusch Aug 25, 2025
fc0d21f
improve seed setting
lionelkusch Aug 26, 2025
99ed1e6
change seed in methods
lionelkusch Aug 26, 2025
6f98698
Merge branch 'main' into PR_remove_randomize
lionelkusch Aug 26, 2025
9ac2059
Fix seed in test and example
lionelkusch Aug 26, 2025
370f8d2
Merge branch 'main' into PR_remove_randomize
lionelkusch Aug 27, 2025
5d5a6d8
Fix seeds
lionelkusch Aug 27, 2025
25f0b3e
Apply suggestions from code review
lionelkusch Aug 28, 2025
7782e6f
remove some seed
lionelkusch Aug 28, 2025
f21edd7
change seed management
lionelkusch Aug 28, 2025
1dfcb63
remove rng
lionelkusch Aug 28, 2025
54fe2e9
fix name
lionelkusch Aug 28, 2025
072d319
change seed
lionelkusch Sep 2, 2025
814c24b
Merge branch 'main' into PR_remove_randomize
lionelkusch Sep 2, 2025
469857f
Apply suggestions from code review
lionelkusch Sep 3, 2025
02133c8
Merge branch 'main' into PR_remove_randomize
lionelkusch Sep 3, 2025
1dd12cc
fix bug for random_generator in perturbation cases
lionelkusch Sep 3, 2025
d6b8d1e
fix test
lionelkusch Sep 3, 2025
6718931
fix randomization
lionelkusch Sep 3, 2025
729b28a
update the way to set seeds
lionelkusch Sep 4, 2025
02a4c64
Update src/hidimstat/conditional_feature_importance.py
jpaillard Sep 8, 2025
d6314ca
add a new way to check random
lionelkusch Sep 9, 2025
930c24e
Merge branch 'main' into PR_remove_randomize
lionelkusch Sep 9, 2025
f9df301
Apply suggestions from code review
lionelkusch Sep 9, 2025
347a6af
change predict signature
lionelkusch Sep 9, 2025
05ca2b8
update docstring of check_random_state
lionelkusch Sep 9, 2025
b13f7b2
improve seed management
lionelkusch Sep 9, 2025
fad5127
remove management of seed
lionelkusch Sep 10, 2025
8fd630c
remove some management of seeds
lionelkusch Sep 10, 2025
f1eec67
Merge branch 'main' into PR_remove_randomize
lionelkusch Sep 10, 2025
7b50103
fix missing revert
lionelkusch Sep 10, 2025
bf4a909
fix clone
lionelkusch Sep 10, 2025
999cf8f
fix tests
lionelkusch Sep 10, 2025
3531d06
fix tests
lionelkusch Sep 10, 2025
f1faa3c
Apply suggestions from code review
lionelkusch Sep 11, 2025
8ff07ea
Apply suggestions from code review
lionelkusch Sep 11, 2025
dfc1c9b
Apply suggestions from code review
lionelkusch Sep 11, 2025
8a0b9b8
Merge branch 'main' into PR_remove_randomize
lionelkusch Sep 11, 2025
a232fa6
format document
lionelkusch Sep 11, 2025
6b5f521
Merge branch 'main' into PR_remove_randomize
lionelkusch Sep 23, 2025
31ba5f5
fix example
lionelkusch Sep 23, 2025
08fa0d4
fix language
lionelkusch Sep 23, 2025
aba9798
fix order import
lionelkusch Sep 23, 2025
ce15242
Apply suggestions from code review
lionelkusch Sep 23, 2025
368c0d8
change random state to generator
lionelkusch Sep 23, 2025
c8734d1
fix example
lionelkusch Sep 23, 2025
Files changed
examples/plot_2D_simulation_example.py (11 changes: 4 additions & 7 deletions)

@@ -80,7 +80,7 @@

 # generating the data
 X_init, y, beta, epsilon = multivariate_simulation_spatial(
-    n_samples, shape, roi_size, signal_noise_ratio, smooth_X, seed=1
+    n_samples, shape, roi_size, signal_noise_ratio, smooth_X, seed=0
 )

 # %%
@@ -188,6 +188,7 @@ def weight_map_2D_extended(shape, roi_size, delta):
     X_init,
     y,
     n_jobs=n_jobs,
+    seed=0,
 )
 pval, pval_corr, one_minus_pval, one_minus_pval_corr, cb_min, cb_max = (
     desparsified_lasso_pvalue(
@@ -221,7 +222,7 @@ def weight_map_2D_extended(shape, roi_size, delta):

 # clustered desparsified lasso (CluDL)
 ward_, beta_hat, theta_hat, omega_diag = clustered_inference(
-    X_init, y, ward, n_clusters, scaler_sampling=StandardScaler()
+    X_init, y, ward, n_clusters, scaler_sampling=StandardScaler(), seed=0
 )
 beta_hat, pval, pval_corr, one_minus_pval, one_minus_pval_corr = (
     clustered_inference_pvalue(
@@ -253,11 +254,7 @@ def weight_map_2D_extended(shape, roi_size, delta):
 # ensemble of clustered desparsified lasso (EnCluDL)
 list_ward, list_beta_hat, list_theta_hat, list_omega_diag = (
     ensemble_clustered_inference(
-        X_init,
-        y,
-        ward,
-        n_clusters,
-        scaler_sampling=StandardScaler(),
+        X_init, y, ward, n_clusters, scaler_sampling=StandardScaler(), seed=0
     )
 )
 beta_hat, selected_ecdl = ensemble_clustered_inference_pvalue(
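
The pattern in this diff, threading an explicit seed= argument through every inference call, is what makes the example rerunnable. Below is a minimal sketch of the same idea using only numpy; the run_analysis helper and its internals are illustrative stand-ins, not hidimstat API.

import numpy as np

def run_analysis(seed=0):
    # spawn one independent child generator per stochastic component,
    # instead of letting each component read the global numpy state
    children = np.random.SeedSequence(seed).spawn(2)
    rng_data = np.random.default_rng(children[0])
    rng_clustering = np.random.default_rng(children[1])
    X = rng_data.normal(size=(10, 5))                # stands in for data simulation
    labels = rng_clustering.permutation(X.shape[1])  # stands in for clustering randomness
    return X, labels

# two runs with the same seed are bit-for-bit identical
X1, l1 = run_analysis(seed=0)
X2, l2 = run_analysis(seed=0)
assert np.array_equal(X1, X2) and np.array_equal(l1, l2)
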
examples/plot_conditional_vs_marginal_xor_data.py (10 changes: 5 additions & 5 deletions)

@@ -34,9 +34,9 @@
     X,
     Y,
     test_size=0.2,
-    random_state=0,
+    random_state=1,
 )
-model = SVC(kernel="rbf", random_state=0)
+model = SVC(kernel="rbf", random_state=2)
 model.fit(X_train, y_train)


@@ -88,8 +88,8 @@
 # features. Conditional importance, on the other hand, reveals that both features
 # are important (therefore rejecting the null hypothesis
 # :math:`Y \perp\!\!\!\perp X^1 | X^2`).
-cv = KFold(n_splits=5, shuffle=True, random_state=0)
-clf = SVC(kernel="rbf", random_state=0)
+cv = KFold(n_splits=5, shuffle=True, random_state=3)
+clf = SVC(kernel="rbf", random_state=4)

 # %%
 # Compute marginal importance using univariate models.
@@ -126,7 +126,7 @@
     loss=hinge_loss,
     imputation_model_continuous=RidgeCV(np.logspace(-3, 3, 10)),
     n_permutations=50,
-    random_state=0,
+    random_state=5,
 )
 vim.fit(X_train, y_train)
 importances.append(vim.importance(X_test, y_test)["importance"])
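
The renumbering in this diff (0, 1, 2, ...) gives every stochastic component its own random_state instead of recycling 0 everywhere. A small self-contained illustration of the same convention with scikit-learn; the dataset and seed values are made up for the sketch.

import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import KFold, train_test_split
from sklearn.svm import SVC

X, y = make_classification(n_samples=200, n_features=4, random_state=0)
# one distinct seed per source of randomness: split, model, CV shuffling
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)
model = SVC(kernel="rbf", random_state=2)
cv = KFold(n_splits=5, shuffle=True, random_state=3)
model.fit(X_train, y_train)

Distinct seeds make it obvious which component a given draw belongs to, and avoid accidentally synchronizing randomness that is meant to be independent.
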
examples/plot_dcrt_example.py (10 changes: 5 additions & 5 deletions)

@@ -25,7 +25,6 @@
 results_list = []
 for sim_ind in range(10):
     print(f"Processing: {sim_ind+1}")
-    np.random.seed(sim_ind)

     # Number of observations
     n = 100
@@ -55,7 +54,9 @@

     ## dcrt Lasso ##
     d0crt_lasso = D0CRT(
-        estimator=LassoCV(random_state=42, n_jobs=1), screening_threshold=None
+        estimator=LassoCV(random_state=sim_ind, n_jobs=1),
+        screening_threshold=None,
+        random_state=sim_ind,
     )
     d0crt_lasso.fit_importance(X, y)
     pvals_lasso = d0crt_lasso.pvalues_
@@ -71,11 +72,10 @@
     ## dcrt Random Forest ##
     d0crt_random_forest = D0CRT(
         estimator=RandomForestRegressor(
-            n_estimators=100,
-            random_state=42,
-            n_jobs=1,
+            n_estimators=100, random_state=sim_ind, n_jobs=1
         ),
         screening_threshold=None,
+        random_state=sim_ind,
     )
     d0crt_random_forest.fit_importance(X, y)
     pvals_forest = d0crt_random_forest.pvalues_
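
This diff drops the per-iteration np.random.seed(sim_ind) in favor of passing sim_ind directly to each estimator and to D0CRT. A sketch of that per-run seeding pattern with plain numpy and scikit-learn; the toy data and model are stand-ins for the example's setup.

import numpy as np
from sklearn.linear_model import LassoCV

for sim_ind in range(3):
    # a local generator per run, instead of mutating global numpy state
    rng = np.random.default_rng(sim_ind)
    X = rng.normal(size=(100, 10))
    y = X @ rng.normal(size=10) + rng.normal(size=100)
    # the estimator is seeded explicitly from the loop index
    estimator = LassoCV(random_state=sim_ind, n_jobs=1)
    estimator.fit(X, y)
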
examples/plot_diabetes_variable_importance_example.py (19 changes: 12 additions & 7 deletions)

@@ -70,9 +70,12 @@
 # diabetes dataset.

 n_folds = 5
-regressor = RidgeCV(alphas=np.logspace(-3, 3, 10))
+regressor = RidgeCV(
+    alphas=np.logspace(-3, 3, 10),
+    cv=KFold(shuffle=True, random_state=20),
+)
 regressor_list = [clone(regressor) for _ in range(n_folds)]
-kf = KFold(n_splits=n_folds, shuffle=True, random_state=0)
+kf = KFold(n_splits=n_folds, shuffle=True, random_state=21)
 for i, (train_index, test_index) in enumerate(kf.split(X)):
     regressor_list[i].fit(X[train_index], y[train_index])
     score = r2_score(
@@ -112,19 +115,21 @@
 # --------------------------------------------------------

 cfi_importance_list = []
+kf = KFold(n_splits=n_folds, shuffle=True, random_state=21)
 for i, (train_index, test_index) in enumerate(kf.split(X)):
     print(f"Fold {i}")
     X_train, X_test = X[train_index], X[test_index]
     y_train, y_test = y[train_index], y[test_index]
     cfi = CFI(
         estimator=regressor_list[i],
-        imputation_model_continuous=RidgeCV(alphas=np.logspace(-3, 3, 10)),
+        imputation_model_continuous=RidgeCV(alphas=np.logspace(-3, 3, 10), cv=KFold()),
         imputation_model_categorical=LogisticRegressionCV(
             Cs=np.logspace(-2, 2, 10),
+            cv=KFold(),
         ),
         # covariate_estimator=HistGradientBoostingRegressor(random_state=0,),
         n_permutations=50,
-        random_state=0,
+        random_state=24,
         n_jobs=4,
     )
     cfi.fit(X_train, y_train)
@@ -136,7 +141,7 @@
 # ---------------------------------------------------------

 loco_importance_list = []
-
+kf = KFold(n_splits=n_folds, shuffle=True, random_state=21)
 for i, (train_index, test_index) in enumerate(kf.split(X)):
     print(f"Fold {i}")
     X_train, X_test = X[train_index], X[test_index]
@@ -155,15 +160,15 @@
 # ----------------------------------------------------------------

 pfi_importance_list = []
-
+kf = KFold(n_splits=n_folds, shuffle=True, random_state=21)
 for i, (train_index, test_index) in enumerate(kf.split(X)):
     print(f"Fold {i}")
     X_train, X_test = X[train_index], X[test_index]
     y_train, y_test = y[train_index], y[test_index]
     pfi = PFI(
         estimator=regressor_list[i],
         n_permutations=50,
-        random_state=0,
+        random_state=25,
         n_jobs=4,
     )
     pfi.fit(X_train, y_train)
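
Re-creating kf with the same random_state=21 before each loop guarantees that CFI, LOCO, and PFI are scored on identical folds. A quick self-contained check of the property this relies on, with toy data and scikit-learn only:

import numpy as np
from sklearn.model_selection import KFold

X = np.arange(20).reshape(10, 2)
folds_a = [test for _, test in KFold(n_splits=5, shuffle=True, random_state=21).split(X)]
folds_b = [test for _, test in KFold(n_splits=5, shuffle=True, random_state=21).split(X)]
# a seeded KFold is deterministic: both constructions yield the same folds
assert all(np.array_equal(a, b) for a, b in zip(folds_a, folds_b))
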
examples/plot_fmri_data_example.py (18 changes: 13 additions & 5 deletions)

@@ -64,7 +64,7 @@
 new_soft_limit = limit_5G if soft < 0 else min(limit_5G, soft)
 new_hard_limit = limit_5G if hard < 0 else min(limit_5G, hard)
 resource.setrlimit(resource.RLIMIT_AS, (new_soft_limit, new_hard_limit))
-n_job = 1
+n_jobs = 1


 # %%
@@ -149,7 +149,7 @@ def preprocess_haxby(subject=2, memory=None):
 #
 try:
     beta_hat, sigma_hat, precision_diagonal = desparsified_lasso(
-        X, y, noise_method="median", max_iteration=1000
+        X, y, noise_method="median", max_iteration=1000, seed=0, n_jobs=n_jobs
     )
     pval_dl, _, one_minus_pval_dl, _, cb_min, cb_max = desparsified_lasso_pvalue(
         X.shape[0], beta_hat, sigma_hat, precision_diagonal
@@ -163,7 +163,14 @@ def preprocess_haxby(subject=2, memory=None):
 # Now, the clustered inference algorithm which combines parcellation
 # and high-dimensional inference (c.f. References).
 ward_, beta_hat, theta_hat, omega_diag = clustered_inference(
-    X, y, ward, n_clusters, scaler_sampling=StandardScaler(), tolerance=1e-2
+    X,
+    y,
+    ward,
+    n_clusters,
+    scaler_sampling=StandardScaler(),
+    tolerance=1e-2,
+    seed=1,
+    n_jobs=n_jobs,
 )
 beta_hat, pval_cdl, _, one_minus_pval_cdl, _ = clustered_inference_pvalue(
     X.shape[0], None, ward_, beta_hat, theta_hat, omega_diag
@@ -176,7 +183,7 @@ def preprocess_haxby(subject=2, memory=None):
 # which means that 5 different parcellations are considered and
 # then 5 statistical maps are produced and aggregated into one.
 # However you might benefit from clustering randomization taking
-# `n_bootstraps=25` or `n_bootstraps=100`, also we set `n_jobs=2`.
+# `n_bootstraps=25` or `n_bootstraps=100`, also we set `n_jobs`.
 list_ward, list_beta_hat, list_theta_hat, list_omega_diag = (
     ensemble_clustered_inference(
         X,
@@ -188,7 +195,8 @@ def preprocess_haxby(subject=2, memory=None):
         n_bootstraps=5,
         max_iteration=6000,
         tolerance=1e-2,
-        n_jobs=2,
+        seed=2,
+        n_jobs=n_jobs,
     )
 )
 beta_hat, selected = ensemble_clustered_inference_pvalue(
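
The three inference calls in this diff receive distinct seeds (0, 1, 2) rather than one shared value. The reason, in a few lines of numpy: generators built from the same seed replay the same stream, which would silently correlate steps that are meant to be randomized independently.

import numpy as np

a = np.random.default_rng(0).normal(size=3)
b = np.random.default_rng(0).normal(size=3)
c = np.random.default_rng(1).normal(size=3)
assert np.array_equal(a, b)      # same seed, same stream
assert not np.array_equal(a, c)  # distinct seeds, distinct streams
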
examples/plot_importance_classification_iris.py (26 changes: 19 additions & 7 deletions)

@@ -36,6 +36,8 @@

 from hidimstat import CFI, PFI

+# Define the seeds for the reproducibility of the example
+rng = np.random.RandomState(0)
 # %%
 # Load the iris dataset and add a spurious feature
 # ------------------------------------------------
@@ -45,7 +47,6 @@
 # contrarily to `CFI`.

 dataset = load_iris()
-rng = np.random.RandomState(0)
 X, y = dataset.data, dataset.target
 spurious_feat = X[:, 2] + X[:, 3]
 spurious_feat += rng.normal(size=X.shape[0], scale=np.std(spurious_feat) / 2)
@@ -86,17 +87,19 @@ def run_one_fold(
     if vim_name == "CFI":
         vim = CFI(
             estimator=model_c,
-            imputation_model_continuous=RidgeCV(alphas=np.logspace(-3, 3, 10)),
+            imputation_model_continuous=RidgeCV(
+                alphas=np.logspace(-3, 3, 10), cv=KFold(shuffle=True, random_state=1)
+            ),
             n_permutations=50,
-            random_state=0,
+            random_state=2,
             method=method,
             loss=loss,
         )
     elif vim_name == "PFI":
         vim = PFI(
             estimator=model_c,
             n_permutations=50,
-            random_state=0,
+            random_state=3,
             method=method,
             loss=loss,
         )
@@ -124,10 +127,19 @@
 # combination, in parallel.

 models = [
-    LogisticRegressionCV(Cs=np.logspace(-3, 3, 10), tol=1e-3, max_iter=1000),
-    GridSearchCV(SVC(kernel="rbf"), {"C": np.logspace(-3, 3, 10)}),
+    LogisticRegressionCV(
+        Cs=np.logspace(-3, 3, 10),
+        tol=1e-3,
+        max_iter=1000,
+        cv=KFold(shuffle=True, random_state=4),
+    ),
+    GridSearchCV(
+        SVC(kernel="rbf"),
+        {"C": np.logspace(-3, 3, 10)},
+        cv=KFold(shuffle=True, random_state=5),
+    ),
 ]
-cv = KFold(n_splits=5, shuffle=True, random_state=0)
+cv = KFold(n_splits=5, shuffle=True, random_state=6)
 groups = {ft: [i] for i, ft in enumerate(dataset.feature_names)}
 out_list = Parallel(n_jobs=5)(
     delayed(run_one_fold)(
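
Moving rng = np.random.RandomState(0) up next to the imports, as this diff does, means the generator exists before any code that draws from it. A minimal sketch of the convention, with illustrative values:

import numpy as np

# one module-level generator, created right after the imports,
# reused for every ad-hoc draw in the example
rng = np.random.RandomState(0)

noise = rng.normal(size=150, scale=0.5)  # e.g. noise for a spurious feature
jitter = rng.normal(1.0, 0.05)           # e.g. horizontal jitter in a plot
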
examples/plot_knockoff_aggregation.py (20 changes: 9 additions & 11 deletions)

@@ -18,7 +18,6 @@
 from joblib import Parallel, delayed
 from sklearn.linear_model import LassoCV
 from sklearn.model_selection import KFold
-from sklearn.utils import check_random_state

 from hidimstat._utils.scenario import multivariate_simulation
 from hidimstat.knockoffs import (
@@ -54,15 +53,12 @@
 signal_noise_ratio = 10
 # number of repetitions for the bootstraps
 n_bootstraps = 25
-# seed for the random generator
-seed = 45
 # number of jobs for repetition of the method
 n_jobs = 2
 # verbosity of the joblib
 joblib_verbose = 0

-rng = check_random_state(seed)
-seed_list = rng.randint(1, np.iinfo(np.int32).max, runs)
+# Define the seeds for the reproducibility of the example
+rng = np.random.RandomState(42)


 # %%
@@ -96,9 +92,10 @@ def single_run(
         estimator=LassoCV(
             n_jobs=1,
             cv=KFold(n_splits=5, shuffle=True, random_state=0),
+            random_state=1,
         ),
         n_bootstraps=1,
-        random_state=seed,
+        random_state=2,
     )
     mx_selection, _ = model_x_knockoff_pvalue(test_scores, fdr=fdr)
     fdp_mx, power_mx = fdp_power(mx_selection, non_zero_index)
@@ -109,11 +106,12 @@ def single_run(
         y,
         estimator=LassoCV(
             n_jobs=1,
-            cv=KFold(n_splits=5, shuffle=True, random_state=0),
+            cv=KFold(n_splits=5, shuffle=True, random_state=3),
+            random_state=4,
         ),
         n_bootstraps=n_bootstraps,
         n_jobs=1,
-        random_state=seed,
+        random_state=5,
     )

     # Use p-values aggregation [2]
@@ -141,7 +139,7 @@ def plot_results(bounds, fdr, n_samples, n_features, power=False):
     for nb in range(len(bounds)):
         for i in range(len(bounds[nb])):
             y = bounds[nb][i]
-            x = np.random.normal(nb + 1, 0.05)
+            x = rng.normal(nb + 1, 0.05)
             plt.scatter(x, y, alpha=0.65, c="blue")

     plt.boxplot(bounds, sym="")
@@ -184,7 +182,7 @@ def effect_number_samples(n_samples):
             n_bootstraps,
             seed=seed,
         )
-        for seed in seed_list
+        for seed in range(runs)
    )

     fdps_mx = []
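
Iterating over range(runs) replaces the pre-drawn seed_list, so each repetition's seed is predictable and independent of any shared generator. A self-contained sketch of the pattern with joblib; the worker body is a stand-in for single_run.

import numpy as np
from joblib import Parallel, delayed

runs = 5

def single_run(seed):
    # each repetition builds its own generator from a small, fixed seed
    rng = np.random.default_rng(seed)
    return rng.normal()

results = Parallel(n_jobs=2)(delayed(single_run)(seed) for seed in range(runs))
# rerunning yields the same results, regardless of worker scheduling
assert results == Parallel(n_jobs=2)(delayed(single_run)(seed) for seed in range(runs))
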