.. footbibliography::

"""
# %%
# Generating the data
# -------------------
# example.

# simulation parameters

from hidimstat._utils.scenario import multivariate_simulation_spatial

n_samples = 100
shape = (40, 40)
n_features = shape[1] * shape[0]
    n_samples, shape, roi_size, signal_noise_ratio, smooth_X, seed=0
)


# %%
# Choosing inference parameters
# -----------------------------
delta = 6

# number of workers
n_jobs = 4


# %%
# Computing z-score thresholds for support estimation
# ---------------------------------------------------
# consists in dividing by the number of clusters.


from hidimstat.statistical_tools.p_values import zscore_from_pval

# computing the z-score thresholds for feature selection
correction_no_cluster = 1.0 / n_features
correction_cluster = 1.0 / n_clusters
thr_c = zscore_from_pval((fwer_target / 2) * correction_cluster)
thr_nc = zscore_from_pval((fwer_target / 2) * correction_no_cluster)
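
# As a quick sanity check, these thresholds can be reproduced with scipy,
# assuming zscore_from_pval maps a one-sided p-value p to the standard normal
# inverse survival function norm.isf(p) (an assumption about hidimstat
# internals, not stated in this example).
from scipy.stats import norm

print(thr_c, norm.isf((fwer_target / 2) * correction_cluster))  # should match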


# %%
# Inference with several algorithms
# ---------------------------------
# the theoretical tolerance region.


import numpy as np


# The following function builds a 2D map with four active regions that are
# surrounded by thin tolerance regions.
def weight_map_2D_extended(shape, roi_size, delta):
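    # The original body is elided in this excerpt; the sketch below is an
    # assumption consistent with the description above, not the author's exact
    # implementation: weight 1.0 on a roi_size x roi_size square in each
    # corner, weight 0.5 on the surrounding tolerance border of width delta.
    w = np.zeros(shape)
    ext = roi_size + delta
    # tolerance regions first (weight 0.5) ...
    for rows, cols in [
        (slice(0, ext), slice(0, ext)),
        (slice(0, ext), slice(shape[1] - ext, shape[1])),
        (slice(shape[0] - ext, shape[0]), slice(0, ext)),
        (slice(shape[0] - ext, shape[0]), slice(shape[1] - ext, shape[1])),
    ]:
        w[rows, cols] = 0.5
    # ... then the active regions on top (weight 1.0)
    for rows, cols in [
        (slice(0, roi_size), slice(0, roi_size)),
        (slice(0, roi_size), slice(shape[1] - roi_size, shape[1])),
        (slice(shape[0] - roi_size, shape[0]), slice(0, roi_size)),
        (slice(shape[0] - roi_size, shape[0]), slice(shape[1] - roi_size, shape[1])),
    ]:
        w[rows, cols] = 1.0
    return w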


# compute true support with visible spatial tolerance
beta_extended = weight_map_2D_extended(shape, roi_size, delta)


# %%
# Now, we compute the support estimated by a high-dimensional statistical
# inference method that does not leverage the data structure.
# and referred to as Desparsified Lasso.


from hidimstat import DesparsifiedLasso

# compute desparsified lasso
desparsified_lasso = DesparsifiedLasso(n_jobs=n_jobs, random_state=0)
desparsified_lasso.fit_importance(X_init, y)

# compute estimated support (first method)
zscore = zscore_from_pval(
    desparsified_lasso.pvalues_, desparsified_lasso.one_minus_pvalues_
)
selected_dl = zscore > thr_nc  # use the "no clustering" threshold

# compute estimated support (second method)
selected_dl = np.logical_or(
    desparsified_lasso.pvalues_corr_ < fwer_target / 2,
    desparsified_lasso.one_minus_pvalues_corr_ < fwer_target / 2,
)
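
# Note: this second assignment overwrites the first. Both rules implement the
# same Bonferroni-corrected test (one on the z-score scale, one on the
# corrected p-value scale), so they are expected to select the same support
# for the positive effects simulated here.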


# %%
# Now, we compute the support estimated using a clustered inference algorithm
# (cf. :footcite:t:`chevalier2022spatially`) called Clustered Desparsified Lasso
# (CluDL), since it uses the Desparsified Lasso technique after clustering the data.

# Define the FeatureAgglomeration object that performs the clustering.
# This object is necessary to run the current algorithm and the following one.

from sklearn.cluster import FeatureAgglomeration
from sklearn.feature_extraction import image
from sklearn.preprocessing import StandardScaler

from hidimstat.ensemble_clustered_inference import (
    clustered_inference,
    clustered_inference_pvalue,
)

connectivity = image.grid_to_graph(n_x=shape[0], n_y=shape[1])
ward = FeatureAgglomeration(
    n_clusters=n_clusters, connectivity=connectivity, linkage="ward"
)
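
# To see what the agglomeration does on its own, fit a standalone copy and
# inspect the reduced design: each of the n_clusters columns averages one
# spatially connected group of pixels (an illustration only; the inference
# functions below handle the clustering themselves).
from sklearn.base import clone

X_reduced = clone(ward).fit_transform(X_init)
print(X_reduced.shape)  # (n_samples, n_clusters)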

# clustered desparsified lasso (CluDL)
ward_, desparsified_lasso_ = clustered_inference(
    X_init, y, ward, scaler_sampling=StandardScaler(), random_state=0
)
beta_hat, pval, pval_corr, one_minus_pval, one_minus_pval_corr = (
    clustered_inference_pvalue(n_samples, False, ward_, desparsified_lasso_)
)

# compute estimated support (first method)
zscore = zscore_from_pval(pval, one_minus_pval)
selected_cdl = zscore > thr_c  # use the "clustering" threshold

# compute estimated support (second method)
selected_cdl = np.logical_or(
    pval_corr < fwer_target / 2, one_minus_pval_corr < fwer_target / 2
)


# %%
# Finally, we compute the support estimated by an ensemble clustered
# inference algorithm (cf. :footcite:t:`chevalier2022spatially`). This algorithm is called
# Ensemble of Clustered Desparsified Lasso (EnCluDL) since it runs several
# CluDL algorithms with different clustering choices. The different CluDL
# solutions are then aggregated into one.

from hidimstat.ensemble_clustered_inference import (
    ensemble_clustered_inference,
    ensemble_clustered_inference_pvalue,
)

# ensemble of clustered desparsified lasso (EnCluDL)
list_ward, list_desparsified_lasso = ensemble_clustered_inference(
    X_init,
    y,
    ward,
    scaler_sampling=StandardScaler(),
    random_state=0,
    n_jobs=n_jobs,
)
beta_hat, selected_ecdl = ensemble_clustered_inference_pvalue(
    n_samples,
    False,
    list_ward,
    list_desparsified_lasso,
    fdr=fwer_target,
)
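
# Note: the aggregation step exposes its control level through a parameter
# named `fdr`; here it is set to the same target used for the FWER-controlling
# methods above, so the three supports are estimated at comparable levels.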


# %%
# Results
# -------
#
# Now we plot the true support, the theoretical tolerance regions and
# the estimated supports for every method.

import matplotlib.pyplot as plt


# To generate a plot that exhibits
# the true support and the estimated supports for every method,
# we use the plotting helper defined below.
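# The original helper (and the construction of `maps` and `titles`) is elided
# in this excerpt. A minimal sketch of such a helper, assuming `maps` is a
# list of flattened 2D maps and `titles` the matching labels:
def plot(maps, titles):
    fig, axes = plt.subplots(1, len(maps), figsize=(3 * len(maps), 3))
    for ax, map_2D, title in zip(axes, maps, titles):
        ax.imshow(np.reshape(map_2D, shape))
        ax.set_title(title, fontsize=9)
        ax.axis("off")
    plt.tight_layout()
    plt.show()
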

plot(maps, titles)


# %%
# Analysis of the results
# -----------------------