import numpy as np
from joblib import Parallel, delayed
from sklearn.base import check_is_fitted, clone
from sklearn.metrics import root_mean_squared_error
from typing import override

from hidimstat._utils.utils import _check_vim_predict_method
from hidimstat.base_variable_importance import (
    BaseVariableImportance,
    VariableImportanceFeatureGroup,
)


class LeaveOneCovariateIn(BaseVariableImportance, VariableImportanceFeatureGroup):
    def __init__(
        self,
        estimator,
        loss: callable = root_mean_squared_error,
        method: str = "predict",
        n_jobs: int = 1,
    ):
22+ """
23+ Leave One Covariate In.
24+ For more details, see the section 7.2 of :footcite:t:`ewald2024guide`.
25+
26+ Parameters
27+ ----------
28+ estimator : sklearn compatible estimator, optional
29+ The estimator to use for the prediction.
30+ loss : callable, default=root_mean_squared_error
31+ The function to compute the loss when comparing the perturbed model
32+ to the original model.
33+ method : str, default="predict"
34+ The method used for making predictions. This determines the predictions
35+ passed to the loss function. Supported methods are "predict",
36+ "predict_proba", "decision_function", "transform".
37+ n_jobs : int, default=1
38+ The number of parallel jobs to run. Parallelization is done over the
39+ features or groups of features.
40+ """
        super().__init__()
        check_is_fitted(estimator)
        self.estimator = estimator
        self.loss = loss
        _check_vim_predict_method(method)
        self.method = method
        self.n_jobs = n_jobs
        # generated attributes
        self._list_univariate_model = []
        self.loss_reference_ = None

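    # A minimal usage sketch of this class (the regressor and the train/test arrays
    # below are illustrative assumptions, not part of this module):
    #
    #     from sklearn.linear_model import LinearRegression
    #
    #     estimator = LinearRegression().fit(X_train, y_train)
    #     loci = LeaveOneCovariateIn(estimator)
    #     loci.fit(X_train, y_train)  # one univariate model per feature
    #     importances = loci.importance(X_test, y_test)
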
    @override
    def fit(self, X, y, features_groups=None):
        """
        Fit one univariate model per group of features (Leave One Covariate In).

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The input samples.
        y : array-like of shape (n_samples,)
            The target values.
        features_groups : dict, optional
            A dictionary where the keys are group identifiers and the values are
            lists of feature indices or names for each group. If None, each feature
            is treated as its own group.

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        super().fit(X, y, features_groups)
        X_ = np.asarray(X)
        y_ = np.asarray(y)

        # Fit, in parallel, one clone of the estimator on each group of features
        self._list_univariate_model = Parallel(n_jobs=self.n_jobs)(
            delayed(self._joblib_fit_one_features_group)(X_, y_, features_groups_ids)
            for features_groups_ids in self._features_groups_ids
        )
        return self

    def predict(self, X):
        """
        Compute the predictions of the univariate model fitted on each group of
        features.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The input samples.

        Returns
        -------
        out : ndarray of shape (n_features_groups, n_samples) or
            (n_features_groups, n_samples, n_classes)
            The predictions of the univariate model of each group of features.
        """
        self._check_fit(X)
        X_ = np.asarray(X)

        # Compute, in parallel, the predictions of each group's univariate model
        out_list = Parallel(n_jobs=self.n_jobs)(
            delayed(self._joblib_predict_one_features_group)(
                X_, features_group_id, features_groups_ids
            )
            for features_group_id, features_groups_ids in enumerate(
                self._features_groups_ids
            )
        )
        return np.array(out_list)

    def importance(self, X, y):
        """
        Compute the marginal (LOCI) importance scores for each group of features.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The input samples.
        y : array-like of shape (n_samples,)
            The target values.

        Returns
        -------
        importances_ : list of float, of length n_features_groups
            The importance score of each group of features, defined as the loss of
            a dummy baseline model minus the loss of the group's univariate model.
            The baseline loss is stored in ``loss_reference_``.
        """
        self._check_fit(X)

        y_pred = self.predict(X)

        # Baseline: a dummy model that ignores the features
        if len(y_pred[0].shape) == 1 or y_pred[0].shape[1] == 1:
            # Regression: predict the average of the target values
            y_ref = np.mean(y) * np.ones_like(y_pred[0])
            self.loss_reference_ = self.loss(y, y_ref)
        else:
            # Classification: predict the most frequent class
            values, counts = np.unique(y, return_counts=True)
            y_ref = np.zeros_like(y_pred[0])
            y_ref[:, np.argmax(counts)] = 1.0
            self.loss_reference_ = self.loss(y, y_ref)

        # LOCI importance: baseline loss minus the univariate model loss, so a
        # positive score means the group improves on the dummy baseline
        self.importances_ = []
        for y_pred_j in y_pred:
            self.importances_.append(self.loss_reference_ - self.loss(y, y_pred_j))
        self.pvalues_ = None  # this method does not estimate p-values
        return self.importances_

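    # A hedged classification sketch for the `importance` method above (the
    # classifier, data splits, and loss choice are assumptions; the loss must accept
    # the probability predictions produced by method="predict_proba"):
    #
    #     from sklearn.linear_model import LogisticRegression
    #     from sklearn.metrics import log_loss
    #
    #     clf = LogisticRegression().fit(X_train, y_train)
    #     loci = LeaveOneCovariateIn(clf, loss=log_loss, method="predict_proba")
    #     loci.fit(X_train, y_train)
    #     importances = loci.importance(X_test, y_test)
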
    def fit_importance(self, X, y, cv, features_groups=None):
        """
        Fit the model and compute the importance scores on each cross-validation
        fold.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The input data.
        y : array-like of shape (n_samples,)
            The target values.
        cv : cross-validation generator
            A splitter (e.g. ``sklearn.model_selection.KFold``) whose ``split``
            method yields (train, test) index pairs.
        features_groups : dict, optional
            A dictionary where the keys are group identifiers and the values are
            lists of feature indices or names for each group. If None, each feature
            is treated as its own group.

        Returns
        -------
        importance : ndarray of shape (n_features_groups,)
            The importance scores averaged over the cross-validation folds.
        """
        list_attribute_saved = ["importances_", "pvalues_", "_list_univariate_model"]
        save_value_attributes = []
        for train_index, test_index in cv.split(X):
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]
            self.fit(X_train, y_train, features_groups=features_groups)
            self.importance(X_test, y_test)
            save_value_attributes.append(
                [getattr(self, attribute) for attribute in list_attribute_saved]
            )
        # Gather the per-fold values of each saved attribute into one list per attribute
        for attribute in list_attribute_saved:
            setattr(self, attribute, [])
        for value_attribute in save_value_attributes:
            for attribute, value in zip(list_attribute_saved, value_attribute):
                getattr(self, attribute).append(value)

        return np.mean(self.importances_, axis=0)

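    # A hedged example of the cross-validated variant above (KFold and numpy arrays
    # X, y are assumptions; any splitter whose split(X) yields index pairs usable for
    # positional indexing should work, and `estimator` must already be fitted):
    #
    #     from sklearn.model_selection import KFold
    #
    #     loci = LeaveOneCovariateIn(estimator)
    #     mean_importances = loci.fit_importance(X, y, cv=KFold(n_splits=5))
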
    def _joblib_fit_one_features_group(self, X, y, features_group_ids):
        """
        Helper function to fit a univariate model for a single group.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The input samples.
        y : array-like of shape (n_samples,)
            The target values.
        features_group_ids : array-like
            The indices of features belonging to this group.

        Returns
        -------
        object
            The fitted univariate model for this group.
        """
        univariate_model = clone(self.estimator)
        return univariate_model.fit(
            X[:, features_group_ids].reshape(-1, len(features_group_ids)), y
        )

    def _joblib_predict_one_features_group(
        self, X, index_features_group, features_group_ids
    ):
        """
        Helper function to compute the predictions of a single group's univariate
        model.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The input samples.
        index_features_group : int
            The index of the group in ``_list_univariate_model``.
        features_group_ids : array-like
            The indices of features belonging to this group.

        Returns
        -------
        ndarray
            The predictions of the univariate model for this group.
        """
        y_pred_loci = getattr(
            self._list_univariate_model[index_features_group], self.method
        )(X[:, features_group_ids].reshape(-1, len(features_group_ids)))
        return y_pred_loci