@@ -20,21 +20,27 @@ class AdapterScikitLearn(BaseVariableImportance):
2020
2121 Notes
2222 -----
23- Subclasses should implement the `fit` method .
23+ Subclasses should implement the `importance` method.
2424 """
2525
2626 def fit (self , X = None , y = None ):
2727 """
2828 Fit the feature selection model to the data.
29- Do nothing because there is no need of fitting
29+
30+ This method does nothing because fitting is not required for these
31+ scikit-learn feature selection methods.
3032
3133 Parameters
3234 ----------
33- X : array-like of shape (n_samples, n_features)
35+ X : array-like of shape (n_samples, n_features), optional
3436 (not used) Input data matrix.
35- y : array-like of shape (n_samples,)
37+ y : array-like of shape (n_samples,), optional
3638 (not used) Target values.
3739
40+ Returns
41+ -------
42+ self : object
43+ Returns self.
3844 """
3945 if X is not None :
4046 warnings .warn ("X won't be used" )
@@ -46,6 +52,9 @@ def importance(self, X, y):
4652 """
4753 Return the computed feature importances.
4854
55+ This method should be implemented by subclasses to compute feature
56+ importances for the given data.
57+
4958 Parameters
5059 ----------
5160 X : array-like of shape (n_samples, n_features)
@@ -64,6 +73,9 @@ def fit_importance(self, X, y, cv=None):
6473 """
6574 Fit the model and compute feature importances.
6675
76+ This method fits the model (if necessary) and computes feature
77+ importances for the given data.
78+
6779 Parameters
6880 ----------
6981 X : array-like of shape (n_samples, n_features)
@@ -88,17 +100,17 @@ class AnalysisOfVariance(AdapterScikitLearn):
88100 """
89101 Analysis of Variance (ANOVA) :footcite:t:`fisher1970statistical` feature
90102 selection for classification tasks.
91- For short summary of this method, you can read this paper
92- :footcite:t:`larson2008analysis`.
93103
94- Uses scikit-learn's f_classif to compute F-statistics and p-values for each feature.
104+ This class uses scikit-learn's f_classif to compute F-statistics and p-values
105+ for each feature. For a short summary of this method, see
106+ :footcite:t:`larson2008analysis`.
95107
96108 Attributes
97109 ----------
98110 importances_ : ndarray
99- P- values for each feature.
111+ 1 - p-values for each feature (higher is more important).
100112 pvalues_ : ndarray
101- P -values for each feature.
113+ 1 - p-values for each feature.
102114 f_statitstic_ : ndarray
103115 F-statistics for each feature.
104116
@@ -113,7 +125,7 @@ def __init__(self):
113125 @override
114126 def importance (self , X , y ):
115127 """
116- Fit the ANOVA model to the data .
128+ Compute ANOVA F-statistics and p-values for each feature.
117129
118130 Parameters
119131 ----------
@@ -125,11 +137,16 @@ def importance(self, X, y):
125137 Sets
126138 ----
127139 importances_ : ndarray
128- P -values for each feature.
140+ 1 - p-values for each feature.
129141 pvalues_ : ndarray
130- P -values for each feature.
142+ 1 - p-values for each feature.
131143 f_statitstic_ : ndarray
132144 F-statistics for each feature.
145+
146+ Returns
147+ -------
148+ importances_ : ndarray
149+ 1 - p-values for each feature.
133150 """
134151 f_statistic , p_values = f_classif (X , y )
135152 # Test the opposite hypothesis to the anova
@@ -143,7 +160,9 @@ def importance(self, X, y):
143160class UnivariateLinearRegressionTests (AdapterScikitLearn ):
144161 """
145162 Univariate linear regression F-test for regression tasks.
146- This test is also known as Chow test :footcite:t:`chow1960tests`
163+
164+ This test is also known as the Chow test :footcite:t:`chow1960tests`.
165+ Uses scikit-learn's f_regression to compute F-statistics and p-values for each feature.
147166
148167 Parameters
149168 ----------
@@ -155,9 +174,9 @@ class UnivariateLinearRegressionTests(AdapterScikitLearn):
155174 Attributes
156175 ----------
157176 importances_ : ndarray
158- P -values for each feature.
177+ 1 - p-values for each feature.
159178 pvalues_ : ndarray
160- P -values for each feature.
179+ 1 - p-values for each feature.
161180 f_statitstic_ : ndarray
162181 F-statistics for each feature.
163182
@@ -175,7 +194,7 @@ def __init__(self, center=True, force_finite=True):
175194 @override
176195 def importance (self , X , y ):
177196 """
178- Fit the univariate linear regression F-test model to the data .
197+ Compute univariate linear regression F-statistics and p-values for each feature.
179198
180199 Parameters
181200 ----------
@@ -187,11 +206,16 @@ def importance(self, X, y):
187206 Sets
188207 ----
189208 importances_ : ndarray
190- P -values for each feature.
209+ 1 - p-values for each feature.
191210 pvalues_ : ndarray
192- P -values for each feature.
211+ 1 - p-values for each feature.
193212 f_statitstic_ : ndarray
194213 F-statistics for each feature.
214+
215+ Returns
216+ -------
217+ importances_ : ndarray
218+ 1 - p-values for each feature.
195219 """
196220 f_statistic , p_values = f_regression (
197221 X , y , center = self .center , force_finite = self .force_finite
@@ -207,9 +231,9 @@ def importance(self, X, y):
207231class MutualInformation (AdapterScikitLearn ):
208232 """
209233 Mutual information feature selection for regression or classification.
210- This method was introduce by Shannon :footcite:t:`shannon1948mathematical`
211- but for an introduction, you can look the section 2.4 of this book
212- :footcite:t:`cover1999elements` .
234+
235+ This method was introduced by Shannon :footcite:t:`shannon1948mathematical`.
236+ For an introduction, see section 2.4 of :footcite:t:`cover1999elements`.
213237
214238 Parameters
215239 ----------
@@ -256,7 +280,7 @@ def __init__(
256280 @override
257281 def importance (self , X , y ):
258282 """
259- Fit the mutual information model to the data .
283+ Compute mutual information scores for each feature.
260284
261285 Parameters
262286 ----------
@@ -271,6 +295,11 @@ def importance(self, X, y):
271295 Mutual information scores for each feature.
272296 pvalues_ : None
273297 P-values are not computed for mutual information.
298+
299+ Returns
300+ -------
301+ importances_ : ndarray
302+ Mutual information scores for each feature.
274303 """
275304 if self .problem_type == "regression" :
276305 mutual_information = mutual_info_regression (
0 commit comments