@@ -20,21 +20,27 @@ class AdapterScikitLearn(BaseVariableImportance):
20
20
21
21
Notes
22
22
-----
23
- Subclasses should implement the `fit` method .
23
+ Subclasses should implement the `importance` method.
24
24
"""
25
25
26
26
def fit (self , X = None , y = None ):
27
27
"""
28
28
Fit the feature selection model to the data.
29
- Do nothing because there is no need of fitting
29
+
30
+ This method does nothing because fitting is not required for these
31
+ scikit-learn feature selection methods.
30
32
31
33
Parameters
32
34
----------
33
- X : array-like of shape (n_samples, n_features)
35
+ X : array-like of shape (n_samples, n_features), optional
34
36
(not used) Input data matrix.
35
- y : array-like of shape (n_samples,)
37
+ y : array-like of shape (n_samples,), optional
36
38
(not used) Target values.
37
39
40
+ Returns
41
+ -------
42
+ self : object
43
+ Returns self.
38
44
"""
39
45
if X is not None :
40
46
warnings .warn ("X won't be used" )
@@ -46,6 +52,9 @@ def importance(self, X, y):
46
52
"""
47
53
Return the computed feature importances.
48
54
55
+ This method should be implemented by subclasses to compute feature
56
+ importances for the given data.
57
+
49
58
Parameters
50
59
----------
51
60
X : array-like of shape (n_samples, n_features)
@@ -64,6 +73,9 @@ def fit_importance(self, X, y, cv=None):
64
73
"""
65
74
Fit the model and compute feature importances.
66
75
76
+ This method fits the model (if necessary) and computes feature
77
+ importances for the given data.
78
+
67
79
Parameters
68
80
----------
69
81
X : array-like of shape (n_samples, n_features)
@@ -88,17 +100,17 @@ class AnalysisOfVariance(AdapterScikitLearn):
88
100
"""
89
101
Analysis of Variance (ANOVA) :footcite:t:`fisher1970statistical` feature
90
102
selection for classification tasks.
91
- For short summary of this method, you can read this paper
92
- :footcite:t:`larson2008analysis`.
93
103
94
- Uses scikit-learn's f_classif to compute F-statistics and p-values for each feature.
104
+ This class uses scikit-learn's f_classif to compute F-statistics and p-values
105
+ for each feature. For a short summary of this method, see
106
+ :footcite:t:`larson2008analysis`.
95
107
96
108
Attributes
97
109
----------
98
110
importances_ : ndarray
99
- P- values for each feature.
111
+ 1 - p-values for each feature (higher is more important).
100
112
pvalues_ : ndarray
101
- P -values for each feature.
113
+ 1 - p-values for each feature.
102
114
f_statitstic_ : ndarray
103
115
F-statistics for each feature.
104
116
@@ -113,7 +125,7 @@ def __init__(self):
113
125
@override
114
126
def importance (self , X , y ):
115
127
"""
116
- Fit the ANOVA model to the data .
128
+ Compute ANOVA F-statistics and p-values for each feature .
117
129
118
130
Parameters
119
131
----------
@@ -125,11 +137,16 @@ def importance(self, X, y):
125
137
Sets
126
138
----
127
139
importances_ : ndarray
128
- P -values for each feature.
140
+ 1 - p-values for each feature.
129
141
pvalues_ : ndarray
130
- P -values for each feature.
142
+ 1 - p-values for each feature.
131
143
f_statitstic_ : ndarray
132
144
F-statistics for each feature.
145
+
146
+ Returns
147
+ -------
148
+ importances_ : ndarray
149
+ 1 - p-values for each feature.
133
150
"""
134
151
f_statistic , p_values = f_classif (X , y )
135
152
# Test the opposite hypothesis to the ANOVA
@@ -143,7 +160,9 @@ def importance(self, X, y):
143
160
class UnivariateLinearRegressionTests (AdapterScikitLearn ):
144
161
"""
145
162
Univariate linear regression F-test for regression tasks.
146
- This test is also known as Chow test :footcite:t:`chow1960tests`
163
+
164
+ This test is also known as the Chow test :footcite:t:`chow1960tests`.
165
+ Uses scikit-learn's f_regression to compute F-statistics and p-values for each feature.
147
166
148
167
Parameters
149
168
----------
@@ -155,9 +174,9 @@ class UnivariateLinearRegressionTests(AdapterScikitLearn):
155
174
Attributes
156
175
----------
157
176
importances_ : ndarray
158
- P -values for each feature.
177
+ 1 - p-values for each feature.
159
178
pvalues_ : ndarray
160
- P -values for each feature.
179
+ 1 - p-values for each feature.
161
180
f_statitstic_ : ndarray
162
181
F-statistics for each feature.
163
182
@@ -175,7 +194,7 @@ def __init__(self, center=True, force_finite=True):
175
194
@override
176
195
def importance (self , X , y ):
177
196
"""
178
- Fit the univariate linear regression F-test model to the data .
197
+ Compute univariate linear regression F-statistics and p-values for each feature .
179
198
180
199
Parameters
181
200
----------
@@ -187,11 +206,16 @@ def importance(self, X, y):
187
206
Sets
188
207
----
189
208
importances_ : ndarray
190
- P -values for each feature.
209
+ 1 - p-values for each feature.
191
210
pvalues_ : ndarray
192
- P -values for each feature.
211
+ 1 - p-values for each feature.
193
212
f_statitstic_ : ndarray
194
213
F-statistics for each feature.
214
+
215
+ Returns
216
+ -------
217
+ importances_ : ndarray
218
+ 1 - p-values for each feature.
195
219
"""
196
220
f_statistic , p_values = f_regression (
197
221
X , y , center = self .center , force_finite = self .force_finite
@@ -207,9 +231,9 @@ def importance(self, X, y):
207
231
class MutualInformation (AdapterScikitLearn ):
208
232
"""
209
233
Mutual information feature selection for regression or classification.
210
- This method was introduce by Shannon :footcite:t:`shannon1948mathematical`
211
- but for an introduction, you can look the section 2.4 of this book
212
- :footcite:t:`cover1999elements` .
234
+
235
+ This method was introduced by Shannon :footcite:t:`shannon1948mathematical`.
236
+ For an introduction, see section 2.4 of :footcite:t:`cover1999elements`.
213
237
214
238
Parameters
215
239
----------
@@ -256,7 +280,7 @@ def __init__(
256
280
@override
257
281
def importance (self , X , y ):
258
282
"""
259
- Fit the mutual information model to the data .
283
+ Compute mutual information scores for each feature .
260
284
261
285
Parameters
262
286
----------
@@ -271,6 +295,11 @@ def importance(self, X, y):
271
295
Mutual information scores for each feature.
272
296
pvalues_ : None
273
297
P-values are not computed for mutual information.
298
+
299
+ Returns
300
+ -------
301
+ importances_ : ndarray
302
+ Mutual information scores for each feature.
274
303
"""
275
304
if self .problem_type == "regression" :
276
305
mutual_information = mutual_info_regression (
0 commit comments