Skip to content

Commit 3d8966e

Browse files
committed
update docstring
1 parent c7fa706 commit 3d8966e

File tree

1 file changed

+51
-22
lines changed

1 file changed

+51
-22
lines changed

src/hidimstat/marginal/selection_variable_scikit_learn.py

Lines changed: 51 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -20,21 +20,27 @@ class AdapterScikitLearn(BaseVariableImportance):
2020
2121
Notes
2222
-----
23-
Subclasses should implement the `fit` method.
23+
Subclasses should implement the `importance` method.
2424
"""
2525

2626
def fit(self, X=None, y=None):
2727
"""
2828
Fit the feature selection model to the data.
29-
Do nothing because there is no need of fitting
29+
30+
This method does nothing because fitting is not required for these
31+
scikit-learn feature selection methods.
3032
3133
Parameters
3234
----------
33-
X : array-like of shape (n_samples, n_features)
35+
X : array-like of shape (n_samples, n_features), optional
3436
(not used) Input data matrix.
35-
y : array-like of shape (n_samples,)
37+
y : array-like of shape (n_samples,), optional
3638
(not used) Target values.
3739
40+
Returns
41+
-------
42+
self : object
43+
Returns self.
3844
"""
3945
if X is not None:
4046
warnings.warn("X won't be used")
@@ -46,6 +52,9 @@ def importance(self, X, y):
4652
"""
4753
Return the computed feature importances.
4854
55+
This method should be implemented by subclasses to compute feature
56+
importances for the given data.
57+
4958
Parameters
5059
----------
5160
X : array-like of shape (n_samples, n_features)
@@ -64,6 +73,9 @@ def fit_importance(self, X, y, cv=None):
6473
"""
6574
Fit the model and compute feature importances.
6675
76+
This method fits the model (if necessary) and computes feature
77+
importances for the given data.
78+
6779
Parameters
6880
----------
6981
X : array-like of shape (n_samples, n_features)
@@ -88,17 +100,17 @@ class AnalysisOfVariance(AdapterScikitLearn):
88100
"""
89101
Analysis of Variance (ANOVA) :footcite:t:`fisher1970statistical` feature
90102
selection for classification tasks.
91-
For short summary of this method, you can read this paper
92-
:footcite:t:`larson2008analysis`.
93103
94-
Uses scikit-learn's f_classif to compute F-statistics and p-values for each feature.
104+
This class uses scikit-learn's f_classif to compute F-statistics and p-values
105+
for each feature. For a short summary of this method, see
106+
:footcite:t:`larson2008analysis`.
95107
96108
Attributes
97109
----------
98110
importances_ : ndarray
99-
P-values for each feature.
111+
1 - p-values for each feature (higher is more important).
100112
pvalues_ : ndarray
101-
P-values for each feature.
113+
1 - p-values for each feature.
102114
f_statitstic_ : ndarray
103115
F-statistics for each feature.
104116
@@ -113,7 +125,7 @@ def __init__(self):
113125
@override
114126
def importance(self, X, y):
115127
"""
116-
Fit the ANOVA model to the data.
128+
Compute ANOVA F-statistics and p-values for each feature.
117129
118130
Parameters
119131
----------
@@ -125,11 +137,16 @@ def importance(self, X, y):
125137
Sets
126138
----
127139
importances_ : ndarray
128-
P-values for each feature.
140+
1 - p-values for each feature.
129141
pvalues_ : ndarray
130-
P-values for each feature.
142+
1 - p-values for each feature.
131143
f_statitstic_ : ndarray
132144
F-statistics for each feature.
145+
146+
Returns
147+
-------
148+
importances_ : ndarray
149+
1 - p-values for each feature.
133150
"""
134151
f_statistic, p_values = f_classif(X, y)
135152
# Test the opposite hypothesis to the ANOVA
@@ -143,7 +160,9 @@ def importance(self, X, y):
143160
class UnivariateLinearRegressionTests(AdapterScikitLearn):
144161
"""
145162
Univariate linear regression F-test for regression tasks.
146-
This test is also known as Chow test :footcite:t:`chow1960tests`
163+
164+
This test is also known as the Chow test :footcite:t:`chow1960tests`.
165+
Uses scikit-learn's f_regression to compute F-statistics and p-values for each feature.
147166
148167
Parameters
149168
----------
@@ -155,9 +174,9 @@ class UnivariateLinearRegressionTests(AdapterScikitLearn):
155174
Attributes
156175
----------
157176
importances_ : ndarray
158-
P-values for each feature.
177+
1 - p-values for each feature.
159178
pvalues_ : ndarray
160-
P-values for each feature.
179+
1 - p-values for each feature.
161180
f_statitstic_ : ndarray
162181
F-statistics for each feature.
163182
@@ -175,7 +194,7 @@ def __init__(self, center=True, force_finite=True):
175194
@override
176195
def importance(self, X, y):
177196
"""
178-
Fit the univariate linear regression F-test model to the data.
197+
Compute univariate linear regression F-statistics and p-values for each feature.
179198
180199
Parameters
181200
----------
@@ -187,11 +206,16 @@ def importance(self, X, y):
187206
Sets
188207
----
189208
importances_ : ndarray
190-
P-values for each feature.
209+
1 - p-values for each feature.
191210
pvalues_ : ndarray
192-
P-values for each feature.
211+
1 - p-values for each feature.
193212
f_statitstic_ : ndarray
194213
F-statistics for each feature.
214+
215+
Returns
216+
-------
217+
importances_ : ndarray
218+
1 - p-values for each feature.
195219
"""
196220
f_statistic, p_values = f_regression(
197221
X, y, center=self.center, force_finite=self.force_finite
@@ -207,9 +231,9 @@ def importance(self, X, y):
207231
class MutualInformation(AdapterScikitLearn):
208232
"""
209233
Mutual information feature selection for regression or classification.
210-
This method was introduce by Shannon :footcite:t:`shannon1948mathematical`
211-
but for an introduction, you can look the section 2.4 of this book
212-
:footcite:t:`cover1999elements` .
234+
235+
This method was introduced by Shannon :footcite:t:`shannon1948mathematical`.
236+
For an introduction, see section 2.4 of :footcite:t:`cover1999elements`.
213237
214238
Parameters
215239
----------
@@ -256,7 +280,7 @@ def __init__(
256280
@override
257281
def importance(self, X, y):
258282
"""
259-
Fit the mutual information model to the data.
283+
Compute mutual information scores for each feature.
260284
261285
Parameters
262286
----------
@@ -271,6 +295,11 @@ def importance(self, X, y):
271295
Mutual information scores for each feature.
272296
pvalues_ : None
273297
P-values are not computed for mutual information.
298+
299+
Returns
300+
-------
301+
importances_ : ndarray
302+
Mutual information scores for each feature.
274303
"""
275304
if self.problem_type == "regression":
276305
mutual_information = mutual_info_regression(

0 commit comments

Comments
 (0)