Skip to content

Commit d217b95

Browse files
committed
docstrings added and fixed
1 parent babb541 commit d217b95

File tree

2 files changed

+138
-8
lines changed

2 files changed

+138
-8
lines changed

modAL/acquisition.py

+131-1
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,51 @@
1313

1414

1515
def PI(optimizer, X, tradeoff=0):
16+
"""
17+
Probability of improvement acquisition function for Bayesian optimization.
18+
19+
Parameters
20+
----------
21+
optimizer: modAL.models.BayesianEstimator object
22+
The BayesianEstimator object for which the utility is to be calculated.
23+
24+
X: numpy.ndarray of shape (n_samples, n_features)
25+
The samples for which the probability of improvement is to be calculated.
26+
27+
tradeoff: float
28+
Amount by which the predicted mean must exceed optimizer.max_val to count as an improvement.
29+
30+
Returns
31+
-------
32+
pi: numpy.ndarray of shape (n_samples, )
33+
Probability of improvement utility score.
34+
"""
1635
mean, std = optimizer.predict(X, return_std=True)
1736
std = std.reshape(-1, 1)
1837

1938
return ndtr((mean - optimizer.max_val - tradeoff)/std)
2039

2140

2241
def EI(optimizer, X, tradeoff=0):
42+
"""
43+
Expected improvement acquisition function for Bayesian optimization.
44+
45+
Parameters
46+
----------
47+
optimizer: modAL.models.BayesianEstimator object
48+
The BayesianEstimator object for which the utility is to be calculated.
49+
50+
X: numpy.ndarray of shape (n_samples, n_features)
51+
The samples for which the expected improvement is to be calculated.
52+
53+
tradeoff: float
54+
Offset subtracted from the predicted mean's margin over optimizer.max_val when computing the improvement.
55+
56+
Returns
57+
-------
58+
ei: numpy.ndarray of shape (n_samples, )
59+
Expected improvement utility score.
60+
"""
2361
mean, std = optimizer.predict(X, return_std=True)
2462
std = std.reshape(-1, 1)
2563
z = (mean - optimizer.max_val - tradeoff)/std
@@ -29,7 +67,23 @@ def EI(optimizer, X, tradeoff=0):
2967

3068
def UCB(optimizer, X, beta=1):
3169
"""
32-
Ref: https://arxiv.org/abs/0912.3995
70+
Upper confidence bound acquisition function for Bayesian optimization.
71+
72+
Parameters
73+
----------
74+
optimizer: modAL.models.BayesianEstimator object
75+
The BayesianEstimator object for which the utility is to be calculated.
76+
77+
X: numpy.ndarray of shape (n_samples, n_features)
78+
The samples for which the upper confidence bound is to be calculated.
79+
80+
beta: float
81+
Value controlling the beta parameter.
82+
83+
Returns
84+
-------
85+
ucb: numpy.ndarray of shape (n_samples, )
86+
Upper confidence bound utility score.
3387
"""
3488
mean, std = optimizer.predict(X, return_std=True)
3589
std = std.reshape(-1, 1)
@@ -38,20 +92,96 @@ def UCB(optimizer, X, beta=1):
3892

3993

4094
def max_PI(optimizer, X, tradeoff=0, n_instances=1):
95+
"""
96+
Maximum PI query strategy. Selects the n_instances instances with the highest probability of improvement.
97+
98+
Parameters
99+
----------
100+
optimizer: modAL.models.BayesianEstimator object
101+
The BayesianEstimator object for which the utility is to be calculated.
102+
103+
X: numpy.ndarray of shape (n_samples, n_features)
104+
The samples for which the probability of improvement is to be calculated.
105+
106+
tradeoff: float
107+
Tradeoff value forwarded to the PI acquisition function.
108+
109+
n_instances: int
110+
Number of samples to be queried.
111+
112+
Returns
113+
-------
114+
query_idx: numpy.ndarray of shape (n_instances, )
115+
The indices of the instances from X chosen to be labelled.
116+
117+
X[query_idx]: numpy.ndarray of shape (n_instances, n_features)
118+
The instances from X chosen to be labelled.
119+
"""
41120
pi = PI(optimizer, X, tradeoff=tradeoff)
42121
query_idx = multi_argmax(pi, n_instances=n_instances)
43122

44123
return query_idx, X[query_idx]
45124

46125

47126
def max_EI(optimizer, X, tradeoff=0, n_instances=1):
127+
"""
128+
Maximum EI query strategy. Selects the n_instances instances with the highest expected improvement.
129+
130+
Parameters
131+
----------
132+
optimizer: modAL.models.BayesianEstimator object
133+
The BayesianEstimator object for which the utility is to be calculated.
134+
135+
X: numpy.ndarray of shape (n_samples, n_features)
136+
The samples for which the expected improvement is to be calculated.
137+
138+
tradeoff: float
139+
Tradeoff value forwarded to the EI acquisition function.
140+
141+
n_instances: int
142+
Number of samples to be queried.
143+
144+
Returns
145+
-------
146+
query_idx: numpy.ndarray of shape (n_instances, )
147+
The indices of the instances from X chosen to be labelled.
148+
149+
X[query_idx]: numpy.ndarray of shape (n_instances, n_features)
150+
The instances from X chosen to be labelled.
151+
"""
48152
ei = EI(optimizer, X, tradeoff=tradeoff)
49153
query_idx = multi_argmax(ei, n_instances=n_instances)
50154

51155
return query_idx, X[query_idx]
52156

53157

54158
def max_UCB(optimizer, X, beta=1, n_instances=1):
159+
"""
160+
Maximum UCB query strategy. Selects the n_instances instances with the highest upper confidence
161+
bound.
162+
163+
Parameters
164+
----------
165+
optimizer: modAL.models.BayesianEstimator object
166+
The BayesianEstimator object for which the utility is to be calculated.
167+
168+
X: numpy.ndarray of shape (n_samples, n_features)
169+
The samples for which the upper confidence bound is to be calculated.
170+
171+
beta: float
172+
Value controlling the beta parameter.
173+
174+
n_instances: int
175+
Number of samples to be queried.
176+
177+
Returns
178+
-------
179+
query_idx: numpy.ndarray of shape (n_instances, )
180+
The indices of the instances from X chosen to be labelled.
181+
182+
X[query_idx]: numpy.ndarray of shape (n_instances, n_features)
183+
The instances from X chosen to be labelled.
184+
"""
55185
ucb = UCB(optimizer, X, beta=beta)
56186
query_idx = multi_argmax(ucb, n_instances=n_instances)
57187

modAL/uncertainty.py

+7-7
Original file line numberDiff line numberDiff line change
@@ -131,10 +131,10 @@ def uncertainty_sampling(classifier, X, n_instances=1, **uncertainty_measure_kwa
131131
Returns
132132
-------
133133
query_idx: numpy.ndarray of shape (n_instances, )
134-
The indices of the instances from X_pool chosen to be labelled.
134+
The indices of the instances from X chosen to be labelled.
135135
136-
X_pool[query_idx]: numpy.ndarray of shape (n_instances, n_features)
137-
The instances from X_pool chosen to be labelled.
136+
X[query_idx]: numpy.ndarray of shape (n_instances, n_features)
137+
The instances from X chosen to be labelled.
138138
"""
139139
uncertainty = classifier_uncertainty(classifier, X, **uncertainty_measure_kwargs)
140140
query_idx = multi_argmax(uncertainty, n_instances=n_instances)
@@ -164,10 +164,10 @@ def margin_sampling(classifier, X, n_instances=1, **uncertainty_measure_kwargs):
164164
Returns
165165
-------
166166
query_idx: numpy.ndarray of shape (n_instances, )
167-
The indices of the instances from X_pool chosen to be labelled.
167+
The indices of the instances from X chosen to be labelled.
168168
169169
X[query_idx]: numpy.ndarray of shape (n_instances, n_features)
170-
The instances from X_pool chosen to be labelled.
170+
The instances from X chosen to be labelled.
171171
"""
172172
margin = classifier_margin(classifier, X, **uncertainty_measure_kwargs)
173173
query_idx = multi_argmax(-margin, n_instances=n_instances)
@@ -197,10 +197,10 @@ def entropy_sampling(classifier, X, n_instances=1, **uncertainty_measure_kwargs)
197197
Returns
198198
-------
199199
query_idx: numpy.ndarray of shape (n_instances, )
200-
The indices of the instances from X_pool chosen to be labelled.
200+
The indices of the instances from X chosen to be labelled.
201201
202202
X[query_idx]: numpy.ndarray of shape (n_instances, n_features)
203-
The instances from X_pool chosen to be labelled.
203+
The instances from X chosen to be labelled.
204204
"""
205205
entropy = classifier_entropy(classifier, X, **uncertainty_measure_kwargs)
206206
query_idx = multi_argmax(entropy, n_instances=n_instances)

0 commit comments

Comments
 (0)