docstrings added and fixed

cosmic-cortex · cosmic-cortex · commit d217b95d5774 · 2018-04-23T23:13:31.000+02:00
diff --git a/modAL/acquisition.py b/modAL/acquisition.py
@@ -13,13 +13,51 @@
 
 
 def PI(optimizer, X, tradeoff=0):
+    """
+    Probability of improvement acquisition function for Bayesian optimization.
+
+    Parameters
+    ----------
+    optimizer: modAL.models.BayesianEstimator object
+        The BayesianEstimator object for which the utility is to be calculated.
+
+    X: numpy.ndarray of shape (n_samples, n_features)
+        The samples for which the probability of improvement is to be calculated.
+
+    tradeoff: float
+        Margin by which the predicted mean has to exceed optimizer.max_val.
+
+    Returns
+    -------
+    pi: numpy.ndarray of shape (n_samples, )
+        Probability of improvement utility score.
+    """
     mean, std = optimizer.predict(X, return_std=True)
     std = std.reshape(-1, 1)
 
     return ndtr((mean - optimizer.max_val - tradeoff)/std)
 
 
 def EI(optimizer, X, tradeoff=0):
+    """
+    Expected improvement acquisition function for Bayesian optimization.
+
+    Parameters
+    ----------
+    optimizer: modAL.models.BayesianEstimator object
+        The BayesianEstimator object for which the utility is to be calculated.
+
+    X: numpy.ndarray of shape (n_samples, n_features)
+        The samples for which the expected improvement is to be calculated.
+
+    tradeoff: float
+        Margin by which the predicted mean has to exceed optimizer.max_val.
+
+    Returns
+    -------
+    ei: numpy.ndarray of shape (n_samples, )
+        Expected improvement utility score.
+    """
     mean, std = optimizer.predict(X, return_std=True)
     std = std.reshape(-1, 1)
     z = (mean - optimizer.max_val - tradeoff)/std
@@ -29,7 +67,23 @@ def EI(optimizer, X, tradeoff=0):
 
 def UCB(optimizer, X, beta=1):
     """
-    Ref: https://arxiv.org/abs/0912.3995
+    Upper confidence bound acquisition function for Bayesian optimization.
+
+    Parameters
+    ----------
+    optimizer: modAL.models.BayesianEstimator object
+        The BayesianEstimator object for which the utility is to be calculated.
+
+    X: numpy.ndarray of shape (n_samples, n_features)
+        The samples for which the upper confidence bound is to be calculated.
+
+    beta: float
+        Value controlling the beta parameter.
+
+    Returns
+    -------
+    ucb: numpy.ndarray of shape (n_samples, )
+        Upper confidence bound utility score.
     """
     mean, std = optimizer.predict(X, return_std=True)
     std = std.reshape(-1, 1)
@@ -38,20 +92,96 @@ def UCB(optimizer, X, beta=1):
 
 
 def max_PI(optimizer, X, tradeoff=0, n_instances=1):
+    """
+    Maximum PI query strategy. Selects the n_instances samples with highest probability of improvement.
+
+    Parameters
+    ----------
+    optimizer: modAL.models.BayesianEstimator object
+        The BayesianEstimator object for which the utility is to be calculated.
+
+    X: numpy.ndarray of shape (n_samples, n_features)
+        The samples for which the probability of improvement is to be calculated.
+
+    tradeoff: float
+        Margin by which the predicted mean has to exceed optimizer.max_val; passed on to PI.
+
+    n_instances: int
+        Number of samples to be queried.
+
+    Returns
+    -------
+    query_idx: numpy.ndarray of shape (n_instances, )
+        The indices of the instances from X chosen to be labelled.
+
+    X[query_idx]: numpy.ndarray of shape (n_instances, n_features)
+        The instances from X chosen to be labelled.
+    """
     pi = PI(optimizer, X, tradeoff=tradeoff)
     query_idx = multi_argmax(pi, n_instances=n_instances)
 
     return query_idx, X[query_idx]
 
 
 def max_EI(optimizer, X, tradeoff=0, n_instances=1):
+    """
+    Maximum EI query strategy. Selects the n_instances samples with highest expected improvement.
+
+    Parameters
+    ----------
+    optimizer: modAL.models.BayesianEstimator object
+        The BayesianEstimator object for which the utility is to be calculated.
+
+    X: numpy.ndarray of shape (n_samples, n_features)
+        The samples for which the expected improvement is to be calculated.
+
+    tradeoff: float
+        Margin by which the predicted mean has to exceed optimizer.max_val; passed on to EI.
+
+    n_instances: int
+        Number of samples to be queried.
+
+    Returns
+    -------
+    query_idx: numpy.ndarray of shape (n_instances, )
+        The indices of the instances from X chosen to be labelled.
+
+    X[query_idx]: numpy.ndarray of shape (n_instances, n_features)
+        The instances from X chosen to be labelled.
+    """
     ei = EI(optimizer, X, tradeoff=tradeoff)
     query_idx = multi_argmax(ei, n_instances=n_instances)
 
     return query_idx, X[query_idx]
 
 
 def max_UCB(optimizer, X, beta=1, n_instances=1):
+    """
+    Maximum UCB query strategy. Selects the n_instances samples with highest upper
+    confidence bound.
+
+    Parameters
+    ----------
+    optimizer: modAL.models.BayesianEstimator object
+        The BayesianEstimator object for which the utility is to be calculated.
+
+    X: numpy.ndarray of shape (n_samples, n_features)
+        The samples for which the upper confidence bound is to be calculated.
+
+    beta: float
+        Value of the beta parameter, passed on to UCB.
+
+    n_instances: int
+        Number of samples to be queried.
+
+    Returns
+    -------
+    query_idx: numpy.ndarray of shape (n_instances, )
+        The indices of the instances from X chosen to be labelled.
+
+    X[query_idx]: numpy.ndarray of shape (n_instances, n_features)
+        The instances from X chosen to be labelled.
+    """
     ucb = UCB(optimizer, X, beta=beta)
     query_idx = multi_argmax(ucb, n_instances=n_instances)
 
diff --git a/modAL/uncertainty.py b/modAL/uncertainty.py
@@ -131,10 +131,10 @@ def uncertainty_sampling(classifier, X, n_instances=1, **uncertainty_measure_kwa
     Returns
     -------
     query_idx: numpy.ndarray of shape (n_instances, )
-        The indices of the instances from X_pool chosen to be labelled.
+        The indices of the instances from X chosen to be labelled.
 
-    X_pool[query_idx]: numpy.ndarray of shape (n_instances, n_features)
-        The instances from X_pool chosen to be labelled.
+    X[query_idx]: numpy.ndarray of shape (n_instances, n_features)
+        The instances from X chosen to be labelled.
     """
     uncertainty = classifier_uncertainty(classifier, X, **uncertainty_measure_kwargs)
     query_idx = multi_argmax(uncertainty, n_instances=n_instances)
@@ -164,10 +164,10 @@ def margin_sampling(classifier, X, n_instances=1, **uncertainty_measure_kwargs):
     Returns
     -------
     query_idx: numpy.ndarray of shape (n_instances, )
-        The indices of the instances from X_pool chosen to be labelled.
+        The indices of the instances from X chosen to be labelled.
 
     X[query_idx]: numpy.ndarray of shape (n_instances, n_features)
-        The instances from X_pool chosen to be labelled.
+        The instances from X chosen to be labelled.
     """
     margin = classifier_margin(classifier, X, **uncertainty_measure_kwargs)
     query_idx = multi_argmax(-margin, n_instances=n_instances)
@@ -197,10 +197,10 @@ def entropy_sampling(classifier, X, n_instances=1, **uncertainty_measure_kwargs)
     Returns
     -------
     query_idx: numpy.ndarray of shape (n_instances, )
-        The indices of the instances from X_pool chosen to be labelled.
+        The indices of the instances from X chosen to be labelled.
 
     X[query_idx]: numpy.ndarray of shape (n_instances, n_features)
-        The instances from X_pool chosen to be labelled.
+        The instances from X chosen to be labelled.
     """
     entropy = classifier_entropy(classifier, X, **uncertainty_measure_kwargs)
     query_idx = multi_argmax(entropy, n_instances=n_instances)