Skip to content

Commit 5d94fe1

Browse files
committed
modAL.models.BayesianOptimizer now keeps track of both the argmax and the maximum value among the known values of the function being optimized
1 parent f547a8d commit 5d94fe1

File tree

3 files changed

+43
-26
lines changed

3 files changed

+43
-26
lines changed

modAL/acquisition.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ def optimizer_PI(optimizer, X, tradeoff=0):
4848
mean, std = optimizer.predict(X, return_std=True)
4949
std = std.reshape(-1, 1)
5050

51-
return PI(mean, std, optimizer.max_val, tradeoff)
51+
return PI(mean, std, optimizer.y_max, tradeoff)
5252

5353

5454
def optimizer_EI(optimizer, X, tradeoff=0):
@@ -74,7 +74,7 @@ def optimizer_EI(optimizer, X, tradeoff=0):
7474
mean, std = optimizer.predict(X, return_std=True)
7575
std = std.reshape(-1, 1)
7676

77-
return EI(mean, std, optimizer.max_val, tradeoff)
77+
return EI(mean, std, optimizer.y_max, tradeoff)
7878

7979

8080
def optimizer_UCB(optimizer, X, beta=1):

modAL/models.py

+18-10
Original file line numberDiff line numberDiff line change
@@ -402,6 +402,10 @@ class BayesianOptimizer(BaseLearner):
402402
If the model has been fitted already: numpy.ndarray containing the
403403
labels corresponding to _training_samples
404404
405+
X_max: None or numpy.ndarray of shape (n_samples, 3)
406+
407+
y_max: -np.inf or float
408+
405409
Examples
406410
--------
407411
>>> import numpy as np
@@ -449,14 +453,19 @@ def __init__(self, *args, **kwargs):
449453
super(BayesianOptimizer, self).__init__(*args, **kwargs)
450454
# setting the maximum value
451455
if self.y_training is not None:
452-
self.max_val = np.max(self.y_training)
456+
max_idx = np.argmax(self.y_training)
457+
self.X_max = self.X_training[max_idx]
458+
self.y_max = self.y_training[max_idx]
453459
else:
454-
self.max_val = -np.inf
460+
self.X_max = None
461+
self.y_max = -np.inf
455462

456-
def _set_max(self, y):
457-
y_max = np.max(y)
458-
if y_max > self.max_val:
459-
self.max_val = y_max
463+
def _set_max(self, X, y):
464+
max_idx = np.argmax(y)
465+
y_max = y[max_idx]
466+
if y_max > self.y_max:
467+
self.y_max = y_max
468+
self.X_max = X[max_idx]
460469

461470
def get_max(self):
462471
"""
@@ -471,9 +480,8 @@ def get_max(self):
471480
The currently best value.
472481
473482
"""
474-
max_idx = np.argmax(self.y_training)
475483

476-
return self.X_training[max_idx], self.y_training[max_idx]
484+
return self.X_max, self.y_max
477485

478486
def teach(self, X, y, bootstrap=False, only_new=False, **fit_kwargs):
479487
"""
@@ -504,10 +512,10 @@ def teach(self, X, y, bootstrap=False, only_new=False, **fit_kwargs):
504512
self._add_training_data(X, y)
505513
if not only_new:
506514
self._fit_to_known(bootstrap=bootstrap, **fit_kwargs)
507-
self._set_max(y)
508515
else:
509516
self._fit_on_new(X, y, bootstrap=bootstrap, **fit_kwargs)
510-
self._set_max(y)
517+
518+
self._set_max(X, y)
511519

512520

513521
class BaseCommittee(ABC, BaseEstimator):

tests/core_tests.py

+23-14
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ def test_optimizer_PI(self):
150150
)
151151

152152
optimizer = modAL.models.BayesianOptimizer(estimator=mock_estimator)
153-
optimizer._set_max([max_val])
153+
optimizer._set_max([0], [max_val])
154154

155155
np.testing.assert_almost_equal(
156156
ndtr((mean - max_val - tradeoff)/std),
@@ -169,10 +169,10 @@ def test_optimizer_EI(self):
169169
)
170170

171171
optimizer = modAL.models.BayesianOptimizer(estimator=mock_estimator)
172-
optimizer._set_max([max_val])
172+
optimizer._set_max([0], [max_val])
173173

174-
true_EI = (mean - optimizer.max_val - tradeoff) * ndtr((mean - optimizer.max_val - tradeoff)/std)\
175-
+ std * norm.pdf((mean - optimizer.max_val - tradeoff)/std)
174+
true_EI = (mean - optimizer.y_max - tradeoff) * ndtr((mean - optimizer.y_max - tradeoff) / std) \
175+
+ std * norm.pdf((mean - optimizer.y_max - tradeoff) / std)
176176

177177
np.testing.assert_almost_equal(
178178
true_EI,
@@ -211,7 +211,7 @@ def test_selection(self):
211211
)
212212

213213
optimizer = modAL.models.BayesianOptimizer(estimator=mock_estimator)
214-
optimizer._set_max([max_val])
214+
optimizer._set_max([0], [max_val])
215215

216216
modAL.acquisition.max_PI(optimizer, X, tradeoff=np.random.rand(), n_instances=n_instances)
217217
modAL.acquisition.max_EI(optimizer, X, tradeoff=np.random.rand(), n_instances=n_instances)
@@ -532,7 +532,7 @@ def test_set_max(self):
532532
# case 1: the estimator is not fitted yet
533533
regressor = mock.MockEstimator()
534534
learner = modAL.models.BayesianOptimizer(estimator=regressor)
535-
self.assertEqual(-np.inf, learner.max_val)
535+
self.assertEqual(-np.inf, learner.y_max)
536536

537537
# case 2: the estimator is fitted already
538538
for n_samples in range(1, 100):
@@ -545,17 +545,20 @@ def test_set_max(self):
545545
estimator=regressor,
546546
X_training=X, y_training=y
547547
)
548-
np.testing.assert_almost_equal(max_val, learner.max_val)
548+
np.testing.assert_almost_equal(max_val, learner.y_max)
549549

550550
def test_set_new_max(self):
551551
for n_reps in range(100):
552552
# case 1: the learner is not fitted yet
553553
for n_samples in range(1, 10):
554+
X = np.random.rand(n_samples, 3)
554555
y = np.random.rand(n_samples)
556+
max_idx = np.argmax(y)
555557
regressor = mock.MockEstimator()
556558
learner = modAL.models.BayesianOptimizer(estimator=regressor)
557-
learner._set_max(y)
558-
self.assertEqual(learner.max_val, np.max(y))
559+
learner._set_max(X, y)
560+
np.testing.assert_equal(learner.X_max, X[max_idx])
561+
np.testing.assert_equal(learner.y_max, y[max_idx])
559562

560563
# case 2: new value is not a maximum
561564
for n_samples in range(1, 10):
@@ -568,10 +571,13 @@ def test_set_new_max(self):
568571
X_training=X, y_training=y
569572
)
570573

574+
X_new = np.random.rand()
571575
y_new = y - np.random.rand()
572-
old_max = learner.max_val
573-
learner._set_max(y_new)
574-
np.testing.assert_almost_equal(old_max, learner.max_val)
576+
X_old_max = learner.X_max
577+
y_old_max = learner.y_max
578+
learner._set_max(X_new, y_new)
579+
np.testing.assert_equal(X_old_max, learner.X_max)
580+
np.testing.assert_equal(y_old_max, learner.y_max)
575581

576582
# case 3: new value is a maximum
577583
for n_samples in range(1, 10):
@@ -584,9 +590,12 @@ def test_set_new_max(self):
584590
X_training=X, y_training=y
585591
)
586592

593+
X_new = np.random.rand(n_samples, 2)
587594
y_new = y + np.random.rand()
588-
learner._set_max(y_new)
589-
np.testing.assert_almost_equal(np.max(y_new), learner.max_val)
595+
max_idx = np.argmax(y_new)
596+
learner._set_max(X_new, y_new)
597+
np.testing.assert_equal(X_new[max_idx], learner.X_max)
598+
np.testing.assert_equal(y_new[max_idx], learner.y_max)
590599

591600
def test_get_max(self):
592601
for n_samples in range(1, 100):

0 commit comments

Comments (0)