|
| 1 | +""" |
| 2 | +Class to create/store database of existing evaluations, and routines to select |
| 3 | +existing evaluations to build an initial linear model |
| 4 | +""" |
| 5 | +import logging |
| 6 | +import numpy as np |
| 7 | + |
| 8 | +from .util import apply_scaling, dykstra |
| 9 | +from .trust_region import ctrsbox_geometry, trsbox_geometry |
| 10 | + |
| 11 | +__all__ = ['EvaluationDatabase'] |
| 12 | + |
| 13 | +module_logger = logging.getLogger(__name__) |
| 14 | + |
| 15 | + |
| 16 | +# Class to store set of evaluations (x, rx) |
| 17 | +class EvaluationDatabase(object): |
| 18 | + def __init__(self, eval_list=None, starting_eval=None): |
| 19 | + # eval_list is a list of tuples (x, rx) |
| 20 | + self._evals = [] |
| 21 | + if eval_list is not None: |
| 22 | + for e in eval_list: |
| 23 | + self._evals.append(e) |
| 24 | + |
| 25 | + # Which evaluation index should be the starting point of the optimization? |
| 26 | + self.starting_eval = None |
| 27 | + if starting_eval is not None and 0 <= starting_eval <= len(self._evals): |
| 28 | + self.starting_eval = starting_eval |
| 29 | + |
| 30 | + def __len__(self): |
| 31 | + return len(self._evals) |
| 32 | + |
| 33 | + def append(self, x, rx, make_starting_eval=False): |
| 34 | + self._evals.append((x, rx)) |
| 35 | + if make_starting_eval: |
| 36 | + self.starting_eval = len(self) - 1 |
| 37 | + |
| 38 | + def set_starting_eval(self, index): |
| 39 | + if 0 <= index < len(self): |
| 40 | + self.starting_eval = index |
| 41 | + else: |
| 42 | + raise IndexError("Invalid index %g given current set of %g evaluations" % (index, len(self))) |
| 43 | + |
| 44 | + def get_starting_eval_idx(self): |
| 45 | + if len(self) == 0: |
| 46 | + raise RuntimeError("No evaluations available, no suitable starting evaluation ") |
| 47 | + elif self.starting_eval is None: |
| 48 | + module_logger.warning("Starting evaluation index not set, using most recently appended evaluation") |
| 49 | + self.starting_eval = len(self) - 1 |
| 50 | + |
| 51 | + return self.starting_eval |
| 52 | + |
| 53 | + def get_eval(self, index): |
| 54 | + # Return (x, rx) for given index |
| 55 | + if 0 <= index < len(self): |
| 56 | + return self._evals[index][0], self._evals[index][1] |
| 57 | + else: |
| 58 | + raise IndexError("Invalid index %g given current set of %g evaluations" % (index, len(self))) |
| 59 | + |
| 60 | + def get_x(self, index): |
| 61 | + return self.get_eval(index)[0] |
| 62 | + |
| 63 | + def get_rx(self, index): |
| 64 | + return self.get_eval(index)[1] |
| 65 | + |
| 66 | + def apply_scaling(self, scaling_changes): |
| 67 | + # Adjust all input x values based on scaling |
| 68 | + if scaling_changes is not None: |
| 69 | + for i in range(len(self)): |
| 70 | + x, rx = self._evals[i] |
| 71 | + self._evals[i] = (apply_scaling(x, scaling_changes), rx) |
| 72 | + return |
| 73 | + |
| 74 | + def select_starting_evals(self, delta, xl=None, xu=None, projections=[], tol=1e-8, |
| 75 | + dykstra_max_iters=100, dykstra_tol=1e-10): |
| 76 | + # Given a database 'evals' with prescribed starting index, and initial trust-region radius delta > 0 |
| 77 | + # determine a subset of the database to use |
| 78 | + |
| 79 | + # The bounds xl <= x <= xu and projection list are used to determine where to evaluate any new points |
| 80 | + # (ensuring they are feasible) |
| 81 | + |
| 82 | + if delta <= 0.0: |
| 83 | + raise RuntimeError("delta must be strictly positive") |
| 84 | + if len(self) == 0: |
| 85 | + raise RuntimeError("Need at least one evaluation to select starting evaluations") |
| 86 | + |
| 87 | + base_idx = self.get_starting_eval_idx() |
| 88 | + xbase = self.get_x(self.get_starting_eval_idx()) |
| 89 | + n = len(xbase) |
| 90 | + module_logger.debug("Selecting starting evaluations from existing database") |
| 91 | + module_logger.debug("Have %g evaluations to choose from" % len(self)) |
| 92 | + module_logger.debug("Using base index %g" % base_idx) |
| 93 | + |
| 94 | + # For linear interpolation, we will use the matrix |
| 95 | + # M = [[1, 0], [0, L]] where L has rows (xi-xbase)/delta |
| 96 | + # So, just build a large matrix Lfull with everything |
| 97 | + n_perturbations = len(self) - 1 |
| 98 | + Lfull = np.zeros((n_perturbations, n)) |
| 99 | + row_idx = 0 |
| 100 | + for i in range(n_perturbations + 1): |
| 101 | + if i == base_idx: |
| 102 | + continue |
| 103 | + Lfull[row_idx, :] = (self.get_x(i) - xbase) / delta # Lfull[i,:] = (xi-xbase) / delta |
| 104 | + row_idx += 1 |
| 105 | + |
| 106 | + xdist = np.linalg.norm(Lfull, axis=1) # xdist[i] = ||Lfull[i,:]|| = ||xi-xbase|| / delta |
| 107 | + # module_logger.debug("xdist =", xdist) |
| 108 | + |
| 109 | + # We ideally want xdist ~ 1, so reweight these distances based on that (large xdist_reweighted --> xdist ~ 1 --> good) |
| 110 | + xdist_reweighted = 1.0 / np.maximum(xdist, 1.0 / xdist) |
| 111 | + # module_logger.debug("xdist_reweighted =", xdist_reweighted) |
| 112 | + |
| 113 | + if n_perturbations == 0: |
| 114 | + module_logger.debug("Only one evaluation available, just selecting that") |
| 115 | + return base_idx, [], delta * np.eye(n) |
| 116 | + |
| 117 | + # Now, find as many good perturbations as we can |
| 118 | + # Good = not too far from xbase (relative to delta) and sufficiently linearly independent |
| 119 | + # from other selected perturbations (i.e. Lfull[perturbation_idx,:] well-conditioned |
| 120 | + # and len(perturbation_idx) <= n |
| 121 | + perturbation_idx = [] # what point indices to use as perturbations |
| 122 | + |
| 123 | + for iter in range(min(n_perturbations, n)): |
| 124 | + # Add one more good perturbation, if available |
| 125 | + # Note: can only add at most the number of available perturbations, or n perturbations, whichever is smaller |
| 126 | + if iter == 0: |
| 127 | + # First perturbation: every direction is equally good, so pick the point closest to the |
| 128 | + # trust-region boundary |
| 129 | + idx = int(np.argmax(xdist_reweighted)) |
| 130 | + module_logger.debug("Adding index %g with ||xi-xbase|| / delta = %g" % (idx if idx < base_idx else idx+1, xdist[idx])) |
| 131 | + perturbation_idx.append(idx) |
| 132 | + else: |
| 133 | + Q, R = np.linalg.qr(Lfull[perturbation_idx, :].T, mode='reduced') |
| 134 | + # module_logger.debug("Current perturbation_idx =", perturbation_idx) |
| 135 | + L_rem = Lfull @ (np.eye(n) - Q @ Q.T) # part of (xi-xbase)/delta orthogonal to current perturbations |
| 136 | + # rem_size = fraction of original length ||xi-xbase||/delta that is orthogonal to current perturbations |
| 137 | + # all entries are in [0,1], and is zero for already selected perturbations |
| 138 | + rem_size = np.linalg.norm(L_rem, axis=1) / xdist |
| 139 | + rem_size[perturbation_idx] = 0 # ensure this holds exactly |
| 140 | + # module_logger.debug("rem_size =", rem_size) |
| 141 | + # module_logger.debug("rem_size * xdist_reweighted =", rem_size * xdist_reweighted) |
| 142 | + |
| 143 | + # We want a point with large rem_size and xdist ~ 1 (i.e. xdist_reweighted large) |
| 144 | + idx = int(np.argmax(rem_size * xdist_reweighted)) |
| 145 | + if rem_size[idx] * xdist_reweighted[idx] > tol: |
| 146 | + # This ensures new perturbation is sufficiently linearly independent of existing perturbations |
| 147 | + # (and also ensures idx hasn't already been chosen) |
| 148 | + module_logger.debug("Adding index %g" % (idx if idx < base_idx else idx+1)) |
| 149 | + perturbation_idx.append(idx) |
| 150 | + else: |
| 151 | + module_logger.debug("No more linearly independent directions, quitting") |
| 152 | + break |
| 153 | + |
| 154 | + # Find new linearly independent directions |
| 155 | + if len(perturbation_idx) < n: |
| 156 | + module_logger.debug("Selecting %g new linearly independent directions" % (n - len(perturbation_idx))) |
| 157 | + Q, _ = np.linalg.qr(Lfull[perturbation_idx, :].T, mode='complete') |
| 158 | + new_perturbations = delta * Q[:, len(perturbation_idx):].T |
| 159 | + |
| 160 | + # Make perturbations feasible w.r.t. xl <= x <= xu and projections |
| 161 | + # Note: if len(projections) > 0, then the projection list *already* includes bounds |
| 162 | + # Don't need to make pre-existing evaluations feasible, since we already have r(x) for these |
| 163 | + |
| 164 | + # Start construction of interpolation matrix for later |
| 165 | + L = np.zeros((n, n), dtype=float) |
| 166 | + L[:len(perturbation_idx), :] = Lfull[perturbation_idx, :] |
| 167 | + L[len(perturbation_idx):, :] = new_perturbations / delta |
| 168 | + |
| 169 | + # Since we already have a full set of linearly independent directions, |
| 170 | + # we do this by moving each infeasible perturbation to a geometry-improving location |
| 171 | + for i in range(new_perturbations.shape[0]): |
| 172 | + xnew = xbase + new_perturbations[i, :] |
| 173 | + # Check feasibility |
| 174 | + if len(projections) == 0: |
| 175 | + # Bounds only |
| 176 | + feasible = np.all(xnew >= xl) and np.all(xnew <= xu) |
| 177 | + else: |
| 178 | + # Projections |
| 179 | + xnew_C = dykstra(projections, xnew, max_iter=dykstra_max_iters, tol=dykstra_tol) |
| 180 | + feasible = np.linalg.norm(xnew - xnew_C) < dykstra_tol |
| 181 | + |
| 182 | + if feasible: |
| 183 | + # Skip feasible points, nothing to do |
| 184 | + continue |
| 185 | + |
| 186 | + # If infeasible, build Lagrange polynomial and move to geometry-improving location in B(xbase,delta) |
| 187 | + # which will automatically be feasible |
| 188 | + module_logger.debug("Moving default %g-th new perturbation to ensure feasibility" % i) |
| 189 | + c = 0.0 # Lagrange polynomial centered at xbase |
| 190 | + ei = np.zeros((n,), dtype=float) |
| 191 | + ei[len(perturbation_idx) + i] = 1.0 |
| 192 | + g = np.linalg.solve(L, ei) / delta # divide by delta because L is scaled by 1/delta |
| 193 | + if len(projections) == 0: |
| 194 | + new_perturbations[i, :] = trsbox_geometry(xbase, c, g, xl, xu, delta) |
| 195 | + else: |
| 196 | + new_perturbations[i, :] = ctrsbox_geometry(xbase, c, g, projections, delta) |
| 197 | + |
| 198 | + # Update L after replacement |
| 199 | + L[len(perturbation_idx) + i, :] = new_perturbations[i,:] / delta |
| 200 | + else: |
| 201 | + module_logger.debug("Full set of directions found, no need for new evaluations") |
| 202 | + new_perturbations = None |
| 203 | + |
| 204 | + # perturbation_idx in [0, ..., n_perturbations-1], reset to be actual indices |
| 205 | + for i in range(len(perturbation_idx)): |
| 206 | + if perturbation_idx[i] >= base_idx: |
| 207 | + perturbation_idx[i] += 1 |
| 208 | + return base_idx, perturbation_idx, new_perturbations |
0 commit comments