diff --git a/BUILD.bazel b/BUILD.bazel
index 80a2538a..51361082 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -2,6 +2,8 @@ load("@rules_swiftnav//cc:defs.bzl", "UNIT", "swift_cc_test", "swift_cc_test_lib
 load("//:copts.bzl", "COPTS")
 load("@hedron_compile_commands//:refresh_compile_commands.bzl", "refresh_compile_commands")
 
+subpackages(include = ["examples/**"])
+
 refresh_compile_commands(
     name = "gen_compile_commands",
     visibility = ["//visibility:public"],
@@ -21,6 +23,7 @@ filegroup(
 cc_library(
     name = "albatross",
     hdrs = [
+        "include/albatross/CGGP",
         "include/albatross/Common",
         "include/albatross/Core",
         "include/albatross/CovarianceFunctions",
@@ -99,6 +102,7 @@ cc_library(
         "include/albatross/src/details/traits.hpp",
        "include/albatross/src/details/typecast.hpp",
         "include/albatross/src/details/unused.hpp",
+        "include/albatross/src/eigen/partial_cholesky.hpp",
         "include/albatross/src/eigen/serializable_ldlt.hpp",
         "include/albatross/src/eigen/serializable_spqr.hpp",
         "include/albatross/src/evaluation/cross_validation.hpp",
@@ -125,6 +129,7 @@ cc_library(
         "include/albatross/src/linalg/qr_utils.hpp",
         "include/albatross/src/linalg/spqr_utils.hpp",
         "include/albatross/src/models/conditional_gaussian.hpp",
+        "include/albatross/src/models/conjugate_gradient_gp.hpp",
         "include/albatross/src/models/gp.hpp",
         "include/albatross/src/models/least_squares.hpp",
         "include/albatross/src/models/null_model.hpp",
@@ -200,6 +205,7 @@ swift_cc_test(
         "tests/test_block_utils.cc",
         "tests/test_call_trace.cc",
         "tests/test_callers.cc",
+        "tests/test_cg_gp.cc",
         "tests/test_chi_squared_versus_gsl.cc",
         "tests/test_compression.cc",
         "tests/test_concatenate.cc",
@@ -229,6 +235,7 @@ swift_cc_test(
         "tests/test_models.cc",
         "tests/test_models.h",
         "tests/test_parameter_handling_mixin.cc",
+        "tests/test_partial_cholesky.cc",
         "tests/test_prediction.cc",
         "tests/test_radial.cc",
         "tests/test_random_utils.cc",
diff --git a/include/albatross/CGGP b/include/albatross/CGGP
new file mode 100644
index 00000000..f84f897a
--- /dev/null
+++ b/include/albatross/CGGP
@@ -0,0 +1,20 @@
+/*
+ * Copyright (C) 2025 Swift Navigation Inc.
+ * Contact: Swift Navigation <dev@swiftnav.com>
+ *
+ * This source is subject to the license found in the file 'LICENSE' which must
+ * be distributed together with this source. All other rights reserved.
+ *
+ * THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND,
+ * EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+#ifndef ALBATROSS_CG_GP_H
+#define ALBATROSS_CG_GP_H
+
+#include "GP"
+#include <albatross/src/eigen/partial_cholesky.hpp>
+#include <albatross/src/models/conjugate_gradient_gp.hpp>
+
+#endif // ALBATROSS_CG_GP_H
diff --git a/include/albatross/src/eigen/partial_cholesky.hpp b/include/albatross/src/eigen/partial_cholesky.hpp
new file mode 100644
index 00000000..8b2750ab
--- /dev/null
+++ b/include/albatross/src/eigen/partial_cholesky.hpp
@@ -0,0 +1,278 @@
+/*
+ * Copyright (C) 2025 Swift Navigation Inc.
+ * Contact: Swift Navigation <dev@swiftnav.com>
+ *
+ * This source is subject to the license found in the file 'LICENSE' which must
+ * be distributed together with this source. All other rights reserved.
+ *
+ * THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND,
+ * EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+namespace Eigen {
+
+// A pivoted, partial Cholesky decomposition.
+//
+// This is meant for use as a preconditioner for symmetric,
+// positive-definite problems.  The algorithm is described in:
+//
+//   On the Low-rank Approximation by the Pivoted Cholesky Decomposition
+//   H. Harbrecht, M. Peters and R. Schneider
+//   http://www.dfg-spp1324.de/download/preprints/preprint076.pdf
+//
+// In this implementation, we make the natural choice to avoid storing
+// a copy of the input matrix; we only copy its diagonal for use
+// during the decomposition.  The L matrix already stores the sequence
+// of updates to each relevant column of A, so these updates are
+// applied on demand as each pivot column is selected.
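+//
+// A sketch of intended use (the values here are illustrative, not
+// defaults taken from elsewhere):
+//
+//   Eigen::MatrixXd A = ...;  // dense, symmetric positive-definite
+//   Eigen::VectorXd b = ...;
+//   Eigen::PartialCholesky<double> chol(A, /*order=*/20, /*nugget=*/1.e-2);
+//   if (chol.info() == Eigen::Success) {
+//     // Approximately applies (A + nugget^2 * I)^-1 to b.
+//     Eigen::VectorXd x = chol.solve(b);
+//   }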
+template <typename Scalar> class PartialCholesky {
+public:
+  using MatrixType = Matrix<Scalar, Dynamic, Dynamic>;
+  using StorageIndex = typename MatrixType::StorageIndex;
+  using RealScalar = typename NumTraits<Scalar>::Real;
+
+  using PermutationType = PermutationMatrix<Dynamic, Dynamic>;
+
+  static constexpr const Index cDefaultOrder = 22;
+
+  static constexpr const double cDefaultNugget = 1.e-2;
+
+  PartialCholesky() {}
+
+  explicit PartialCholesky(const MatrixType &A)
+      : m_rows{A.rows()}, m_cols{A.cols()} {
+    compute(A);
+  }
+
+  explicit PartialCholesky(Index order) : m_order{order} {}
+
+  PartialCholesky(Index order, RealScalar nugget)
+      : m_order{order}, m_nugget{nugget} {}
+
+  PartialCholesky(const MatrixType &A, Index order, RealScalar nugget)
+      : m_rows{A.rows()}, m_cols{A.cols()}, m_order{order}, m_nugget{nugget} {
+    compute(A);
+  }
+
+  inline Index order() const { return m_order; }
+
+  PartialCholesky &setOrder(Index order) {
+    m_order = order;
+    return *this;
+  }
+
+  inline double nugget() const { return m_nugget; }
+
+  PartialCholesky &setNugget(double nugget) {
+    m_nugget = nugget;
+    return *this;
+  }
+
+  // Unlike the normal dense LDLT and friends, we do not want to
+  // copy the potentially quite large A.  Unlike the CG solver, we
+  // do not want to retain any reference to A's data once we have
+  // finished this function.
+  PartialCholesky &compute(const MatrixType &A) {
+    m_rows = A.rows();
+    m_cols = A.cols();
+    PermutationType transpositions(rows());
+    transpositions.setIdentity();
+    const Index max_rank = std::min(order(), rows());
+    MatrixType L{MatrixType::Zero(rows(), max_rank)};
+
+    // A's diagonal; it needs to keep getting pivoted, shifted and
+    // searched at each step.  We track this separately from the rest
+    // of A because we have to search it every time, whereas for the
+    // off-diagonal columns of A, we only apply the updates we
+    // strictly need.
+    VectorXd diag{A.diagonal()};
+
+    const auto calc_error = [&diag](Index k) {
+      return diag.tail(diag.size() - k).array().sum();
+    };
+
+    RealScalar error = calc_error(0);
+
+    for (Index k = 0; k < max_rank; ++k) {
+      Index max_index;
+      diag.tail(diag.size() - k).maxCoeff(&max_index);
+      max_index += k;
+      // std::cout << "step " << k << ": found max element " << diag(max_index)
+      //           << " at position " << max_index - k << " in "
+      //           << diag.tail(diag.size() - k).transpose() << std::endl;
+
+      if (max_index != k) {
+        transpositions.applyTranspositionOnTheRight(k, max_index);
+        std::swap(diag(k), diag(max_index));
+        L.row(k).swap(L.row(max_index));
+        // std::cout << max_index << " <-> " << k << std::endl;
+      }
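+      // After the swap, diag(k) holds the largest diagonal entry of
+      // the current Schur complement of the pivoted input, so a
+      // non-positive value here means the remaining submatrix is no
+      // longer numerically positive definite and the factorisation
+      // cannot continue.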
+      if (diag(k) <= 0.) {
+        m_info = InvalidInput;
+        return *this;
+      }
+
+      const RealScalar alpha = std::sqrt(diag(k));
+
+      L(k, k) = alpha;
+
+      const Index tail_size = rows() - k - 1;
+      if (tail_size < 1) {
+        break;
+      }
+
+      // Everything below here should be ordered appropriately -- we
+      // pivot `diag` and the rows of `L` every time we do an update.
+      VectorXd b =
+          ((transpositions.transpose() * A.col(transpositions.indices()(k))))
+              .tail(tail_size);
+
+      // I couldn't find this derived anywhere, so briefly: for the
+      // lower-right submatrix below diagonal element k of the pivoted
+      // input matrix, we have to apply the update b_i b_i^T / a_i =
+      // l_i l_i^T, where a_i = alpha_i^2 is pivot diagonal element i,
+      // _for each preceding pivot column i_ before the current one.
+      // The textbook algorithm applies these updates to the whole
+      // trailing submatrix in place; here we never modify the input,
+      // and we only care about the single column below the pivot we
+      // just chose.  The successive updates to this column A_k,k+1:n
+      // are l_i * l_i[k] -- the columns of the bottom-left submatrix
+      // of L below row k, each scaled by the element just above it
+      // (in row k) respectively.
+      for (Index i = 0; i < k; ++i) {
+        b -= L.col(i).tail(tail_size) * L(k, i);
+      }
+
+      L.col(k).tail(tail_size) = b / alpha;
+      diag.tail(tail_size) -=
+          L.col(k).tail(tail_size).array().square().matrix();
+
+      for (Index i = 0; i < k; ++i) {
+        assert(L(i, i) >= L(i + 1, i + 1) && "failure in ordering invariant!");
+      }
+
+      assert(calc_error(k + 1) < error && "failure in convergence criterion!");
+      error = calc_error(k + 1);
+
+      // std::cout << "step " << k
+      //           << ": error = " << diag.tail(diag.size() - k).array().sum()
+      //           << std::endl;
+    }
+
+    m_error = diag.tail(diag.size() - max_rank).array().sum();
+
+    // m_nugget = std::sqrt(A.diagonal().minCoeff());
+
+    m_transpositions = transpositions;
+
+    m_L = L;
+
+    // We decompose before returning to save time on repeated solves.
+    //
+    // Arguably we could pre-apply the outer terms of Woodbury, but
+    // computing that full matrix would significantly increase our
+    // storage requirements in the typical case where k << n.
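+    //
+    // For reference, the algebra behind the QR trick below: with
+    // M = P L L^T P^T + s^2 I (s = nugget), the Woodbury identity
+    // gives
+    //
+    //   M^-1 = (1 / s^2) * (I - P L (s^2 I + L^T L)^-1 L^T P^T).
+    //
+    // If [P L; s I] = Q R is a thin QR factorisation, then
+    // R^T R = L^T L + s^2 I, and with Q_n denoting the first n rows
+    // of Q,
+    //
+    //   M^-1 = (1 / s^2) * (I - Q_n Q_n^T),
+    //
+    // which is exactly what solve() applies via m_Q.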
+    m_decomp = LDLT<MatrixXd>(MatrixXd::Identity(L.cols(), L.cols()) +
+                              1 / (m_nugget * m_nugget) * L.transpose() * L);
+
+    MatrixXd Ltall(L.rows() + max_rank, L.cols());
+    Ltall.topRows(L.rows()) = transpositions * L;
+    Ltall.bottomRows(max_rank) =
+        MatrixXd::Identity(max_rank, max_rank) * m_nugget;
+
+    // std::cout << "Ltall (" << Ltall.rows() << "x" << Ltall.cols() << "):\n"
+    //           << Ltall << std::endl;
+    m_qr = HouseholderQR<MatrixXd>(Ltall);
+    MatrixXd thin_Q =
+        m_qr.householderQ() * MatrixXd::Identity(m_qr.rows(), m_L.cols());
+    // std::cout << "thin_Q (" << thin_Q.rows() << "x" << thin_Q.cols() << "):\n"
+    //           << thin_Q << std::endl;
+    m_Q = thin_Q.topRows(rows());
+    // std::cout << "m_Q (" << m_Q.rows() << "x" << m_Q.cols() << "):\n"
+    //           << m_Q << std::endl;
+
+    m_info = Success;
+    return *this;
+  }
+
+  template <typename Rhs>
+  Matrix<Scalar, Dynamic, Rhs::ColsAtCompileTime> solve(const Rhs &b) const {
+    assert(finished() &&
+           "Please don't call 'solve()' on an uninitialised decomposition!");
+    const double n2 = m_nugget * m_nugget;
+
+    // std::cout << "Q^T b:\n" << MatrixXd(m_Q.transpose() * b) << std::endl;
+    // std::cout << "Q Q^T b:\n" << MatrixXd(m_Q * (m_Q.transpose() * b))
+    //           << std::endl;
+    // std::cout << "b - Q Q^T b:\n"
+    //           << MatrixXd(b - m_Q * (m_Q.transpose() * b)) << std::endl;
+
+    Matrix<Scalar, Dynamic, Rhs::ColsAtCompileTime> ret =
+        1 / n2 * (b - m_Q * (m_Q.transpose() * b));
+    // Matrix<Scalar, Dynamic, Rhs::ColsAtCompileTime> ret =
+    //     1 / n2 *
+    //     (b - (m_transpositions * m_L *
+    //           m_decomp.solve(m_L.transpose() * m_transpositions.transpose() *
+    //                          b / n2)));
+    return ret;
+  }
+
+  LDLT<MatrixXd> direct_solve() const {
+    assert(finished() && "Please don't call 'direct_solve()' on an "
+                         "uninitialised decomposition!");
+    return LDLT<MatrixXd>(permutationsP() * matrixL() * matrixL().transpose() *
+                              permutationsP().transpose() +
+                          MatrixXd::Identity(rows(), cols()) * nugget() *
+                              nugget());
+  }
+
+  MatrixXd matrixL() const {
+    assert(finished() &&
+           "Please don't call 'matrixL()' on an uninitialised decomposition!");
+
+    return m_L;
+  }
+
+  PermutationType permutationsP() const {
+    assert(finished() && "Please don't call 'permutationsP()' on an "
+                         "uninitialised decomposition!");
+
+    return m_transpositions;
+  }
+
+  inline bool finished() const {
+    return rows() > 0 && cols() > 0 && info() == Success;
+  }
+
+  inline MatrixType reconstructedMatrix() const {
+    assert(finished() && "Please don't call 'reconstructedMatrix()' on an "
+                         "uninitialised decomposition!");
+    return MatrixType(permutationsP() * matrixL() * matrixL().transpose() *
+                      permutationsP().transpose());
+  }
+
+  inline Index rows() const { return m_rows; }
+  inline Index cols() const { return m_cols; }
+
+  ComputationInfo info() const { return m_info; }
+
+  RealScalar error() const {
+    assert(finished() &&
+           "Please don't call 'error()' on an uninitialised decomposition!");
+    return m_error;
+  }
+
+private:
+  Index m_rows{0};
+  Index m_cols{0};
+  Index m_order{cDefaultOrder};
+  RealScalar m_nugget{cDefaultNugget};
+  RealScalar m_error{0};
+  ComputationInfo m_info{Success};
+  MatrixType m_L{};
+  LDLT<MatrixXd> m_decomp{};
+  MatrixXd m_Q{};
+  HouseholderQR<MatrixXd> m_qr{};
+  PermutationType m_transpositions{};
+};
+
+} // namespace Eigen
\ No newline at end of file
diff --git a/include/albatross/src/models/conjugate_gradient_gp.hpp b/include/albatross/src/models/conjugate_gradient_gp.hpp
new file mode 100644
index 00000000..afc7ee76
--- /dev/null
+++ b/include/albatross/src/models/conjugate_gradient_gp.hpp
@@ -0,0 +1,340 @@
+/*
+ * Copyright (C) 2025 Swift Navigation Inc.
+ * Contact: Swift Navigation <dev@swiftnav.com>
+ *
+ * This source is subject to the license found in the file 'LICENSE' which must
+ * be distributed together with this source. All other rights reserved.
+ *
+ * THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND,
+ * EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+namespace albatross {
+
+template <typename FeatureType, typename Preconditioner>
+struct ConjugateGradientGPFit {};
+
+struct IterativeSolverOptions {
+  // Negative values for these will be ignored.
+  static constexpr const Eigen::Index cDefaultMaxIterations{-1};
+  Eigen::Index max_iterations{cDefaultMaxIterations};
+  static constexpr const double cDefaultTolerance{-1};
+  double tolerance{cDefaultTolerance};
+
+  IterativeSolverOptions() {}
+
+  IterativeSolverOptions(Eigen::Index iterations, double tol)
+      : max_iterations{iterations}, tolerance{tol} {}
+
+  template <typename SolverType>
+  explicit IterativeSolverOptions(SolverType &&solver)
+      : max_iterations{solver.maxIterations()}, tolerance{solver.tolerance()} {}
+
+  template <typename SolverType>
+  void configure_solver(SolverType &solver) const {
+    if (tolerance > 0) {
+      solver.setTolerance(tolerance);
+    }
+
+    if (max_iterations > 0) {
+      solver.setMaxIterations(max_iterations);
+    }
+  }
+};
+
+struct IterativeSolverState {
+  Eigen::ComputationInfo info;
+  Eigen::Index iterations;
+  double error;
+
+  IterativeSolverState() = delete;
+
+  template <typename SolverType>
+  explicit IterativeSolverState(SolverType &&solver)
+      : info{solver.info()},
+        iterations{solver.iterations()}, error{solver.error()} {}
+};
+
+namespace detail {
+
+template <typename SolverType>
+std::shared_ptr<SolverType>
+make_shared_solver_with_options(const Eigen::MatrixXd &m,
+                                const IterativeSolverOptions &options) {
+  auto solver = std::make_shared<SolverType>(m);
+  options.configure_solver(*solver);
+  return solver;
+}
+
+} // namespace detail
+
+template <typename FeatureType, typename Preconditioner>
+struct Fit<ConjugateGradientGPFit<FeatureType, Preconditioner>> {
+  std::vector<FeatureType> train_features;
+  // We have to store this; the solver only takes a reference to it
+  // and doesn't own it.
+  Eigen::MatrixXd K_ff;
+  using SolverType =
+      Eigen::ConjugateGradient<Eigen::MatrixXd, Eigen::Lower | Eigen::Upper,
+                               Preconditioner>;
+  // Pretty annoying that you can't copy an Eigen solver object.
+  // Maybe this is a bad idea for now, but seemingly Albatross
+  // requires you to be able to copy these.
+  std::shared_ptr<SolverType> solver;
+  Eigen::VectorXd information;
+
+  IterativeSolverOptions get_options() const {
+    return IterativeSolverOptions{*solver};
+  }
+
+  IterativeSolverState get_solver_state() const {
+    return IterativeSolverState{*solver};
+  }
+
+  void set_options(const IterativeSolverOptions &options) {
+    options.configure_solver(*solver);
+  }
+
+  Fit(const std::vector<FeatureType> &train_features_, Eigen::MatrixXd &&K_ff_,
+      const MarginalDistribution &targets_)
+      : train_features{train_features_}, K_ff{std::move(K_ff_)},
+        // N.B. we give the CG solver a reference to our local member
+        // matrix.
+        solver{std::make_shared<SolverType>(K_ff)},
+        information{solver->solve(targets_.mean)} {
+    if (solver->info() != Eigen::Success) {
+      information.setConstant(std::numeric_limits<double>::quiet_NaN());
+    }
+  }
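+
+  // N.B. copying this struct copies K_ff, but the copied shared
+  // `solver` still references the K_ff member of the object it was
+  // constructed from, so copies remain valid only while that
+  // original object is alive.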
+
+  Fit(const std::vector<FeatureType> &train_features_, Eigen::MatrixXd &&K_ff_,
+      const MarginalDistribution &targets_,
+      const IterativeSolverOptions &options)
+      : train_features{train_features_}, K_ff{std::move(K_ff_)},
+        // N.B. we give the CG solver a reference to our local member
+        // matrix.
+        solver{detail::make_shared_solver_with_options<SolverType>(K_ff,
+                                                                   options)},
+        information{solver->solve(targets_.mean)} {
+    if (solver->info() != Eigen::Success) {
+      information.setConstant(std::numeric_limits<double>::quiet_NaN());
+    }
+  }
+
+  bool operator==(const Fit &other) {
+    return std::tie(train_features, K_ff, information) ==
+           std::tie(other.train_features, other.K_ff, other.information);
+  }
+};
+
+template <typename CGSolver>
+inline JointDistribution cg_gp_joint_prediction(
+    const Eigen::MatrixXd &cross_cov, const Eigen::MatrixXd &prior_cov,
+    const Eigen::VectorXd &information, const CGSolver &solver) {
+  JointDistribution ret(gp_mean_prediction(cross_cov, information),
+                        prior_cov -
+                            cross_cov.transpose() * solver.solve(cross_cov));
+  if (solver.info() != Eigen::Success) {
+    ret.covariance.setConstant(std::numeric_limits<double>::quiet_NaN());
+  }
+
+  return ret;
+}
+
+template <typename CGSolver>
+inline MarginalDistribution cg_gp_marginal_prediction(
+    const Eigen::MatrixXd &cross_cov, const Eigen::VectorXd &prior_variance,
+    const Eigen::VectorXd &information, const CGSolver &solver) {
+  MarginalDistribution ret(
+      gp_mean_prediction(cross_cov, information),
+      prior_variance -
+          (cross_cov.transpose() * solver.solve(cross_cov)).diagonal());
+  if (solver.info() != Eigen::Success) {
+    ret.covariance.diagonal().setConstant(
+        std::numeric_limits<double>::quiet_NaN());
+  }
+
+  return ret;
+}
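+
+// Both helpers above follow the usual GP posterior identities,
+//
+//   mean = K_*f (K_ff + S)^-1 y,
+//   cov  = K_** - K_*f (K_ff + S)^-1 K_f*,
+//
+// except that (K_ff + S)^-1 is applied by the iterative CG solver
+// rather than a direct factorisation; if the solver fails to
+// converge, that is reported through info() and propagated here as
+// NaNs in the returned covariance.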
+
+template <typename CovFunc, typename MeanFunc, typename Preconditioner>
+class ConjugateGradientGaussianProcessRegression
+    : public GaussianProcessBase<
+          CovFunc, MeanFunc,
+          ConjugateGradientGaussianProcessRegression<CovFunc, MeanFunc,
+                                                     Preconditioner>> {
+public:
+  using Base = GaussianProcessBase<
+      CovFunc, MeanFunc,
+      ConjugateGradientGaussianProcessRegression<CovFunc, MeanFunc,
+                                                 Preconditioner>>;
+
+  using Base::covariance_function_;
+  using Base::mean_function_;
+
+  ConjugateGradientGaussianProcessRegression() : Base() {}
+
+  template <typename Cov,
+            std::enable_if_t<
+                std::is_same<typename std::decay<Cov>::type, CovFunc>::value,
+                int> = 0>
+  ConjugateGradientGaussianProcessRegression(Cov &&covariance_function)
+      : Base(std::forward<Cov>(covariance_function)) {}
+
+  template <typename Cov,
+            std::enable_if_t<
+                std::is_same<typename std::decay<Cov>::type, CovFunc>::value,
+                int> = 0>
+  ConjugateGradientGaussianProcessRegression(Cov &&covariance_function,
+                                             const std::string &model_name)
+      : Base(std::forward<Cov>(covariance_function), model_name) {}
+
+  template <typename Cov,
+            std::enable_if_t<
+                std::is_same<typename std::decay<Cov>::type, CovFunc>::value,
+                int> = 0>
+  ConjugateGradientGaussianProcessRegression(
+      Cov &&covariance_function, const std::string &model_name,
+      const IterativeSolverOptions &options)
+      : Base(std::forward<Cov>(covariance_function), model_name),
+        options_{options} {}
+
+  template <
+      typename Cov, typename Mean,
+      std::enable_if_t<
+          std::is_same<typename std::decay<Cov>::type, CovFunc>::value &&
+              std::is_same<typename std::decay<Mean>::type, MeanFunc>::value,
+          int> = 0>
+  ConjugateGradientGaussianProcessRegression(Cov &&covariance_function,
+                                             Mean &&mean_function,
+                                             const std::string &model_name)
+      : Base(std::forward<Cov>(covariance_function),
+             std::forward<Mean>(mean_function), model_name) {}
+
+  template <
+      typename Cov, typename Mean,
+      std::enable_if_t<
+          std::is_same<typename std::decay<Cov>::type, CovFunc>::value &&
+              std::is_same<typename std::decay<Mean>::type, MeanFunc>::value,
+          int> = 0>
+  ConjugateGradientGaussianProcessRegression(
+      Cov &&covariance_function, Mean &&mean_function,
+      const std::string &model_name, const IterativeSolverOptions &options)
+      : Base(std::forward<Cov>(covariance_function),
+             std::forward<Mean>(mean_function), model_name),
+        options_{options} {}
+
+  template <typename FeatureType,
+            std::enable_if_t<has_call_operator<CovFunc, FeatureType &,
+                                               FeatureType &>::value,
+                             int> = 0>
+  Fit<ConjugateGradientGPFit<FeatureType, Preconditioner>>
+  _fit_impl(const std::vector<FeatureType> &features,
+            const MarginalDistribution &targets) const {
+    Eigen::MatrixXd K_ff =
+        covariance_function_(features, Base::threads_.get());
+    MarginalDistribution zero_mean_targets(targets);
+    K_ff += targets.covariance;
+    mean_function_.remove_from(features, &zero_mean_targets.mean);
+    // Clamp this so that we never try to iterate more than the
+    // theoretical maximum number of CG steps (the dimension of K_ff).
+    auto options = options_;
+    options.max_iterations = std::min(options_.max_iterations, K_ff.rows());
+    return Fit<ConjugateGradientGPFit<FeatureType, Preconditioner>>{
+        features, std::move(K_ff), std::move(zero_mean_targets), options};
+  }
+
+  template <
+      typename FeatureType, typename FitFeatureType,
+      std::enable_if_t<
+          has_call_operator<CovFunc, FeatureType &, FeatureType &>::value &&
+              has_call_operator<CovFunc, FeatureType &,
+                                FitFeatureType &>::value,
+          int> = 0>
+  JointDistribution _predict_impl(
+      const std::vector<FeatureType> &features,
+      const Fit<ConjugateGradientGPFit<FitFeatureType, Preconditioner>>
+          &cg_gp_fit,
+      PredictTypeIdentity<JointDistribution> &&) const {
+    const auto cross_cov =
+        covariance_function_(cg_gp_fit.train_features, features);
+    Eigen::MatrixXd prior_cov{covariance_function_(features)};
+    auto pred = cg_gp_joint_prediction(
+        cross_cov, prior_cov, cg_gp_fit.information, *cg_gp_fit.solver);
+    mean_function_.add_to(features, &pred.mean);
+    return pred;
+  }
+
+  template <
+      typename FeatureType, typename FitFeatureType,
+      std::enable_if_t<
+          has_call_operator<CovFunc, FeatureType &, FeatureType &>::value &&
+              has_call_operator<CovFunc, FeatureType &,
+                                FitFeatureType &>::value,
+          int> = 0>
+  MarginalDistribution _predict_impl(
+      const std::vector<FeatureType> &features,
+      const Fit<ConjugateGradientGPFit<FitFeatureType, Preconditioner>>
+          &cg_gp_fit,
+      PredictTypeIdentity<MarginalDistribution> &&) const {
+    const auto cross_cov =
+        covariance_function_(cg_gp_fit.train_features, features);
+    Eigen::VectorXd prior_variance(Eigen::VectorXd::NullaryExpr(
+        cast::to_index(features.size()), [this, &features](Eigen::Index i) {
+          const auto &f = features[cast::to_size(i)];
+          return covariance_function_(f, f);
+        }));
+    auto pred = cg_gp_marginal_prediction(
+        cross_cov, prior_variance, cg_gp_fit.information, *cg_gp_fit.solver);
+    mean_function_.add_to(features, &pred.mean);
+    return pred;
+  }
+
+  template <
+      typename FeatureType, typename FitFeatureType,
+      std::enable_if_t<
+          has_call_operator<CovFunc, FeatureType &, FeatureType &>::value &&
+              has_call_operator<CovFunc, FeatureType &,
+                                FitFeatureType &>::value,
+          int> = 0>
+  Eigen::VectorXd _predict_impl(
+      const std::vector<FeatureType> &features,
+      const Fit<ConjugateGradientGPFit<FitFeatureType, Preconditioner>>
+          &cg_gp_fit,
+      PredictTypeIdentity<Eigen::VectorXd> &&) const {
+    const auto cross_cov =
+        covariance_function_(cg_gp_fit.train_features, features);
+    auto pred = gp_mean_prediction(cross_cov, cg_gp_fit.information);
+    mean_function_.add_to(features, &pred);
+    return pred;
+  }
+
+  IterativeSolverOptions get_options() const { return options_; }
+
+  void set_options(const IterativeSolverOptions &options) {
+    options_ = options;
+  }
+
+private:
+  IterativeSolverOptions options_{};
+};
+
+// TODO(@peddie): this is known not to have any effect for stationary
+// kernels, but IncompleteCholesky assumes a `twistedBy` (sparse
+// self-adjoint view) member, so this is all we can do for the moment.
+using AlbatrossCGDefaultPreconditioner = Eigen::DiagonalPreconditioner<double>;
+
+template <typename CovFunc,
+          typename Preconditioner = AlbatrossCGDefaultPreconditioner>
+auto cg_gp_from_covariance(
+    CovFunc &&covariance_function, const std::string &model_name,
+    const IterativeSolverOptions &options = IterativeSolverOptions{},
+    Preconditioner && = Preconditioner{}) {
+  return ConjugateGradientGaussianProcessRegression<
+      typename std::decay<CovFunc>::type, decltype(ZeroMean()),
+      Preconditioner>(std::forward<CovFunc>(covariance_function), model_name,
+                      options);
+}
+
+template <typename CovFunc, typename MeanFunc,
+          typename Preconditioner = AlbatrossCGDefaultPreconditioner>
+auto cg_gp_from_mean_and_covariance(
+    CovFunc &&covariance_function, MeanFunc &&mean_function,
+    const std::string &model_name,
+    const IterativeSolverOptions &options = IterativeSolverOptions{},
+    Preconditioner && = Preconditioner{}) {
+  return ConjugateGradientGaussianProcessRegression<
+      typename std::decay<CovFunc>::type, typename std::decay<MeanFunc>::type,
+      Preconditioner>(std::forward<CovFunc>(covariance_function),
+                      std::forward<MeanFunc>(mean_function), model_name,
+                      options);
+}
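+
+// A sketch of intended use (the covariance parameters, options and
+// data here are illustrative; the preconditioner argument is passed
+// only for its type):
+//
+//   IterativeSolverOptions options{/*iterations=*/500, /*tol=*/1.e-12};
+//   auto model = cg_gp_from_covariance(
+//       SquaredExponential<EuclideanDistance>(0.5, 1.), "cg", options,
+//       Eigen::PartialCholesky<double>{});
+//   auto fit = model.fit(dataset);
+//   auto prediction = fit.predict(test_features).joint();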
+
+} // namespace albatross
\ No newline at end of file
diff --git a/tests/test_cg_gp.cc b/tests/test_cg_gp.cc
new file mode 100644
index 00000000..ba1cd68a
--- /dev/null
+++ b/tests/test_cg_gp.cc
@@ -0,0 +1,162 @@
+/*
+ * Copyright (C) 2025 Swift Navigation Inc.
+ * Contact: Swift Navigation <dev@swiftnav.com>
+ *
+ * This source is subject to the license found in the file 'LICENSE' which must
+ * be distributed together with this source. All other rights reserved.
+ *
+ * THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND,
+ * EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+#include <albatross/CGGP>
+
+#include "test_models.h"
+#include <gtest/gtest.h>
+
+#include <chrono>
+
+namespace albatross {
+
+std::ostream &operator<<(std::ostream &os, Eigen::ComputationInfo info) {
+  switch (info) {
+  case Eigen::Success:
+    os << "Success";
+    break;
+  case Eigen::NumericalIssue:
+    os << "NumericalIssue";
+    break;
+  case Eigen::NoConvergence:
+    os << "NoConvergence";
+    break;
+  case Eigen::InvalidInput:
+    os << "InvalidInput";
+    break;
+  default:
+    os << "<unknown>";
+  };
+  return os;
+}
+
+namespace {
+template <typename FitType>
+void describe_fit(const FitType &fit, std::ostream &os = std::cout) {
+  const auto options = fit.get_options();
+  const auto state = fit.get_solver_state();
+  os << "        Status: " << state.info << std::endl;
+  os << "     Tolerance: " << options.tolerance << std::endl;
+  os << "         Error: " << state.error << std::endl;
+  os << "Max iterations: " << options.max_iterations << std::endl;
+  os << "    Iterations: " << state.iterations << std::endl;
+}
+} // namespace
+
+TEST(TestConjugateGradientGP, TestMean) {
+  using CovFunc = SquaredExponential<EuclideanDistance>;
+
+  CovFunc covariance(0.5, 1);
+  auto dataset = make_toy_linear_data(5, 0.01, 1.e-2, 4000);
+  auto direct = gp_from_covariance(covariance, "direct");
+  // auto cg = cg_gp_from_covariance(covariance, "cg", IterativeSolverOptions{},
+  //                                 Eigen::PartialCholesky<double>{100, 1.e-1});
+  auto cg = cg_gp_from_covariance(covariance, "cg");
+
+  auto begin = std::chrono::steady_clock::now();
+  auto direct_fit = direct.fit(dataset);
+  std::cout << "direct fit: "
+            << std::chrono::duration_cast<std::chrono::nanoseconds>(
+                   std::chrono::steady_clock::now() - begin)
+                       .count() /
+                   1.e9
+            << std::endl;
+  begin = std::chrono::steady_clock::now();
+  auto cg_fit = cg.fit(dataset);
+  std::cout << "cg fit: "
+            << std::chrono::duration_cast<std::chrono::nanoseconds>(
+                   std::chrono::steady_clock::now() - begin)
+                       .count() /
+                   1.e9
+            << std::endl;
+
+  describe_fit(cg_fit.get_fit());
+  auto test_features = linspace(0.01, 9.9, 100);
+
+  begin = std::chrono::steady_clock::now();
+  auto direct_pred =
+      direct_fit.predict_with_measurement_noise(test_features).joint();
+  std::cout << "direct predict: "
+            << std::chrono::duration_cast<std::chrono::nanoseconds>(
+                   std::chrono::steady_clock::now() - begin)
+                       .count() /
+                   1.e9
+            << std::endl;
+
+  begin = std::chrono::steady_clock::now();
+  auto cg_pred = cg_fit.predict_with_measurement_noise(test_features).joint();
+  std::cout << "cg predict: "
+            << std::chrono::duration_cast<std::chrono::nanoseconds>(
+                   std::chrono::steady_clock::now() - begin)
+                       .count() /
+                   1.e9
+            << std::endl;
+  describe_fit(cg_fit.get_fit());
+
+  EXPECT_TRUE(cg_pred.mean.array().isFinite().all());
+  EXPECT_TRUE(cg_pred.covariance.array().isFinite().all());
+
+  double mean_error = (direct_pred.mean - cg_pred.mean).norm();
+  EXPECT_LT(mean_error, 1.e-9);
+
+  double cov_error = (direct_pred.covariance - cg_pred.covariance).norm();
+  EXPECT_LT(cov_error, 1.e-9);
+
+  begin = std::chrono::steady_clock::now();
+  auto direct_marginal =
+      direct_fit.predict_with_measurement_noise(test_features).marginal();
+  std::cout << "direct marginal: "
+            << std::chrono::duration_cast<std::chrono::nanoseconds>(
+                   std::chrono::steady_clock::now() - begin)
+                       .count() /
+                   1.e9
+            << std::endl;
+
+  begin = std::chrono::steady_clock::now();
+  auto cg_marginal =
+      cg_fit.predict_with_measurement_noise(test_features).marginal();
+  std::cout << "cg marginal: "
+            << std::chrono::duration_cast<std::chrono::nanoseconds>(
+                   std::chrono::steady_clock::now() - begin)
+                       .count() /
+                   1.e9
+            << std::endl;
+  describe_fit(cg_fit.get_fit());
+  double marginal_error = (direct_marginal.covariance.diagonal() -
+                           cg_marginal.covariance.diagonal())
+                              .norm();
+  EXPECT_LT(marginal_error, 1.e-9);
+
+  begin = std::chrono::steady_clock::now();
+  auto direct_mean =
+      direct_fit.predict_with_measurement_noise(test_features).mean();
+  std::cout << "direct mean: "
+            << std::chrono::duration_cast<std::chrono::nanoseconds>(
+                   std::chrono::steady_clock::now() - begin)
+                       .count() /
+                   1.e9
+            << std::endl;
+
+  begin = std::chrono::steady_clock::now();
+  auto cg_mean = cg_fit.predict_with_measurement_noise(test_features).mean();
+  std::cout << "cg mean: "
+            << std::chrono::duration_cast<std::chrono::nanoseconds>(
+                   std::chrono::steady_clock::now() - begin)
+                       .count() /
+                   1.e9
+            << std::endl;
+  describe_fit(cg_fit.get_fit());
+  double mean_predict_error = (direct_mean - cg_mean).norm();
+  EXPECT_LT(mean_predict_error, 1.e-9);
+}
+
+} // namespace albatross
\ No newline at end of file
diff --git a/tests/test_partial_cholesky.cc b/tests/test_partial_cholesky.cc
new file mode 100644
index 00000000..91da86ea
--- /dev/null
+++ b/tests/test_partial_cholesky.cc
@@ -0,0 +1,439 @@
+/*
+ * Copyright (C) 2025 Swift Navigation Inc.
+ * Contact: Swift Navigation <dev@swiftnav.com>
+ *
+ * This source is subject to the license found in the file 'LICENSE' which must
+ * be distributed together with this source. All other rights reserved.
+ *
+ * THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND,
+ * EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR PURPOSE.
+ */ + +#include + +#include "test_models.h" +#include + +#include + +namespace albatross { + +TEST(PartialCholesky, Construct) { + Eigen::PartialCholesky p; + EXPECT_EQ(Eigen::Success, p.info()); +} + +static constexpr const std::size_t cDefaultSeed = 22; +static constexpr const Eigen::Index cExampleSize = 10; + +static std::gamma_distribution cEigenvalueDistribution(2, 2); + +TEST(PartialCholesky, Compute) { + Eigen::PartialCholesky p; + ASSERT_EQ(Eigen::Success, p.info()); + + std::default_random_engine gen{cDefaultSeed}; + const auto m = + random_covariance_matrix(cExampleSize, cEigenvalueDistribution, gen); + + p.compute(m); + + ASSERT_EQ(Eigen::Success, p.info()); + + EXPECT_LT((p.reconstructedMatrix() - m).norm(), 1.e-9) + << p.reconstructedMatrix() - m; +} + +namespace { + +Eigen::MatrixXd add_diagonal(const Eigen::MatrixXd &A, double sigma) { + return A + + Eigen::MatrixXd( + Eigen::VectorXd::Constant(A.rows(), sigma * sigma).asDiagonal()); +} + +} // namespace + +TEST(PartialCholesky, Solve) { + Eigen::PartialCholesky p; + ASSERT_EQ(Eigen::Success, p.info()); + + std::default_random_engine gen{cDefaultSeed}; + const auto m = + random_covariance_matrix(cExampleSize, cEigenvalueDistribution, gen); + const Eigen::VectorXd b{Eigen::VectorXd::NullaryExpr(cExampleSize, [&gen]() { + return std::normal_distribution(0, 1)(gen); + })}; + + p.compute(m); + ASSERT_EQ(Eigen::Success, p.info()); + + const Eigen::VectorXd x = p.solve(b); + ASSERT_EQ(Eigen::Success, p.info()); + + const Eigen::VectorXd x_llt = add_diagonal(m, p.nugget()).ldlt().solve(b); + EXPECT_LT((x - x_llt).norm(), p.nugget()); +} + +TEST(PartialCholesky, SolveMatrix) { + Eigen::PartialCholesky p; + ASSERT_EQ(Eigen::Success, p.info()); + + std::default_random_engine gen{cDefaultSeed}; + const auto m = + random_covariance_matrix(cExampleSize, cEigenvalueDistribution, gen); + const Eigen::MatrixXd b{ + Eigen::MatrixXd::NullaryExpr(cExampleSize, cExampleSize, [&gen]() { + return std::normal_distribution(0, 1)(gen); + })}; + + p.compute(m); + ASSERT_EQ(Eigen::Success, p.info()); + + const Eigen::MatrixXd x = p.solve(b); + ASSERT_EQ(Eigen::Success, p.info()); + + const Eigen::MatrixXd x_llt = add_diagonal(m, p.nugget()).ldlt().solve(b); + EXPECT_LT((x - x_llt).norm(), 1.e-9); +} + +static constexpr const std::size_t cNumRandomExamples = 222; +static constexpr const std::size_t cMaxDecompositionOrder = 30; + +TEST(PartialCholesky, RandomCompleteProblems) { + std::default_random_engine gen{cDefaultSeed}; + std::uniform_int_distribution decomp_order_dist( + 1, cMaxDecompositionOrder); + + for (std::size_t i = 0; i < cNumRandomExamples; ++i) { + const Eigen::Index decomp_order{decomp_order_dist(gen)}; + // Generate matrices small enough that we always do a complete + // Cholesky factorisation. 
+    std::uniform_int_distribution<Eigen::Index> problem_size_dist(
+        1, decomp_order);
+    const Eigen::Index problem_size{problem_size_dist(gen)};
+    const auto m =
+        random_covariance_matrix(problem_size, cEigenvalueDistribution, gen);
+    const Eigen::VectorXd b{
+        Eigen::VectorXd::NullaryExpr(problem_size, [&gen]() {
+          return std::normal_distribution<double>(0, 1)(gen);
+        })};
+
+    Eigen::PartialCholesky<double> p;
+    ASSERT_EQ(Eigen::Success, p.info());
+    p.setOrder(decomp_order);
+
+    p.compute(m);
+    ASSERT_EQ(Eigen::Success, p.info());
+
+    EXPECT_LT((p.reconstructedMatrix() - m).norm(), 1.e-9)
+        << p.reconstructedMatrix() - m << "\neigs:\n"
+        << Eigen::SelfAdjointEigenSolver<Eigen::MatrixXd>(m)
+               .eigenvalues()
+               .transpose();
+
+    const Eigen::VectorXd x = p.solve(b);
+    ASSERT_EQ(Eigen::Success, p.info());
+
+    const Eigen::VectorXd x_llt =
+        (m + Eigen::MatrixXd(
+                 Eigen::VectorXd::Constant(m.rows(), p.nugget() * p.nugget())
+                     .asDiagonal()))
+            .ldlt()
+            .solve(b);
+    // A cleverer implementation would derive the nugget size from the
+    // eigenvalues of the original covariance rather than add a fudge
+    // factor.
+    EXPECT_LT((x - x_llt).norm(), 1.e-8)
+        << "\neigs:\n"
+        << Eigen::SelfAdjointEigenSolver<Eigen::MatrixXd>(m)
+               .eigenvalues()
+               .transpose();
+  }
+}
+
+namespace {
+
+Eigen::VectorXd spd_eigs(const Eigen::MatrixXd &m) {
+  return Eigen::SelfAdjointEigenSolver<Eigen::MatrixXd>(m).eigenvalues();
+}
+
+double condition_number(const Eigen::MatrixXd &m) {
+  Eigen::VectorXd eigs{spd_eigs(m)};
+  return eigs.array().abs().maxCoeff() / eigs.array().abs().minCoeff();
+}
+
+} // namespace
+
+TEST(PartialCholesky, IncreasingRank) {
+  std::default_random_engine gen{cDefaultSeed};
+  const Eigen::MatrixXd m = random_covariance_matrix(cExampleSize, gen);
+  const Eigen::VectorXd b{Eigen::VectorXd::NullaryExpr(cExampleSize, [&gen]() {
+    return std::normal_distribution<double>(0, 1)(gen);
+  })};
+
+  std::vector<Eigen::PartialCholesky<double>> decomps(cExampleSize);
+
+  // Eigen::IOFormat high_prec_python(16, 0, ", ", "\n", "[", "]", "[", "]");
+  // std::cout << m.format(high_prec_python) << std::endl;
+
+  const Eigen::MatrixXd m_diag{add_diagonal(m, decomps[0].nugget())};
+  // std::cout << m_diag.format(high_prec_python) << std::endl;
+  const double mcond{condition_number(m_diag)};
+
+  decomps[0].setOrder(1);
+  // decomps[0].setNugget(std::sqrt(m.diagonal().minCoeff()));
+  decomps[0].compute(m);
+  ASSERT_EQ(decomps[0].info(), Eigen::Success);
+
+  const auto triangle = [](Eigen::Index index,
+                           const auto &a) -> Eigen::MatrixXd {
+    return a.matrixL()
+        .topLeftCorner(index, index)
+        .template triangularView<Eigen::Lower>();
+  };
+
+  const auto triangle_compare =
+      [&decomps, &triangle](Eigen::Index index) -> Eigen::MatrixXd {
+    return triangle(index, decomps[index - 1]) -
+           triangle(index, decomps[index]);
+  };
+
+  const auto bottom_left = [](Eigen::Index index,
+                              const auto &a) -> Eigen::MatrixXd {
+    return (a.permutationsP() * a.matrixL())
+        .bottomLeftCorner(a.cols() - index, index);
+  };
+
+  const auto ordered_bottom_left_compare =
+      [&decomps, &bottom_left](Eigen::Index index) -> Eigen::MatrixXd {
+    return bottom_left(index, decomps[index - 1]) -
+           bottom_left(index, decomps[index]);
+  };
+
+  const auto reconstruction_error = [&m](const auto &a) -> Eigen::MatrixXd {
+    return a.reconstructedMatrix() - m;
+  };
+
+  const auto preconditioned_condition = [&m_diag](const auto &a) {
+    return condition_number(a.solve(m_diag));
+  };
+
+  // const auto preconditioned_eigs = [&m_diag](const auto &a) {
+  //   return spd_eigs(a.solve(m_diag));
+  // };
+
+  for (Eigen::Index k = 1; k < cExampleSize; ++k) {
+    const Eigen::Index rank = k + 1;
+    decomps[k].setOrder(rank);
+    // decomps[k].setNugget(std::sqrt(m.diagonal().minCoeff()));
+    decomps[k].compute(m);
+    ASSERT_EQ(decomps[k].info(), Eigen::Success);
+    // The top left triangle should be the same between successive
+    // low-rank approximations.
+    EXPECT_LT(triangle_compare(k).norm(), 1.e-9)
+        << "k = " << k << "; " << triangle(k, decomps[k - 1]) << "\n"
+        << triangle(k, decomps[k]);
+    // The bottom left block below the top triangle corresponds to the
+    // pivot / non-pivot cross-correlation block and should be the
+    // same up to a permutation.
+    EXPECT_LT(ordered_bottom_left_compare(k).norm(), 1.e-9)
+        << "k = " << k << "; " << bottom_left(k, decomps[k - 1]) << "\n"
+        << bottom_left(k, decomps[k]);
+    // The leftover Schur complement trace should decrease with
+    // increasing approximation rank.
+    EXPECT_LT(decomps[k].error(), decomps[k - 1].error()) << "k = " << k;
+    // The reconstructed matrix should get closer and closer to the
+    // true matrix as we increase the approximation rank.
+    EXPECT_LT(reconstruction_error(decomps[k]).norm(),
+              reconstruction_error(decomps[k - 1]).norm())
+        << "k = " << k;
+    // The whole point of this decomposition is that preconditioning
+    // with it should improve the condition number of the original
+    // problem.
+    EXPECT_LT(preconditioned_condition(decomps[k]),
+              preconditioned_condition(decomps[k - 1]))
+        << "k = " << k << " mcond: " << mcond
+        << " err: " << decomps[k].error();
+    // << "\nm_diag eigs: " << spd_eigs(m_diag).transpose()
+    // << "\nprecond eigs: "
+    // << spd_eigs(
+    //        decomps[k].solve(Eigen::MatrixXd::Identity(m.rows(), m.cols())))
+    //        .transpose()
+    // << "\nprecond eigs direct: "
+    // << spd_eigs(decomps[k].direct_solve().solve(
+    //        Eigen::MatrixXd::Identity(m.rows(), m.cols())))
+    //        .transpose()
+    // << "\n    k eigs: " << preconditioned_eigs(decomps[k]).transpose()
+    // << "\nk - 1 eigs: " << preconditioned_eigs(decomps[k - 1]).transpose();
+    EXPECT_LT(preconditioned_condition(decomps[k]), mcond)
+        << "k = " << k << " err: " << decomps[k].error(); // << "\nL:\n"
+    // << decomps[k].matrixL() << "\nP:\n"
+    // << Eigen::MatrixXi(decomps[k].permutationsP())
+    // << "\nm_diag eigs: " << spd_eigs(m_diag).transpose()
+    // << "\nprecond eigs: "
+    // << spd_eigs(
+    //        decomps[k].solve(Eigen::MatrixXd::Identity(m.rows(), m.cols())))
+    //        .transpose()
+    // << "\nprecond eigs direct: "
+    // << spd_eigs(decomps[k].direct_solve().solve(
+    //        Eigen::MatrixXd::Identity(m.rows(), m.cols())))
+    //        .transpose()
+    // << "\ndiag eigs: "
+    // << spd_eigs(m * m.diagonal().array().inverse().matrix().asDiagonal())
+    //        .transpose()
+    // << "\n    k eigs: " << preconditioned_eigs(decomps[k]).transpose()
+    // << "\nk - 1 eigs: " << preconditioned_eigs(decomps[k - 1]).transpose();
+    // if (k == 2) {
+    //   std::cout << "k = 2, rank 3.  precond(m_diag):\n"
+    //             << decomps[k].solve(m_diag) << std::endl;
+    // }
+  }
+}
+
+// TEST(PartialCholesky, Approximate) {
+//   std::default_random_engine gen{cDefaultSeed};
+//   const Eigen::MatrixXd m =
+//       random_covariance_matrix(cExampleSize, cEigenvalueDistribution, gen);
+//   const Eigen::VectorXd b{
+//       Eigen::VectorXd::NullaryExpr(cExampleSize, [&gen]() {
+//         return std::normal_distribution<double>(0, 1)(gen);
+//       })};
+
+//   Eigen::PartialCholesky<double> p;
+//   ASSERT_EQ(Eigen::Success, p.info());
+//   p.setOrder(2);
+//   p.compute(m);
+//   ASSERT_EQ(Eigen::Success, p.info());
+//   const Eigen::MatrixXd L{p.matrixL()};
+//   for (Eigen::Index i = 1; i < L.cols(); ++i) {
+//     EXPECT_LT(L(i, i), L(i - 1, i - 1)) << L.diagonal().transpose();
+//   }
+//   const Eigen::VectorXd x = p.solve(b);
+//   ASSERT_EQ(Eigen::Success, p.info());
+//   Eigen::MatrixXd preconditioned{p.solve(m)};
+//   Eigen::SelfAdjointEigenSolver<Eigen::MatrixXd> preeigs(preconditioned);
+//   std::cout << "A:\n" << m << std::endl;
+//   std::cout << "reconstructed:\n" << p.reconstructedMatrix() << std::endl;
+//   std::cout << "error: " << p.error() << std::endl;
+//   std::cout << "diff:\n" << m - p.reconstructedMatrix() << std::endl;
+//   std::cout << "L:\n" << p.matrixL() << std::endl;
+//   std::cout << "P:\n" << Eigen::MatrixXi(p.permutationsP()) << std::endl;
+//   std::cout << m * x - b << std::endl;
+//   std::cout << m * (p.reconstructedMatrix().ldlt().solve(b)) - b
+//             << std::endl;
+//   std::cout << "Aapprox^-1 A:\n" << preconditioned << std::endl;
+//   Eigen::SelfAdjointEigenSolver<Eigen::MatrixXd> eigs(m);
+//   std::cout << "A eigs: " << eigs.eigenvalues().transpose() << std::endl;
+//   std::cout << "Aapprox^-1 A eigs: " << preeigs.eigenvalues().transpose()
+//             << std::endl;
+//   Eigen::PartialCholesky<double> p3;
+//   p3.setOrder(3);
+//   p3.compute(m);
+//   std::cout << "L:\n" << p3.matrixL() << std::endl;
+//   std::cout << "P:\n" << Eigen::MatrixXi(p3.permutationsP()) << std::endl;
+// }
+
+TEST(PartialCholesky, Precondition) {
+  std::default_random_engine gen{cDefaultSeed};
+  const auto m =
+      random_covariance_matrix(cExampleSize, cEigenvalueDistribution, gen);
+  const Eigen::VectorXd b{Eigen::VectorXd::NullaryExpr(cExampleSize, [&gen]() {
+    return std::normal_distribution<double>(0, 1)(gen);
+  })};
+
+  // Set the preconditioner order before computing the decomposition;
+  // the solver computes its preconditioner inside compute().
+  Eigen::ConjugateGradient<Eigen::MatrixXd, Eigen::Lower | Eigen::Upper,
+                           Eigen::PartialCholesky<double>>
+      solver;
+  solver.preconditioner().setOrder(cExampleSize / 4);
+  solver.compute(m);
+
+  const Eigen::VectorXd x = solver.solve(b);
+  EXPECT_EQ(solver.info(), Eigen::Success);
+  EXPECT_LT((m * x - b).norm(), 1.e-9);
+
+  Eigen::ConjugateGradient<Eigen::MatrixXd, Eigen::Lower | Eigen::Upper>
+      solver0(m);
+
+  const Eigen::VectorXd x0 = solver0.solve(b);
+  EXPECT_EQ(solver0.info(), Eigen::Success);
+  EXPECT_LT(solver.iterations(), solver0.iterations());
+}
+
+static constexpr const std::size_t cNumRandomProblems = 20;
+static constexpr const Eigen::Index cMinProblemSize = 100;
+static constexpr const Eigen::Index cMaxProblemSize = 300;
+
+TEST(PartialCholesky, PreconditionRandomProblems) {
+  std::default_random_engine gen{cDefaultSeed};
+  std::uniform_int_distribution<Eigen::Index> problem_size_dist(
+      cMinProblemSize, cMaxProblemSize);
+  std::size_t fail = 0;
+  std::size_t condfail = 0;
+  double reduction = 1;
+  double dcond = 1;
+  for (std::size_t i = 0; i < cNumRandomProblems; ++i) {
+    const Eigen::Index problem_size{problem_size_dist(gen)};
+    const auto m = random_covariance_matrix(problem_size, gen);
+    const Eigen::VectorXd b{
+        Eigen::VectorXd::NullaryExpr(problem_size, [&gen]() {
+          return std::normal_distribution<double>(0, 1)(gen);
+        })};
+
+    // As above, configure the preconditioner order before compute().
+    Eigen::ConjugateGradient<Eigen::MatrixXd, Eigen::Lower | Eigen::Upper,
+                             Eigen::PartialCholesky<double>>
+        solver;
+    solver.preconditioner().setOrder(
+        std::max(problem_size / 20, Eigen::Index{20}));
+    solver.compute(m);
decompose"; + solver.preconditioner().setOrder( + std::max(problem_size / 20, Eigen::Index{20})); + + const Eigen::VectorXd x = solver.solve(b); + EXPECT_EQ(solver.info(), Eigen::Success) + << "Preconditioned solver failed to solve"; + EXPECT_LT((m * x - b).norm(), 1.e-9); + + Eigen::ConjugateGradient + solver0(m); + EXPECT_EQ(solver0.info(), Eigen::Success) + << "Default solver failed to decompose"; + const Eigen::VectorXd x0 = solver0.solve(b); + EXPECT_EQ(solver0.info(), Eigen::Success) + << "Default solver failed to solve"; + const auto cond0 = condition_number(m); + EXPECT_LE(solver.iterations(), solver0.iterations()) + << "Problem size: " << problem_size << "; cond " << cond0; + + const auto condp = condition_number(solver.preconditioner().solve(m)); + + EXPECT_LE(condp, cond0) + << "Problem size: " << problem_size + << "; cond: " << std::setprecision(5) << cond0 + << "; preconditioned: " << condp << "; damage: " << condp / cond0; + + // std::cout << i << ": cond " << cond0 << " -> " << condp << " = " + // << condp / cond0 << std::endl; + if (solver.iterations() > solver0.iterations()) { + fail++; + } + if (condp > cond0) { + condfail++; + } + dcond += condp / cond0; + reduction += static_cast(solver.iterations()) / + static_cast(solver0.iterations()); + } + + std::cout << "More iterations on " << fail << " of " << cNumRandomProblems + << " problems." << std::endl; + std::cout << "Worse conditioning on " << condfail << " of " + << cNumRandomProblems << " problems." << std::endl; + reduction = std::pow(reduction, + 2 / static_cast(cNumRandomProblems - fail + 1)); + std::cout << "Geom mean iteration reduction: " << reduction << std::endl; + dcond = std::pow(dcond, 2 / static_cast(cNumRandomProblems + 1)); + std::cout << "Geom mean condition reduction: " << dcond << std::endl; +} + +} // namespace albatross \ No newline at end of file