From 2e529e0652e853c066d7025daf8e384e82a03b24 Mon Sep 17 00:00:00 2001 From: WT Date: Mon, 21 Mar 2022 12:22:55 +0000 Subject: [PATCH 1/9] Pseudo-observation parametrisations --- src/ApproximateGPs.jl | 4 + src/SparseVariationalApproximationModule.jl | 218 +++++++++++++++++-- test/SparseVariationalApproximationModule.jl | 76 +++++++ 3 files changed, 278 insertions(+), 20 deletions(-) diff --git a/src/ApproximateGPs.jl b/src/ApproximateGPs.jl index c8dd643c..2e2344a9 100644 --- a/src/ApproximateGPs.jl +++ b/src/ApproximateGPs.jl @@ -14,6 +14,10 @@ include("SparseVariationalApproximationModule.jl") SparseVariationalApproximation, Centered, NonCentered @reexport using .SparseVariationalApproximationModule: DefaultQuadrature, Analytic, GaussHermite, MonteCarlo +@reexport using .SparseVariationalApproximationModule: + PseudoObsSparseVariationalApproximation, + ObsCovLikelihood, + DecoupledObsCovLikelihood include("LaplaceApproximationModule.jl") @reexport using .LaplaceApproximationModule: LaplaceApproximation diff --git a/src/SparseVariationalApproximationModule.jl b/src/SparseVariationalApproximationModule.jl index f388d1b2..d58221cf 100644 --- a/src/SparseVariationalApproximationModule.jl +++ b/src/SparseVariationalApproximationModule.jl @@ -2,7 +2,10 @@ module SparseVariationalApproximationModule using ..API -export SparseVariationalApproximation, Centered, NonCentered +export SparseVariationalApproximation, + Centered, + NonCentered, + PseudoObsSparseVariationalApproximation using ..ApproximateGPs: _chol_cov, _cov using Distributions @@ -28,6 +31,13 @@ using GPLikelihoods: GaussianLikelihood export DefaultQuadrature, Analytic, GaussHermite, MonteCarlo include("expected_loglik.jl") +""" + abstract type AbstractSparseVariationalApproximation end + +Supertype for sparse variational approximations. +""" +abstract type AbstractSparseVariationalApproximation end + @doc raw""" Centered() @@ -59,7 +69,9 @@ See also [`Centered`](@ref). 
""" struct NonCentered end -struct SparseVariationalApproximation{Parametrization,Tfz<:FiniteGP,Tq<:AbstractMvNormal} +struct SparseVariationalApproximation{ + Parametrization,Tfz<:FiniteGP,Tq<:AbstractMvNormal +} <: AbstractSparseVariationalApproximation fz::Tfz q::Tq end @@ -190,14 +202,14 @@ function AbstractGPs.posterior(sva::SparseVariationalApproximation{NonCentered}) end function AbstractGPs.posterior( - sva::SparseVariationalApproximation, fx::FiniteGP, ::AbstractVector{<:Real} + sva::AbstractSparseVariationalApproximation, fx::FiniteGP, ::AbstractVector{<:Real} ) @assert sva.fz.f === fx.f return posterior(sva) end function AbstractGPs.posterior( - sva::SparseVariationalApproximation, lfx::LatentFiniteGP, ::Any + sva::AbstractSparseVariationalApproximation, lfx::LatentFiniteGP, ::Any ) @assert sva.fz.f === lfx.fx.f return posterior(sva) @@ -209,7 +221,7 @@ end # function Statistics.mean( - f::ApproxPosteriorGP{<:SparseVariationalApproximation}, x::AbstractVector + f::ApproxPosteriorGP{<:AbstractSparseVariationalApproximation}, x::AbstractVector ) return mean(f.prior, x) + cov(f.prior, x, inducing_points(f)) * f.data.α end @@ -224,21 +236,21 @@ end _A(f, x) = first(_A_and_Kuf(f, x)) function Statistics.cov( - f::ApproxPosteriorGP{<:SparseVariationalApproximation}, x::AbstractVector + f::ApproxPosteriorGP{<:AbstractSparseVariationalApproximation}, x::AbstractVector ) A = _A(f, x) return cov(f.prior, x) - At_A(A) + At_A(f.data.B' * A) end function Statistics.var( - f::ApproxPosteriorGP{<:SparseVariationalApproximation}, x::AbstractVector + f::ApproxPosteriorGP{<:AbstractSparseVariationalApproximation}, x::AbstractVector ) A = _A(f, x) return var(f.prior, x) - diag_At_A(A) + diag_At_A(f.data.B' * A) end function StatsBase.mean_and_cov( - f::ApproxPosteriorGP{<:SparseVariationalApproximation}, x::AbstractVector + f::ApproxPosteriorGP{<:AbstractSparseVariationalApproximation}, x::AbstractVector ) A, Kuf = _A_and_Kuf(f, x) μ = mean(f.prior, x) + Kuf' * f.data.α @@ 
-247,7 +259,7 @@ function StatsBase.mean_and_cov( end function StatsBase.mean_and_var( - f::ApproxPosteriorGP{<:SparseVariationalApproximation}, x::AbstractVector + f::ApproxPosteriorGP{<:AbstractSparseVariationalApproximation}, x::AbstractVector ) A, Kuf = _A_and_Kuf(f, x) μ = mean(f.prior, x) + Kuf' * f.data.α @@ -256,7 +268,7 @@ function StatsBase.mean_and_var( end function Statistics.cov( - f::ApproxPosteriorGP{<:SparseVariationalApproximation}, + f::ApproxPosteriorGP{<:AbstractSparseVariationalApproximation}, x::AbstractVector, y::AbstractVector, ) @@ -277,14 +289,17 @@ inducing_points(f::ApproxPosteriorGP{<:SparseVariationalApproximation}) = f.appr # function API.approx_lml( - sva::SparseVariationalApproximation, l_fx::Union{FiniteGP,LatentFiniteGP}, ys; kwargs... + sva::AbstractSparseVariationalApproximation, l_fx::Union{FiniteGP,LatentFiniteGP}, ys; + kwargs... ) return elbo(sva, l_fx, ys; kwargs...) end +_get_prior(approx::SparseVariationalApproximation) = approx.fz.f + """ elbo( - sva::SparseVariationalApproximation, + sva::AbstractSparseVariationalApproximation, fx::FiniteGP, y::AbstractVector{<:Real}; num_data=length(y), @@ -310,18 +325,18 @@ variational Gaussian process classification." Artificial Intelligence and Statistics. PMLR, 2015. """ function AbstractGPs.elbo( - sva::SparseVariationalApproximation, + sva::AbstractSparseVariationalApproximation, fx::FiniteGP{<:AbstractGP,<:AbstractVector,<:Diagonal{<:Real,<:Fill}}, y::AbstractVector{<:Real}; num_data=length(y), quadrature=DefaultQuadrature(), ) - @assert sva.fz.f === fx.f + @assert _get_prior(sva) === fx.f return _elbo(quadrature, sva, fx, y, GaussianLikelihood(fx.Σy[1]), num_data) end function AbstractGPs.elbo( - ::SparseVariationalApproximation, ::FiniteGP, ::AbstractVector; kwargs... + ::AbstractSparseVariationalApproximation, ::FiniteGP, ::AbstractVector; kwargs... 
) return error( "The observation noise fx.Σy must be homoscedastic.\n", @@ -332,7 +347,7 @@ end """ elbo( - sva::SparseVariationalApproximation, + sva::AbstractSparseVariationalApproximation, lfx::LatentFiniteGP, y::AbstractVector; num_data=length(y), @@ -342,26 +357,26 @@ end Compute the ELBO for a LatentGP with a possibly non-conjugate likelihood. """ function AbstractGPs.elbo( - sva::SparseVariationalApproximation, + sva::AbstractSparseVariationalApproximation, lfx::LatentFiniteGP, y::AbstractVector; num_data=length(y), quadrature=DefaultQuadrature(), ) - @assert sva.fz.f === lfx.fx.f + @assert _get_prior(sva) === lfx.fx.f return _elbo(quadrature, sva, lfx.fx, y, lfx.lik, num_data) end # Compute the common elements of the ELBO function _elbo( quadrature::QuadratureMethod, - sva::SparseVariationalApproximation, + sva::AbstractSparseVariationalApproximation, fx::FiniteGP, y::AbstractVector, lik, num_data::Integer, ) - @assert sva.fz.f === fx.f + @assert _get_prior(sva) === fx.f f_post = posterior(sva) q_f = marginals(f_post(fx.x)) @@ -385,4 +400,167 @@ function _prior_kl(sva::SparseVariationalApproximation{NonCentered}) return (trace_term + m_ε'm_ε - length(m_ε) - logdet(C_ε)) / 2 end + + +# +# Pseudo-Observation Parametrisations of q(u). +# + + +@doc raw""" + PseudoObsSparseVariationalApproximation( + likelihood, f::AbstractGP, z::AbstractVector + ) + +Parametrises `q(f(z))`, the approximate posterior at `f(z)`, using a surrogate likelihood, +`likelihood`: `q(f(z)) ∝ p(f(z)) likelihood(f(z))`. +""" +struct PseudoObsSparseVariationalApproximation{ + Tlikelihood, Tf<:AbstractGP, Tz<:AbstractVector +} <: AbstractSparseVariationalApproximation + likelihood::Tlikelihood + f::Tf + z::Tz +end + +_get_prior(approx::PseudoObsSparseVariationalApproximation) = approx.f + +@doc raw""" + ObsCovLikelihood(S::AbstractMatrix{<:Real}, y::AbstractVector{<:Real}) + +Chooses `likelihood(u) = N(y; u, S)`. 
`length(y)` must be equal to the number of +pseudo-points utilised in the sparse variational approximation. +""" +struct ObsCovLikelihood{TS<:AbstractMatrix{<:Real}, Ty<:AbstractVector{<:Real}} + S::TS + y::Ty +end + +@doc raw""" + PseudoObsSparseVariationalApproximation( + f::AbstractGP, + z::AbstractVector, + S::AbstractMatrix{<:Real}, + y::AbstractVector{<:Real}, + ) + +Convenience constructor. +Equivalent to +```julia +PseudoObsSparseVariationalApproximation(ObsCovLikelihood(S, y), f, z) +``` +""" +function PseudoObsSparseVariationalApproximation( + f::AbstractGP, z::AbstractVector, S::AbstractMatrix{<:Real}, y::AbstractVector{<:Real} +) + return PseudoObsSparseVariationalApproximation(ObsCovLikelihood(S, y), f, z) +end + +function AbstractGPs.posterior( + approx::PseudoObsSparseVariationalApproximation{<:ObsCovLikelihood} +) + f = approx.f + z = approx.z + y = approx.likelihood.y + S = approx.likelihood.S + return posterior(f(z, S), y) +end + +function _prior_kl( + approx::PseudoObsSparseVariationalApproximation{<:ObsCovLikelihood} +) + f = approx.f + z = approx.z + y = approx.likelihood.y + S = approx.likelihood.S + + # log marginal probability of pseudo-observations. + logp_pseudo_obs = logpdf(f(z, S), y) + + # pseudo-reconstruction term. + m, C = mean_and_cov(posterior(approx)(z)) + S_chol = cholesky(AbstractGPs._symmetric(S)) + pseudo_lik = -( + length(y) * AbstractGPs.log2π + logdet(S_chol) + sum(abs2, S_chol.U' \ (y - m)) + ) / 2 + trace_term = tr(S_chol \ C) / 2 + return -logp_pseudo_obs + pseudo_lik - trace_term +end + +@doc raw""" + DecoupledObsCovLikelihood( + S::AbstractMatrix{<:Real}, v::AbstractVector, y::AbstractVector{<:Real} + ) + +Chooses `likelihood(u) = N(y; f(v), S)` where `length(y)` need not be equal to the number +of pseudo-points, where `f` is the GP to which this likelihood specifies the approximate +posterior over `f(z)`. 
+""" +struct DecoupledObsCovLikelihood{ + TS<:Diagonal{<:Real}, Tv<:AbstractVector, Ty<:AbstractVector{<:Real} +} + S::TS + v::Tv + y::Ty +end + +@doc raw""" + PseudoObsSparseVariationalApproximation( + f::AbstractGP, + z::AbstractVector, + S::Diagonal{<:Real}, + v::AbstractVector, + y::AbstractVector{<:Real}, + ) + +Convenience constructor. +Equivalent to +```julia +PseudoObsSparseVariationalApproximation(DecoupledObsCovLikelihood(S, v, y), f, z) +``` +""" +function PseudoObsSparseVariationalApproximation( + f::AbstractGP, + z::AbstractVector, + S::Diagonal{<:Real}, + v::AbstractVector, + y::AbstractVector{<:Real}, +) + return PseudoObsSparseVariationalApproximation(DecoupledObsCovLikelihood(S, v, y), f, z) +end + +function AbstractGPs.posterior( + approx::PseudoObsSparseVariationalApproximation{<:DecoupledObsCovLikelihood} +) + f = approx.f + z = approx.z + y = approx.likelihood.y + S = approx.likelihood.S + v = approx.likelihood.v + return posterior(AbstractGPs.VFE(f(z, 1e-9)), f(v, S), y) +end + +function _prior_kl( + approx::PseudoObsSparseVariationalApproximation{<:DecoupledObsCovLikelihood} +) + f = approx.f + z = approx.z + y = approx.likelihood.y + S = approx.likelihood.S + v = approx.likelihood.v + + # log marginal probability of pseudo-observations. Utilises DTC code. + logp_pseudo_obs = AbstractGPs.dtc(AbstractGPs.VFE(f(z)), f(v, S), y) + + # pseudo-reconstruction term. 
+ m̂, Ĉ = mean_and_cov(posterior(approx)(z, 1e-18)) + At = cholesky(AbstractGPs._symmetric(cov(f(z, 1e-18)))) \ cov(f, z, v) + m = mean(f, v) + At' * (m̂ - mean(f, z)) + pseudo_loglik = sum(map((m, s, y) -> logpdf(Normal(m, sqrt(s)), y), m, diag(S), y)) + pseudo_trace_term = sum(Ĉ .* (At * (S \ At'))) / 2 + pseudo_reconstruction = (pseudo_loglik - pseudo_trace_term) + + return -logp_pseudo_obs + pseudo_reconstruction +end + end diff --git a/test/SparseVariationalApproximationModule.jl b/test/SparseVariationalApproximationModule.jl index 104949f2..5c262ec2 100644 --- a/test/SparseVariationalApproximationModule.jl +++ b/test/SparseVariationalApproximationModule.jl @@ -188,4 +188,80 @@ @test all(isapprox.(cov(gpr_post, x), cov(svgp_post, x), atol=1e-4)) end end + @testset "PseudoObs" begin + rng = Xoshiro(123456) + + # Generate data. + f = GP(sin, SEKernel()) + x = range(-5.0, 5.0; length=11) + s = 0.1 + y = rand(rng, f(x, s)) + + z = range(-6.0, 6.0; length=7) + + @testset "Coupled Formulation" begin + + # Generate pseudo-data. + ŷ = randn(rng, length(z)) + _S = randn(rng, length(z), length(z)) + Ŝ = _S * _S' + I + + # Construct approximate posterior. + approx = ApproximateGPs.SparseVariationalApproximationModule.PseudoObsSparseVariationalApproximation(f, z, Ŝ, ŷ) + + # Check that the posterior produced satisfies the AbstractGPs API. + approx_posterior = posterior(approx) + AbstractGPs.TestUtils.test_internal_abstractgps_interface( + rng, approx_posterior, x, z + ) + + # Check that the posterior is close to an equivalent Centered approximation. 
+ @testset "compare against equivalent centered" begin + qu = approx_posterior(z, 1e-12) + approx_centered = SparseVariationalApproximation(Centered(), f(z, 1e-12), qu) + approx_post_centered = posterior(approx_centered) + + approx_post_x = approx_posterior(x, s) + approx_post_centered_x = approx_post_centered(x, s) + @test mean(approx_post_x) ≈ mean(approx_post_centered_x) + @test cov(approx_post_x) ≈ cov(approx_post_centered_x) + @test elbo(approx, f(x, s), y) ≈ elbo(approx_centered, f(x, s), y) + end + + # Check that Zygote is able to run. Assume correctness of result. + Zygote.gradient(elbo, approx, f(x, s), y) + end + @testset "Decoupled Formulation" begin + + # Generate pseudo-data. + v = range(-5.0, 5.0; length=9) + ŷ = randn(rng, length(v)) + Ŝ = Diagonal(rand(rng, length(v)) .+ 0.1) + + # Construct approximate posterior. + approx = ApproximateGPs.SparseVariationalApproximationModule.PseudoObsSparseVariationalApproximation(f, z, Ŝ, v, ŷ) + + # Check that the posterior produced satisfies the AbstractGPs API. + approx_posterior = posterior(approx) + AbstractGPs.TestUtils.test_internal_abstractgps_interface( + rng, approx_posterior, x, z + ) + + # Check that the posterior is close to an equivalent Centered approximation. + @testset "compare against equivalent centered" begin + qu = approx_posterior(z, 1e-12) + approx_centered = SparseVariationalApproximation(Centered(), f(z, 1e-12), qu) + approx_post_centered = posterior(approx_centered) + + approx_post_x = approx_posterior(x, s) + approx_post_centered_x = approx_post_centered(x, s) + @test mean(approx_post_x) ≈ mean(approx_post_centered_x) + @test cov(approx_post_x) ≈ cov(approx_post_centered_x) + @test elbo(approx, f(x, s), y) ≈ elbo(approx_centered, f(x, s), y) + end + + # Check that Zygote is able to run. Assume correctness of result. 
+ Zygote.gradient(elbo, approx, f(x, s), y) + end + end end From 2d91393d6516d0ed837380ed5e01de8806151dc6 Mon Sep 17 00:00:00 2001 From: WT Date: Thu, 24 Mar 2022 13:34:03 +0000 Subject: [PATCH 2/9] Pseudo-obs example --- docs/src/userguide.md | 28 +-- .../d-sparse-parametrisations/Project.toml | 17 ++ examples/d-sparse-parametrisations/script.jl | 174 ++++++++++++++++++ 3 files changed, 194 insertions(+), 25 deletions(-) create mode 100644 examples/d-sparse-parametrisations/Project.toml create mode 100644 examples/d-sparse-parametrisations/script.jl diff --git a/docs/src/userguide.md b/docs/src/userguide.md index 1ca7e9db..3476d72f 100644 --- a/docs/src/userguide.md +++ b/docs/src/userguide.md @@ -46,31 +46,9 @@ The approximate posterior constructed above will be a very poor approximation, s ```julia elbo(SparseVariationalApproximation(fz, q), fx, y) ``` -A detailed example of how to carry out such optimisation is given in [Regression: Sparse Variational Gaussian Process for Stochastic Optimisation with Flux.jl](@ref). For an example of non-conjugate inference, see [Classification: Sparse Variational Approximation for Non-Conjugate Likelihoods with Optim's L-BFGS](@ref). # Available Parametrizations -Two parametrizations of `q(u)` are presently available: [`Centered`](@ref) and [`NonCentered`](@ref). -The `Centered` parametrization expresses `q(u)` directly in terms of its mean and covariance. -The `NonCentered` parametrization instead parametrizes the mean and covariance of -`ε := cholesky(cov(u)).U' \ (u - mean(u))`. -These parametrizations are also known respectively as "Unwhitened" and "Whitened". - -The choice of parametrization can have a substantial impact on the time it takes for ELBO -optimization to converge, and which parametrization is better in a particular situation is -not generally obvious. 
-That being said, the `NonCentered` parametrization often converges in fewer iterations, so it is the default -- -it is what is used in all of the examples above. - -If you require a particular parametrization, simply use the 3-argument version of the -approximation constructor: -```julia -SparseVariationalApproximation(Centered(), fz, q) -SparseVariationalApproximation(NonCentered(), fz, q) -``` - -For a general discussion around these two parametrizations, see e.g. [^Gorinova]. -For a GP-specific discussion, see e.g. section 3.4 of [^Paciorek]. - -[^Gorinova]: Gorinova, Maria and Moore, Dave and Hoffman, Matthew [Automatic Reparameterisation of Probabilistic Programs](http://proceedings.mlr.press/v119/gorinova20a) -[^Paciorek]: [Paciorek, Christopher Joseph. Nonstationary Gaussian processes for regression and spatial modelling. Diss. Carnegie Mellon University, 2003.](https://www.stat.berkeley.edu/~paciorek/diss/paciorek-thesis.pdf) +There are various ways to parametrise the approximate posterior. +See [The Various Pseudo-Point Approximation Parametrisations](@ref) for more info and +worked examples. 
diff --git a/examples/d-sparse-parametrisations/Project.toml b/examples/d-sparse-parametrisations/Project.toml new file mode 100644 index 00000000..746832db --- /dev/null +++ b/examples/d-sparse-parametrisations/Project.toml @@ -0,0 +1,17 @@ +[deps] +AbstractGPs = "99985d1d-32ba-4be9-9821-2ec096f28918" +ApproximateGPs = "298c2ebc-0411-48ad-af38-99e88101b606" +CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" +ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" +Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" +DrWatson = "634d3b9d-ee7a-5ddf-bec9-22491ea816e1" +Images = "916415d5-f1e6-5110-898d-aaa5f9f070e0" +KernelFunctions = "ec8451be-7e33-11e9-00cf-bbf324bd1392" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306" +OnlineStats = "a15396b6-48d5-5d58-9928-6d29437db91e" +Optim = "429524aa-4258-5aef-a3af-852621145aeb" +Optimisers = "3bd65402-5787-11e9-1adc-39752487f4e2" +ParameterHandling = "2412ca09-6db7-441c-8e3a-88d5709968c5" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" diff --git a/examples/d-sparse-parametrisations/script.jl b/examples/d-sparse-parametrisations/script.jl new file mode 100644 index 00000000..9e2d80e0 --- /dev/null +++ b/examples/d-sparse-parametrisations/script.jl @@ -0,0 +1,174 @@ +# # The Various Pseudo-Point Approximation Parametrisations +# +# ### Note to the reader +# At the time of writing (March 2021) the best way to parametrise the approximate posterior +# remains a surprisingly active area of research. +# If you are reading this and feel that it has become outdated, or was incorrect in the +# first instance, it would be greatly appreciated if you could open an issue to discuss. +# +# +# ## Introduction +# +# This example examines the various ways in which this package supports parametrising the +# approximate posterior when utilising sparse approximations. +# +# All sparse (a.k.a. 
pseudo-point) approximations in this package utilise an approximate +# posterior over a GP ``f`` of the form +# ```math +# q(f) = q(\mathbf{u}) \, p(f_{\neq \mathbf{u}} | \mathbf{u}) +# ``` +# where samples from ``f`` are functions mapping ``\mathcal{X} \to \mathbb{R}``, +# ``\mathbf{u} := f(\mathbf{z})``, ``\mathbf{z} \in \mathcal{X}^M`` are the pseudo-inputs, +# and ``f_{\neq \mathbf{u}}`` denotes ``f`` at all indices other than those in +# ``\mathbf{z}``.[^Titsias] +# ``q(\mathbf{u}) := q(f(\mathbf{z}))`` is generally restricted to be a multivariate Gaussian, to which end ApproximateGPs presently offers four parametrisations: +# 1. Centered ("Unwhitened"): ``q(\mathbf{u}) = \mathcal{N}(\mathbf{m}, \mathbf{C})``, ``\quad \mathbf{m} \in \mathbb{R}^M`` and positive-definite ``\mathbf{C} \in \mathbb{R}^{M \times M}``, +# 1. Non-Centered ("Whitened"): ``q(\mathbf{u}) = \mathcal{N}(\mathbf{L} \mathbf{m}, \mathbf{L} \mathbf{C} \mathbf{L}^\top)``, ``\quad \mathbf{L} \mathbf{L}^\top = \text{cov}(\mathbf{u})``, +# 1. Pseudo-Observation: ``q(\mathbf{u}) \propto p(\mathbf{u}) \, \mathcal{N}(\hat{\mathbf{y}}; \mathbf{u}, \hat{\mathbf{S}})``, ``\quad \hat{\mathbf{y}} \in \mathbb{R}^M`` and positive-definite ``\hat{\mathbf{S}} \in \mathbb{R}^{M \times M}``, +# 1. Decoupled Pseudo-Observation: ``q(\mathbf{u}) \propto p(\mathbf{u}) \, \mathcal{N}(\hat{\mathbf{y}}; f(\mathbf{v}), \hat{\mathbf{S}})``, ``\quad \hat{\mathbf{y}} \in \mathbb{R}^R``, ``\hat{\mathbf{S}} \in \mathbb{R}^{R \times R}`` is positive-definite and diagonal, and ``\mathbf{v} \in \mathcal{X}^R``. +# +# The choice of parametrization can have a substantial impact on the time it takes for ELBO +# optimization to converge, and which parametrization is better in a particular situation is +# not generally obvious. +# That being said, the `NonCentered` parametrization often converges in fewer iterations +# than the `Centered`, and is widely used, so it is the default. 
+# +# For a general discussion around the centered vs non-centered parametrisations, see e.g. [^Gorinova]. +# For a GP-specific discussion, see e.g. section 3.4 of [^Paciorek]. + +# ## Setup + +using AbstractGPs +using ApproximateGPs +using CairoMakie +using Distributions +using Images +using KernelFunctions +using LinearAlgebra +using Optim +using Random +using Zygote + +# A simple GP with inputs on the reals. +f = GP(SEKernel()); +N = 100; +x = range(-3.0, 3.0; length=N); + +# Generate some observations. +Σ = Diagonal(fill(0.1, N)); +y = rand(Xoshiro(123456), f(x, Σ)); + +# Use a handful of pseudo-points. +M = 10; +z = range(-3.5, 3.5; length=M); + +# Other misc. constants that we'll need later: +x_pred = range(-5.0, 5.0; length=300); +jitter = 1e-9; + +# ## The Relationship Between Parametrisations +# +# Much of the time, one can convert between the different parametrisations to obtain +# equivalent ``q(\mathbf{u})``, for a given set of hyperparameters. +# If it's unclear from the above how these parametrisations relate to one another, the +# following should help to crystallise the relationship. +# +# ### Centered vs Non-Centered +# +# Both the `Centered` and `NonCentered` parametrisations are specified by a mean vector `m` +# and covariance matrix `C`, but in slightly different ways. +# The `Centered` parametrisation interprets `m` and `C` as the mean and covariance of +# ``q(\mathbf{u})`` directly, while the `NonCentered` parametrisation interprets them as the +# mean and covariance of the approximate posterior over +# `ε := cholesky(cov(u)).U' \ (u - mean(u))`. 
+# +# To see this, consider the following non-centered approximate posterior: +fz = f(z, jitter); +qu_non_centered = MvNormal(randn(M), Matrix{Float64}(I, M, M)); +non_centered_approx = SparseVariationalApproximation(NonCentered(), fz, qu_non_centered); + +# The equivalent centered parametrisation can be found by multiplying the parameters of +# `qu_non_centered` by the Cholesky factor of the prior covariance: +L = cholesky(Symmetric(cov(fz))).L; +qu_centered = MvNormal(L * mean(qu_non_centered), L * cov(qu_non_centered) * L'); +centered_approx = SparseVariationalApproximation(Centered(), fz, qu_centered); + +# We can gain some confidence that they're actually the same by querying the approximate +# posterior statistics at some new locations: +q_non_centered = posterior(non_centered_approx) +q_centered = posterior(centered_approx) +@assert mean(q_non_centered(x_pred)) ≈ mean(q_centered(x_pred)) +@assert cov(q_non_centered(x_pred)) ≈ cov(q_centered(x_pred)) + + +# ### Pseudo-Observation vs Centered +# +# The relationship between these two parametrisations is only slightly more complicated. +# Consider the following pseudo-observation parametrisation of the approximate posterior: +ŷ = randn(M); +Ŝ = Matrix{Float64}(I, M, M); +pseudo_obs_approx = PseudoObsSparseVariationalApproximation(f, z, Ŝ, ŷ); +q_pseudo_obs = posterior(pseudo_obs_approx); + +# The corresponding centered approximation is given via the usual Gaussian conditioning +# formulae: +C = cov(fz); +C_centered = C - C * (cholesky(Symmetric(C + Ŝ)) \ C); +m_centered = mean(fz) + C / cholesky(Symmetric(C + Ŝ)) * (ŷ - mean(fz)); +qu_centered = MvNormal(m_centered, Symmetric(C_centered)); +centered_approx = SparseVariationalApproximation(Centered(), fz, qu_centered); +q_centered = posterior(centered_approx); + +# Again, we can gain some confidence that they're the same by comparing the posterior +# marginal statistics. 
+@assert mean(q_pseudo_obs(x_pred)) ≈ mean(q_centered(x_pred)) +@assert cov(q_pseudo_obs(x_pred)) ≈ cov(q_centered(x_pred)) + +# While it's always possible to find an approximation using the centered parametrisation +# which is equivalent to a given pseudo-observation parametrisation, the converse is not +# true. +# That is, for a given `C = cov(fz)` and particular choice of covariance matrix `Ĉ` in a +# centered parametrisation, it may not be the case that there exists a positive-definite +# pseudo-observation covariance matrix `Ŝ` such that ``\hat{C} = C - C (C + \hat{S})^{-1} C``. +# +# However, this is not necessarily a problem: if the likelihood used in the model is +# log-concave then the optimal choice for `Ĉ` can always be represented using this +# pseudo-observation parametrisation. +# Even when this is not the case, it is not guaranteed to be the case that the optimal +# choice for `q(u)` lives outside of the family of distributions which can be expressed +# within the pseudo-observation family. + +# +# ### Decoupled Pseudo-Observation vs Non-Centered +# +# The relationship here is the most delicate, due to the restriction that +# ``\hat{\mathbf{S}}`` must be diagonal. +# This approximation achieves the optimal approximate posterior when the choice of +# pseudo observational data (``\hat{y}``, ``\hat{\mathbf{S}}``, and ``\mathbf{v}``) equals +# the original observational data. +# When the original observational data involves a non-Gaussian likelihood, this +# approximation family can still obtain the optimal approximate posterior provided that +# ``\mathbf{v}`` lines up with the inputs associated with the original data, ``\mathbf{x}``. 
+# +# To see this, consider the pseudo-observation approximation which makes use of the +# original observational data (generated at the top of this example): +decoupled_approx = PseudoObsSparseVariationalApproximation(f, z, Σ, x, y); +decoupled_posterior = posterior(decoupled_approx); + +# We can get the optimal pseudo-point approximation using standard functionality: +optimal_approx_post = posterior(VFE(f(z, jitter)), f(x, Σ), y); + +# The marginal statistics agree: +@assert mean(optimal_approx_post(x_pred)) ≈ mean(decoupled_posterior(x_pred)) +@assert cov(optimal_approx_post(x_pred)) ≈ cov(decoupled_posterior(x_pred)) + +# The reason to think that this parametrisation will do something sensible is this property. +# Obviously when ``\mathbf{v} \neq \mathbf{x}`` the optimal approximate posterior cannot be +# recovered, however, when the hope is that there exists a small pseudo-dataset which gets +# close to the optimum. + + + +# [^Titsias]: Titsias, M. K. [Variational learning of inducing variables in sparse Gaussian processes](https://proceedings.mlr.press/v5/titsias09a.html) +# [^Gorinova]: Gorinova, Maria and Moore, Dave and Hoffman, Matthew [Automatic Reparameterisation of Probabilistic Programs](http://proceedings.mlr.press/v119/gorinova20a) +# [^Paciorek]: [Paciorek, Christopher Joseph. Nonstationary Gaussian processes for regression and spatial modelling. Diss. 
Carnegie Mellon University, 2003.](https://www.stat.berkeley.edu/~paciorek/diss/paciorek-thesis.pdf) From 50d0a3a990ef4b6d2e274ad5946df9798ab16d43 Mon Sep 17 00:00:00 2001 From: willtebbutt Date: Thu, 24 Mar 2022 13:53:33 +0000 Subject: [PATCH 3/9] Apply suggestions from code review Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- examples/d-sparse-parametrisations/script.jl | 3 --- src/ApproximateGPs.jl | 4 +-- src/SparseVariationalApproximationModule.jl | 27 ++++++++------------ test/SparseVariationalApproximationModule.jl | 16 +++++++++--- 4 files changed, 24 insertions(+), 26 deletions(-) diff --git a/examples/d-sparse-parametrisations/script.jl b/examples/d-sparse-parametrisations/script.jl index 9e2d80e0..0ce6668d 100644 --- a/examples/d-sparse-parametrisations/script.jl +++ b/examples/d-sparse-parametrisations/script.jl @@ -100,7 +100,6 @@ q_centered = posterior(centered_approx) @assert mean(q_non_centered(x_pred)) ≈ mean(q_centered(x_pred)) @assert cov(q_non_centered(x_pred)) ≈ cov(q_centered(x_pred)) - # ### Pseudo-Observation vs Centered # # The relationship between these two parametrisations is only slightly more complicated. @@ -167,8 +166,6 @@ optimal_approx_post = posterior(VFE(f(z, jitter)), f(x, Σ), y); # recovered, however, when the hope is that there exists a small pseudo-dataset which gets # close to the optimum. - - # [^Titsias]: Titsias, M. K. [Variational learning of inducing variables in sparse Gaussian processes](https://proceedings.mlr.press/v5/titsias09a.html) # [^Gorinova]: Gorinova, Maria and Moore, Dave and Hoffman, Matthew [Automatic Reparameterisation of Probabilistic Programs](http://proceedings.mlr.press/v119/gorinova20a) # [^Paciorek]: [Paciorek, Christopher Joseph. Nonstationary Gaussian processes for regression and spatial modelling. Diss. 
Carnegie Mellon University, 2003.](https://www.stat.berkeley.edu/~paciorek/diss/paciorek-thesis.pdf) diff --git a/src/ApproximateGPs.jl b/src/ApproximateGPs.jl index 2f06d78d..566290cd 100644 --- a/src/ApproximateGPs.jl +++ b/src/ApproximateGPs.jl @@ -17,9 +17,7 @@ include("SparseVariationalApproximationModule.jl") DefaultQuadrature, Analytic, GaussHermite, MonteCarlo @reexport using .SparseVariationalApproximationModule: PseudoObsSparseVariationalApproximation, - ObsCovLikelihood, - DecoupledObsCovLikelihood - + PseudoObsSparseVariationalApproximation, ObsCovLikelihood, DecoupledObsCovLikelihood include("LaplaceApproximationModule.jl") @reexport using .LaplaceApproximationModule: LaplaceApproximation @reexport using .LaplaceApproximationModule: diff --git a/src/SparseVariationalApproximationModule.jl b/src/SparseVariationalApproximationModule.jl index 8bfd9f4b..4acb051b 100644 --- a/src/SparseVariationalApproximationModule.jl +++ b/src/SparseVariationalApproximationModule.jl @@ -3,9 +3,7 @@ module SparseVariationalApproximationModule using ..API export SparseVariationalApproximation, - Centered, - NonCentered, - PseudoObsSparseVariationalApproximation + Centered, NonCentered, PseudoObsSparseVariationalApproximation using ..ApproximateGPs: _chol_cov, _cov using Distributions @@ -71,9 +69,8 @@ See also [`Centered`](@ref). struct NonCentered end struct SparseVariationalApproximation{ - Parametrization,Tfz<:FiniteGP,Tq<:AbstractMvNormal -} <: AbstractSparseVariationalApproximation - fz::Tfz +struct SparseVariationalApproximation{Parametrization,Tfz<:FiniteGP,Tq<:AbstractMvNormal} <: + AbstractSparseVariationalApproximation q::Tq end @@ -291,8 +288,10 @@ inducing_points(f::ApproxPosteriorGP{<:SparseVariationalApproximation}) = f.appr function API.approx_lml( sva::AbstractSparseVariationalApproximation, l_fx::Union{FiniteGP,LatentFiniteGP}, ys; - kwargs... 
-) + sva::AbstractSparseVariationalApproximation, + l_fx::Union{FiniteGP,LatentFiniteGP}, + ys; + kwargs..., return AbstractGPs.elbo(sva, l_fx, ys; kwargs...) end @@ -402,8 +401,6 @@ function _prior_kl(sva::SparseVariationalApproximation{NonCentered}) end - -# # Pseudo-Observation Parametrisations of q(u). # @@ -468,9 +465,7 @@ function AbstractGPs.posterior( end function _prior_kl( - approx::PseudoObsSparseVariationalApproximation{<:ObsCovLikelihood} -) - f = approx.f +function _prior_kl(approx::PseudoObsSparseVariationalApproximation{<:ObsCovLikelihood}) z = approx.z y = approx.likelihood.y S = approx.likelihood.S @@ -482,9 +477,9 @@ function _prior_kl( m, C = mean_and_cov(posterior(approx)(z)) S_chol = cholesky(AbstractGPs._symmetric(S)) pseudo_lik = -( - length(y) * AbstractGPs.log2π + logdet(S_chol) + sum(abs2, S_chol.U' \ (y - m)) - ) / 2 - trace_term = tr(S_chol \ C) / 2 + pseudo_lik = + -(length(y) * AbstractGPs.log2π + logdet(S_chol) + sum(abs2, S_chol.U' \ (y - m))) / + 2 return -logp_pseudo_obs + pseudo_lik - trace_term end diff --git a/test/SparseVariationalApproximationModule.jl b/test/SparseVariationalApproximationModule.jl index 9f5eeb4d..812c7513 100644 --- a/test/SparseVariationalApproximationModule.jl +++ b/test/SparseVariationalApproximationModule.jl @@ -211,7 +211,9 @@ # Construct approximate posterior. approx = ApproximateGPs.SparseVariationalApproximationModule.PseudoObsSparseVariationalApproximation(f, z, Ŝ, ŷ) - # Check that the posterior produced satisfies the AbstractGPs API. 
+ approx = ApproximateGPs.SparseVariationalApproximationModule.PseudoObsSparseVariationalApproximation( + f, z, Ŝ, ŷ + ) approx_posterior = posterior(approx) AbstractGPs.TestUtils.test_internal_abstractgps_interface( rng, approx_posterior, x, z @@ -222,7 +224,9 @@ qu = approx_posterior(z, 1e-12) approx_centered = SparseVariationalApproximation(Centered(), f(z, 1e-12), qu) approx_post_centered = posterior(approx_centered) - + approx_centered = SparseVariationalApproximation( + Centered(), f(z, 1e-12), qu + ) approx_post_x = approx_posterior(x, s) approx_post_centered_x = approx_post_centered(x, s) @test mean(approx_post_x) ≈ mean(approx_post_centered_x) @@ -243,7 +247,9 @@ # Construct approximate posterior. approx = ApproximateGPs.SparseVariationalApproximationModule.PseudoObsSparseVariationalApproximation(f, z, Ŝ, v, ŷ) - # Check that the posterior produced satisfies the AbstractGPs API. + approx = ApproximateGPs.SparseVariationalApproximationModule.PseudoObsSparseVariationalApproximation( + f, z, Ŝ, v, ŷ + ) approx_posterior = posterior(approx) AbstractGPs.TestUtils.test_internal_abstractgps_interface( rng, approx_posterior, x, z @@ -254,7 +260,9 @@ qu = approx_posterior(z, 1e-12) approx_centered = SparseVariationalApproximation(Centered(), f(z, 1e-12), qu) approx_post_centered = posterior(approx_centered) - + approx_centered = SparseVariationalApproximation( + Centered(), f(z, 1e-12), qu + ) approx_post_x = approx_posterior(x, s) approx_post_centered_x = approx_post_centered(x, s) @test mean(approx_post_x) ≈ mean(approx_post_centered_x) From cf64d24654c43bc855948bcea955bb724edca88a Mon Sep 17 00:00:00 2001 From: willtebbutt Date: Thu, 24 Mar 2022 13:56:31 +0000 Subject: [PATCH 4/9] Apply suggestions from code review Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- src/SparseVariationalApproximationModule.jl | 14 ++++++-------- test/SparseVariationalApproximationModule.jl | 16 ++++++++++++---- 2 files changed, 18 
insertions(+), 12 deletions(-) diff --git a/src/SparseVariationalApproximationModule.jl b/src/SparseVariationalApproximationModule.jl index 4acb051b..14a73b91 100644 --- a/src/SparseVariationalApproximationModule.jl +++ b/src/SparseVariationalApproximationModule.jl @@ -404,8 +404,6 @@ end # Pseudo-Observation Parametrisations of q(u). # - -@doc raw""" PseudoObsSparseVariationalApproximation( likelihood, f::AbstractGP, z::AbstractVector ) @@ -414,8 +412,8 @@ Parametrises `q(f(z))`, the approximate posterior at `f(z)`, using a surrogate l `likelihood`: `q(f(z)) ∝ p(f(z)) likelihood(f(z))`. """ struct PseudoObsSparseVariationalApproximation{ - Tlikelihood, Tf<:AbstractGP, Tz<:AbstractVector -} <: AbstractSparseVariationalApproximation + Tlikelihood,Tf<:AbstractGP,Tz<:AbstractVector + Tlikelihood,Tf<:AbstractGP,Tz<:AbstractVector likelihood::Tlikelihood f::Tf z::Tz @@ -429,8 +427,8 @@ _get_prior(approx::PseudoObsSparseVariationalApproximation) = approx.f Chooses `likelihood(u) = N(y; u, S)`. `length(y)` must be equal to the number of pseudo-points utilised in the sparse variational approximation. """ -struct ObsCovLikelihood{TS<:AbstractMatrix{<:Real}, Ty<:AbstractVector{<:Real}} - S::TS +struct ObsCovLikelihood{TS<:AbstractMatrix{<:Real},Ty<:AbstractVector{<:Real}} +struct ObsCovLikelihood{TS<:AbstractMatrix{<:Real},Ty<:AbstractVector{<:Real}} y::Ty end @@ -493,8 +491,8 @@ of pseudo-points, where `f` is the GP to which this likelihood specifies the app posterior over `f(z)`. 
""" struct DecoupledObsCovLikelihood{ - TS<:Diagonal{<:Real}, Tv<:AbstractVector, Ty<:AbstractVector{<:Real} -} + TS<:Diagonal{<:Real},Tv<:AbstractVector,Ty<:AbstractVector{<:Real} + TS<:Diagonal{<:Real},Tv<:AbstractVector,Ty<:AbstractVector{<:Real} S::TS v::Tv y::Ty diff --git a/test/SparseVariationalApproximationModule.jl b/test/SparseVariationalApproximationModule.jl index 812c7513..e31ae49b 100644 --- a/test/SparseVariationalApproximationModule.jl +++ b/test/SparseVariationalApproximationModule.jl @@ -209,7 +209,9 @@ Ŝ = _S * _S' + I # Construct approximate posterior. - approx = ApproximateGPs.SparseVariationalApproximationModule.PseudoObsSparseVariationalApproximation(f, z, Ŝ, ŷ) + approx = ApproximateGPs.SparseVariationalApproximationModule.PseudoObsSparseVariationalApproximation( + f, z, Ŝ, ŷ + ) approx = ApproximateGPs.SparseVariationalApproximationModule.PseudoObsSparseVariationalApproximation( f, z, Ŝ, ŷ @@ -222,7 +224,9 @@ # Check that the posterior is close to an equivalent Centered approximation. @testset "compare against equivalent centered" begin qu = approx_posterior(z, 1e-12) - approx_centered = SparseVariationalApproximation(Centered(), f(z, 1e-12), qu) + approx_centered = SparseVariationalApproximation( + Centered(), f(z, 1e-12), qu + ) approx_post_centered = posterior(approx_centered) approx_centered = SparseVariationalApproximation( Centered(), f(z, 1e-12), qu @@ -245,7 +249,9 @@ Ŝ = Diagonal(rand(rng, length(v)) .+ 0.1) # Construct approximate posterior. - approx = ApproximateGPs.SparseVariationalApproximationModule.PseudoObsSparseVariationalApproximation(f, z, Ŝ, v, ŷ) + approx = ApproximateGPs.SparseVariationalApproximationModule.PseudoObsSparseVariationalApproximation( + f, z, Ŝ, v, ŷ + ) approx = ApproximateGPs.SparseVariationalApproximationModule.PseudoObsSparseVariationalApproximation( f, z, Ŝ, v, ŷ @@ -258,7 +264,9 @@ # Check that the posterior is close to an equivalent Centered approximation. 
@testset "compare against equivalent centered" begin qu = approx_posterior(z, 1e-12) - approx_centered = SparseVariationalApproximation(Centered(), f(z, 1e-12), qu) + approx_centered = SparseVariationalApproximation( + Centered(), f(z, 1e-12), qu + ) approx_post_centered = posterior(approx_centered) approx_centered = SparseVariationalApproximation( Centered(), f(z, 1e-12), qu From 348d0d0c61593ba49b7a8dc38818744bfdf6ff52 Mon Sep 17 00:00:00 2001 From: WT Date: Thu, 24 Mar 2022 14:11:46 +0000 Subject: [PATCH 5/9] Fix problems from formatting --- src/SparseVariationalApproximationModule.jl | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/src/SparseVariationalApproximationModule.jl b/src/SparseVariationalApproximationModule.jl index 14a73b91..28bd7733 100644 --- a/src/SparseVariationalApproximationModule.jl +++ b/src/SparseVariationalApproximationModule.jl @@ -68,7 +68,6 @@ See also [`Centered`](@ref). """ struct NonCentered end -struct SparseVariationalApproximation{ struct SparseVariationalApproximation{Parametrization,Tfz<:FiniteGP,Tq<:AbstractMvNormal} <: AbstractSparseVariationalApproximation q::Tq @@ -287,11 +286,11 @@ inducing_points(f::ApproxPosteriorGP{<:SparseVariationalApproximation}) = f.appr # function API.approx_lml( - sva::AbstractSparseVariationalApproximation, l_fx::Union{FiniteGP,LatentFiniteGP}, ys; sva::AbstractSparseVariationalApproximation, l_fx::Union{FiniteGP,LatentFiniteGP}, ys; kwargs..., +) return AbstractGPs.elbo(sva, l_fx, ys; kwargs...) end @@ -402,8 +401,8 @@ end # Pseudo-Observation Parametrisations of q(u). 
-# +@doc raw""" PseudoObsSparseVariationalApproximation( likelihood, f::AbstractGP, z::AbstractVector ) @@ -413,7 +412,7 @@ Parametrises `q(f(z))`, the approximate posterior at `f(z)`, using a surrogate l """ struct PseudoObsSparseVariationalApproximation{ Tlikelihood,Tf<:AbstractGP,Tz<:AbstractVector - Tlikelihood,Tf<:AbstractGP,Tz<:AbstractVector +} likelihood::Tlikelihood f::Tf z::Tz @@ -427,7 +426,6 @@ _get_prior(approx::PseudoObsSparseVariationalApproximation) = approx.f Chooses `likelihood(u) = N(y; u, S)`. `length(y)` must be equal to the number of pseudo-points utilised in the sparse variational approximation. """ -struct ObsCovLikelihood{TS<:AbstractMatrix{<:Real},Ty<:AbstractVector{<:Real}} struct ObsCovLikelihood{TS<:AbstractMatrix{<:Real},Ty<:AbstractVector{<:Real}} y::Ty end @@ -462,7 +460,6 @@ function AbstractGPs.posterior( return posterior(f(z, S), y) end -function _prior_kl( function _prior_kl(approx::PseudoObsSparseVariationalApproximation{<:ObsCovLikelihood}) z = approx.z y = approx.likelihood.y @@ -474,10 +471,8 @@ function _prior_kl(approx::PseudoObsSparseVariationalApproximation{<:ObsCovLikel # pseudo-reconstruction term. m, C = mean_and_cov(posterior(approx)(z)) S_chol = cholesky(AbstractGPs._symmetric(S)) - pseudo_lik = -( - pseudo_lik = - -(length(y) * AbstractGPs.log2π + logdet(S_chol) + sum(abs2, S_chol.U' \ (y - m))) / - 2 + quad_form = sum(abs2, S_chol.U' \ (y - m)) + pseudo_lik = -(length(y) * AbstractGPs.log2π + logdet(S_chol) + quad_form) / 2 return -logp_pseudo_obs + pseudo_lik - trace_term end @@ -492,7 +487,7 @@ posterior over `f(z)`. 
""" struct DecoupledObsCovLikelihood{ TS<:Diagonal{<:Real},Tv<:AbstractVector,Ty<:AbstractVector{<:Real} - TS<:Diagonal{<:Real},Tv<:AbstractVector,Ty<:AbstractVector{<:Real} +} S::TS v::Tv y::Ty From 6961c23114b32c9c584eefb6ce383214fb82af33 Mon Sep 17 00:00:00 2001 From: WT Date: Thu, 24 Mar 2022 14:14:45 +0000 Subject: [PATCH 6/9] Fix formatting --- src/ApproximateGPs.jl | 1 - src/SparseVariationalApproximationModule.jl | 1 - 2 files changed, 2 deletions(-) diff --git a/src/ApproximateGPs.jl b/src/ApproximateGPs.jl index 566290cd..515dc0a3 100644 --- a/src/ApproximateGPs.jl +++ b/src/ApproximateGPs.jl @@ -16,7 +16,6 @@ include("SparseVariationalApproximationModule.jl") @reexport using .SparseVariationalApproximationModule: DefaultQuadrature, Analytic, GaussHermite, MonteCarlo @reexport using .SparseVariationalApproximationModule: - PseudoObsSparseVariationalApproximation, PseudoObsSparseVariationalApproximation, ObsCovLikelihood, DecoupledObsCovLikelihood include("LaplaceApproximationModule.jl") @reexport using .LaplaceApproximationModule: LaplaceApproximation diff --git a/src/SparseVariationalApproximationModule.jl b/src/SparseVariationalApproximationModule.jl index 28bd7733..1ada16e3 100644 --- a/src/SparseVariationalApproximationModule.jl +++ b/src/SparseVariationalApproximationModule.jl @@ -399,7 +399,6 @@ function _prior_kl(sva::SparseVariationalApproximation{NonCentered}) return (trace_term + m_ε'm_ε - length(m_ε) - logdet(C_ε)) / 2 end - # Pseudo-Observation Parametrisations of q(u). 
@doc raw""" From 41e93ba04fb6cef3b53553a37f81cdfebd7d3cec Mon Sep 17 00:00:00 2001 From: WT Date: Thu, 24 Mar 2022 16:06:36 +0000 Subject: [PATCH 7/9] Bump patch --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 4d78bbd4..ef6873d5 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "ApproximateGPs" uuid = "298c2ebc-0411-48ad-af38-99e88101b606" authors = ["JuliaGaussianProcesses Team"] -version = "0.3.4" +version = "0.3.5" [deps] AbstractGPs = "99985d1d-32ba-4be9-9821-2ec096f28918" From 87e2c39198336f0dabc5cf039f75f8d835a5be04 Mon Sep 17 00:00:00 2001 From: WT Date: Thu, 24 Mar 2022 17:13:54 +0000 Subject: [PATCH 8/9] Fix things the formatter broke --- src/SparseVariationalApproximationModule.jl | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/SparseVariationalApproximationModule.jl b/src/SparseVariationalApproximationModule.jl index 1ada16e3..a5feda9f 100644 --- a/src/SparseVariationalApproximationModule.jl +++ b/src/SparseVariationalApproximationModule.jl @@ -68,8 +68,10 @@ See also [`Centered`](@ref). """ struct NonCentered end -struct SparseVariationalApproximation{Parametrization,Tfz<:FiniteGP,Tq<:AbstractMvNormal} <: - AbstractSparseVariationalApproximation +struct SparseVariationalApproximation{ + Parametrization,Tfz<:FiniteGP,Tq<:AbstractMvNormal +} <: AbstractSparseVariationalApproximation + fz::Tfz q::Tq end @@ -411,7 +413,7 @@ Parametrises `q(f(z))`, the approximate posterior at `f(z)`, using a surrogate l """ struct PseudoObsSparseVariationalApproximation{ Tlikelihood,Tf<:AbstractGP,Tz<:AbstractVector -} +} <: AbstractSparseVariationalApproximation likelihood::Tlikelihood f::Tf z::Tz @@ -426,6 +428,7 @@ Chooses `likelihood(u) = N(y; u, S)`. `length(y)` must be equal to the number of pseudo-points utilised in the sparse variational approximation. 
""" struct ObsCovLikelihood{TS<:AbstractMatrix{<:Real},Ty<:AbstractVector{<:Real}} + S::TS y::Ty end @@ -460,6 +463,7 @@ function AbstractGPs.posterior( end function _prior_kl(approx::PseudoObsSparseVariationalApproximation{<:ObsCovLikelihood}) + f = approx.f z = approx.z y = approx.likelihood.y S = approx.likelihood.S From d47c8098d7a281340a7816f3113e0191e3295d27 Mon Sep 17 00:00:00 2001 From: WT Date: Thu, 24 Mar 2022 17:16:52 +0000 Subject: [PATCH 9/9] Fix remaining error from formatter --- src/SparseVariationalApproximationModule.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/SparseVariationalApproximationModule.jl b/src/SparseVariationalApproximationModule.jl index a5feda9f..48f6c4f5 100644 --- a/src/SparseVariationalApproximationModule.jl +++ b/src/SparseVariationalApproximationModule.jl @@ -476,6 +476,7 @@ function _prior_kl(approx::PseudoObsSparseVariationalApproximation{<:ObsCovLikel S_chol = cholesky(AbstractGPs._symmetric(S)) quad_form = sum(abs2, S_chol.U' \ (y - m)) pseudo_lik = -(length(y) * AbstractGPs.log2π + logdet(S_chol) + quad_form) / 2 + trace_term = tr(S_chol \ C) / 2 return -logp_pseudo_obs + pseudo_lik - trace_term end