diff --git a/benchmarks/Project.toml b/benchmarks/Project.toml index a32aeef..7a322e0 100644 --- a/benchmarks/Project.toml +++ b/benchmarks/Project.toml @@ -1,8 +1,6 @@ [deps] ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d" -MappedArrays = "dbb5928d-eab1-5f90-85c2-b9b0edb7c900" -Optim = "429524aa-4258-5aef-a3af-852621145aeb" Pathfinder = "b1d3bc72-d0e7-4279-b92f-7fa5d6d2d454" ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca" Revise = "295af30f-e4ad-537b-8983-00126c2a3abe" diff --git a/benchmarks/bouncy.jl b/benchmarks/bouncy.jl index 53b41d2..9aed9cb 100644 --- a/benchmarks/bouncy.jl +++ b/benchmarks/bouncy.jl @@ -14,56 +14,76 @@ using ForwardDiff using ForwardDiff: Dual using Pathfinder using Pathfinder.PDMats +using MCMCChains +using TupleVectors: chainvec +using Tilde.MeasureTheory: transform Random.seed!(1) +function make_grads(post) + as_post = as(post) + d = TV.dimension(as_post) + tr(θ) = transform(as_post, θ) + obj(θ) = -Tilde.unsafe_logdensityof(post, tr(θ)) + ℓ(θ) = -obj(θ) + @inline function dneglogp(t, x, v, args...) # two directional derivatives + f(t) = obj(x + t * v) + u = ForwardDiff.derivative(f, Dual{:hSrkahPmmC}(0.0, 1.0)) + u.value, u.partials[] + end + + gconfig = ForwardDiff.GradientConfig(obj, rand(d), ForwardDiff.Chunk{d}()) + function ∇neglogp!(y, t, x, args...) + ForwardDiff.gradient!(y, obj, x, gconfig) + y + end + ℓ, dneglogp, ∇neglogp! +end + +# ↑ general purpose +############################################################ +# ↓ problem-specific + # read data function readlrdata() fname = joinpath("lr.data") z = readdlm(fname) - A = z[:, 1:end-1] + A = z[:, 1:(end-1)] A = [ones(size(A, 1)) A] y = z[:, end] .- 1 return A, y end -A, y = readlrdata(); -At = collect(A'); model_lr = @model (At, y, σ) begin d, n = size(At) θ ~ Normal(σ = σ)^d for j in 1:n - logitp = dot(view(At, :, j), θ) + logitp = view(At, :, j)' * θ y[j] ~ Bernoulli(logitp = logitp) end end + +# Define model arguments +A, y = readlrdata(); +At = collect(A'); σ = 100.0 -function make_grads(model_lr, At, y, σ) - post = model_lr(At, y, σ) | (; y) - as_post = as(post) - obj(θ) = -Tilde.unsafe_logdensityof(post, transform(as_post, θ)) - ℓ(θ) = -obj(θ) - @inline function dneglogp(t, x, v) # two directional derivatives - f(t) = obj(x + t * v) - u = ForwardDiff.derivative(f, Dual{:hSrkahPmmC}(0.0, 1.0)) - u.value, u.partials[] - end +# Represent the posterior +post = model_lr(At, y, σ) | (; y) - gconfig = ForwardDiff.GradientConfig(obj, rand(25), ForwardDiff.Chunk{25}()) - function ∇neglogp!(y, t, x) - ForwardDiff.gradient!(y, obj, x, gconfig) - return - end - post, ℓ, dneglogp, ∇neglogp! -end +d = TV.dimension(as(post)) + + +ℓ, dneglogp, ∇neglogp! = make_grads(post) -post, ℓ, dneglogp, ∇neglogp! = make_grads(model_lr, At, y, σ) -# Try things out -dneglogp(2.4, randn(25), randn(25)); -∇neglogp!(randn(25), 2.1, randn(25)); +# Make sure gradients are working +let + @show dneglogp(2.4, randn(d), randn(d)) + y = Vector{Float64}(undef, d) + @show ∇neglogp!(y, 2.1, randn(d)) + nothing +end -d = 25 # number of parameters t0 = 0.0; x0 = zeros(d); # starting point sampler # estimated posterior mean (n=100000, 797s) @@ -129,8 +149,7 @@ sampler = ZZB.NotFactSampler( ), ); -using TupleVectors: chainvec -using Tilde.MeasureTheory: transform +# @time first(Iterators.drop(tvs,1000)) function collect_sampler(t, sampler, n; progress = true, progress_stops = 20) if progress @@ -166,7 +185,6 @@ elapsed_time = @elapsed @time begin bps_samples, info = collect_sampler(as(post), sampler, n; progress = false) end -using MCMCChains bps_chain = MCMCChains.Chains(bps_samples.θ); bps_chain = setinfo(bps_chain, (; start_time = 0.0, stop_time = elapsed_time));