diff --git a/ext/ForwardDiffStaticArraysExt.jl b/ext/ForwardDiffStaticArraysExt.jl
index 63f841db..2d306724 100644
--- a/ext/ForwardDiffStaticArraysExt.jl
+++ b/ext/ForwardDiffStaticArraysExt.jl
@@ -114,6 +114,8 @@ ForwardDiff.hessian(f::F, x::StaticArray, cfg::HessianConfig, ::Val) where {F} =
 ForwardDiff.hessian!(result::AbstractArray, f::F, x::StaticArray) where {F} = jacobian!(result, Base.Fix1(gradient, f), x)
 
+ForwardDiff.hessian!(result::AbstractArray, f::F, grad::AbstractArray, x::StaticArray) where {F} = hessian!(result, f, grad, x, HessianConfig(f, grad, x))
+
 ForwardDiff.hessian!(result::MutableDiffResult, f::F, x::StaticArray) where {F} = hessian!(result, f, x, HessianConfig(f, result, x))
 
 ForwardDiff.hessian!(result::ImmutableDiffResult, f::F, x::StaticArray, cfg::HessianConfig) where {F} = hessian!(result, f, x)
 
diff --git a/src/config.jl b/src/config.jl
index d561b4fd..82c13742 100644
--- a/src/config.jl
+++ b/src/config.jl
@@ -227,6 +227,32 @@ function HessianConfig(f::F,
     return HessianConfig(jacobian_config, gradient_config)
 end
 
+"""
+    ForwardDiff.HessianConfig(f, y::AbstractArray, x::AbstractArray, chunk::Chunk = Chunk(x))
+
+Return a `HessianConfig` instance based on the type of `f` and the types/shapes of the
+gradient buffer `y` and the input vector `x`.
+
+The returned `HessianConfig` instance contains all the work buffers required by
+`ForwardDiff.hessian!`. The buffers are configured for the case where the `y` argument is
+an `AbstractArray` used to store the intermediate gradient.
+
+If `f` is `nothing` instead of the actual target function, then the returned instance can
+be used with any target function. However, this will reduce ForwardDiff's ability to catch
+and prevent perturbation confusion (see https://github.com/JuliaDiff/ForwardDiff.jl/issues/83).
+
+This constructor does not store/modify `x`.
+"""
+function HessianConfig(f::F,
+                       y::AbstractArray{Y},
+                       x::AbstractArray{X},
+                       chunk::Chunk = Chunk(x),
+                       tag = Tag(f, X)) where {F,Y,X}
+    jacobian_config = JacobianConfig(f, y, x, chunk, tag)
+    gradient_config = GradientConfig(f, jacobian_config.duals[2], chunk, tag)
+    return HessianConfig(jacobian_config, gradient_config)
+end
+
 """
     ForwardDiff.HessianConfig(f, result::DiffResult, x::AbstractArray, chunk::Chunk = Chunk(x))
 
diff --git a/src/hessian.jl b/src/hessian.jl
index 9c755c9a..35975874 100644
--- a/src/hessian.jl
+++ b/src/hessian.jl
@@ -36,6 +36,36 @@ function hessian!(result::AbstractArray, f::F, x::AbstractArray, cfg::HessianCon
     return result
 end
 
+# We use this struct for the inner gradient instead of
+# an equivalent closure in order to avoid allocations
+struct InnerGradientForNallocHess{C,F}
+    cfg::C
+    f::F
+end
+
+function (g::InnerGradientForNallocHess)(y, z)
+    gradient!(y, g.f, z, g.cfg.gradient_config, Val{false}())
+    return y
+end
+
+
+"""
+    ForwardDiff.hessian!(result::AbstractArray, f, grad::AbstractArray, x::AbstractArray, cfg::HessianConfig = HessianConfig(f, grad, x), check=Val{true}())
+
+Compute `H(f)` (i.e. `J(∇(f))`) evaluated at `x` without intermediate allocations, and
+store the result(s) in `result`, assuming `f` is called as `f(x)`. The buffer `grad` must be supplied and is used to store the intermediate gradients.
+
+This method assumes that `isa(f(x), Real)`.
+
+Set `check` to `Val{false}()` to disable tag checking. This can lead to perturbation confusion, so should be used with care.
+"""
+function hessian!(result::AbstractArray, f::F, grad::AbstractArray, x::AbstractArray, cfg::HessianConfig{T} = HessianConfig(f, grad, x), ::Val{CHK} = Val{true}()) where {F,T,CHK}
+    require_one_based_indexing(result, grad, x)
+    CHK && checktag(T, f, x)
+    ∇f! = InnerGradientForNallocHess(cfg, f)
+    jacobian!(result, ∇f!, grad, x, cfg.jacobian_config, Val{false}())
+    return result
+end
+
 # We use this struct below instead of an
 # equivalent closure in order to avoid
diff --git a/test/HessianTest.jl b/test/HessianTest.jl
index 4c667e5e..94c0f51c 100644
--- a/test/HessianTest.jl
+++ b/test/HessianTest.jl
@@ -24,7 +24,9 @@ h = [-66.0 -40.0 0.0;
       0.0 -80.0 200.0]
 
 @testset "running hardcoded test with chunk size = $c and tag = $(repr(tag))" for c in HESSIAN_CHUNK_SIZES, tag in (nothing, Tag((f,ForwardDiff.gradient), eltype(x)))
+    grad = similar(x)
     cfg = ForwardDiff.HessianConfig(f, x, ForwardDiff.Chunk{c}(), tag)
+    cfg_noalloc = ForwardDiff.HessianConfig(f, grad, x, ForwardDiff.Chunk{c}(), tag)
     resultcfg = ForwardDiff.HessianConfig(f, DiffResults.HessianResult(x), x, ForwardDiff.Chunk{c}(), tag)
 
     @test eltype(resultcfg) == eltype(cfg)
@@ -40,6 +42,15 @@
     ForwardDiff.hessian!(out, f, x, cfg)
     @test isapprox(out, h)
 
+    out = similar(x, 3, 3)
+    ForwardDiff.hessian!(out, f, grad, x, cfg_noalloc)
+    @test isapprox(out, h)
+
+    grad = similar(x)
+    out = similar(x, 3, 3)
+    ForwardDiff.hessian!(out, f, grad, x)
+    @test isapprox(out, h)
+
     out = DiffResults.HessianResult(x)
     ForwardDiff.hessian!(out, f, x)
     @test isapprox(DiffResults.value(out), v)
@@ -69,7 +80,9 @@ for f in DiffTests.VECTOR_TO_NUMBER_FUNCS
     # finite difference approximation error is really bad for Hessians...
     @test isapprox(h, Calculus.hessian(f, X), atol=0.02)
     @testset "$f with chunk size = $c and tag = $(repr(tag))" for c in HESSIAN_CHUNK_SIZES, tag in (nothing, Tag((f,ForwardDiff.gradient), eltype(x)))
+        grad = similar(X)
         cfg = ForwardDiff.HessianConfig(f, X, ForwardDiff.Chunk{c}(), tag)
+        cfg_noalloc = ForwardDiff.HessianConfig(f, grad, X, ForwardDiff.Chunk{c}(), tag)
         resultcfg = ForwardDiff.HessianConfig(f, DiffResults.HessianResult(X), X, ForwardDiff.Chunk{c}(), tag)
 
         out = ForwardDiff.hessian(f, X, cfg)
@@ -79,6 +92,10 @@
         ForwardDiff.hessian!(out, f, X, cfg)
         @test isapprox(out, h)
 
+        out = similar(X, length(X), length(X))
+        ForwardDiff.hessian!(out, f, grad, X, cfg_noalloc)
+        @test isapprox(out, h)
+
         out = DiffResults.HessianResult(X)
         ForwardDiff.hessian!(out, f, X, resultcfg)
         @test isapprox(DiffResults.value(out), v)
@@ -120,6 +137,16 @@ for T in (StaticArrays.SArray, StaticArrays.MArray)
     ForwardDiff.hessian!(out, prod, sx, scfg)
     @test out == actual
 
+    out = similar(x, 9, 9)
+    grad = similar(x)
+    ForwardDiff.hessian!(out, prod, grad, sx, ForwardDiff.HessianConfig(nothing, grad, x))
+    @test out == actual
+
+    out = similar(x, 9, 9)
+    grad = similar(x)
+    ForwardDiff.hessian!(out, prod, grad, sx, ForwardDiff.HessianConfig(nothing, grad, sx))
+    @test out == actual
+
     result = DiffResults.HessianResult(x)
     result = ForwardDiff.hessian!(result, prod, x)
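
Usage sketch (not part of the diff): a minimal example of how the new allocation-free
method introduced above might be called. The target function `f` and the buffer names
below are illustrative assumptions, not code from this PR.

    using ForwardDiff

    f(x) = sum(abs2, x) + prod(x)   # any R^n -> R function returning a Real

    x = rand(3)
    grad = similar(x)               # reusable buffer for the intermediate gradient
    H = similar(x, 3, 3)            # output buffer for the Hessian

    # Build the config once up front; repeated hessian! calls then reuse its work
    # buffers together with `grad`, which is what avoids per-call allocations.
    cfg = ForwardDiff.HessianConfig(f, grad, x)
    ForwardDiff.hessian!(H, f, grad, x, cfg)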