From 4814cbe08264abce9b5614a07fe81d2beb1e377e Mon Sep 17 00:00:00 2001 From: Fabian Gans Date: Wed, 12 Feb 2025 12:51:25 +0100 Subject: [PATCH 1/3] add code for weighted online linear regression --- src/weightedlinearregression.jl | 34 +++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 src/weightedlinearregression.jl diff --git a/src/weightedlinearregression.jl b/src/weightedlinearregression.jl new file mode 100644 index 0000000..b0b82e2 --- /dev/null +++ b/src/weightedlinearregression.jl @@ -0,0 +1,34 @@ +smooth(a, b, γ) = a + γ * (b - a) + +struct WeightedFit + mx::Float64 + my::Float64 + mxx::Float64 + mxy::Float64 + mw::Float64 + n::Int64 +end +WeightedFit() = WeightedFit(0.0, 0.0, 0.0, 0.0, 0.0, 0) +function finalizewlinreg(f::WeightedFit) + b = (f.mxy - f.mx * f.my) / (f.mxx - f.mx * f.mx) + a = f.my - b * f.mx + a, b +end +@inline function smoowthwlinfit(xi, yi, wi, f::WeightedFit) + n = f.n + 1 + mw = smooth(f.mw, wi, inv(n)) + updateweight = wi / (mw * n) + mx = smooth(f.mx, xi, updateweight) + my = smooth(f.my, yi, updateweight) + mxx = smooth(f.mxx, xi * xi, updateweight) + mxy = smooth(f.mxy, xi * yi, updateweight) + return WeightedFit(mx, my, mxx, mxy, mw, n) +end + +function wlinreg(x, y, w) + f = WeightedFit() + @inbounds for i in eachindex(x, y, w) + f = smoowthwlinfit(x[i], y[i], w[i], f) + end + finalizewlinreg(f) +end From 6250f73d58b1c77e0a64c19ecb9fd06d95efe912 Mon Sep 17 00:00:00 2001 From: Fabian Gans Date: Wed, 12 Feb 2025 17:26:53 +0100 Subject: [PATCH 2/3] add alternative rqatrend function, which is 20 times slower than original implementation --- src/RQADeforestation.jl | 1 + src/rqatrend.jl | 31 +++++++++++++++++++++++-------- src/weightedlinearregression.jl | 7 +++++++ 3 files changed, 31 insertions(+), 8 deletions(-) diff --git a/src/RQADeforestation.jl b/src/RQADeforestation.jl index c409457..2a5feb5 100644 --- a/src/RQADeforestation.jl +++ b/src/RQADeforestation.jl @@ -11,6 +11,7 @@ using TestItems export gdalcube, rqatrend +include("weightedlinearregression.jl") include("metrics.jl") include("auxil.jl") include("rqatrend.jl") diff --git a/src/rqatrend.jl b/src/rqatrend.jl index cc388cf..cead943 100644 --- a/src/rqatrend.jl +++ b/src/rqatrend.jl @@ -8,7 +8,6 @@ using Distances Compute the RQA trend metric for the datacube `cube` with the epsilon threshold `thresh`. """ function rqatrend(cube; thresh=2, outpath=tempname() * ".zarr", overwrite=false, kwargs...) - @show outpath mapCube(rqatrend, cube, thresh; indims=InDims("Time"), outdims=OutDims(; outtype=Float32, path=outpath, overwrite, kwargs...)) end @@ -84,6 +83,29 @@ function rqatrend_impl(data; thresh=2, border=10, theiler=1, metric=CheckedEucli return 1000.0*(A \ b)[1] # slope end +function rqatrend_weightedonline(data, timeaxis; thresh=2, border=10, theiler=1, metric=CheckedEuclidean()) + # simplified implementation of https://stats.stackexchange.com/a/370175 and https://github.com/joshday/OnlineStats.jl/blob/b89a99679b13e3047ff9c93a03c303c357931832/src/stats/linreg.jl + # x is the diagonal offset, y the percentage of local recurrence + # we compute the slope of a simple linear regression with bias from x to y + maxnum = last(timeaxis) - first(timeaxis) + start = 1 + finish = length(data) - border + wf = WeightedFit() + for i in start:finish + data[i] === missing && continue + for j in i+theiler:finish + data[j] === missing && continue + y = Float64(evaluate(metric, data[i], data[j]) <= thresh) + + x = Float64(timeaxis[j] - timeaxis[i]) + weight = 1 / (maxnum - x) + wf = smoowthwlinfit(x, y, weight, wf) + end + end + b = finalizewlinreg(wf)[2] + return 1000.0 * b +end + @testitem "rqatrend_impl" begin import AllocCheck @@ -137,11 +159,4 @@ end # assumes n is Int for optimal performance sumofsquares(n) = n*(n+1)*(2*n+1)/6 -""" - smooth(a, b, γ) -Weighted average of `a` and `b` with weight `γ`. - -``(1 - γ) * a + γ * b`` -""" -smooth(a, b, γ) = a + γ * (b - a) diff --git a/src/weightedlinearregression.jl b/src/weightedlinearregression.jl index b0b82e2..219567a 100644 --- a/src/weightedlinearregression.jl +++ b/src/weightedlinearregression.jl @@ -1,3 +1,10 @@ +""" + smooth(a, b, γ) + +Weighted average of `a` and `b` with weight `γ`. + +``(1 - γ) * a + γ * b`` +""" smooth(a, b, γ) = a + γ * (b - a) struct WeightedFit From 67f25bdef96fa5173cc31d227e593c7ed5b3e296 Mon Sep 17 00:00:00 2001 From: Fabian Gans Date: Thu, 13 Feb 2025 15:12:06 +0100 Subject: [PATCH 3/3] add unit test for weighted regression --- src/rqatrend.jl | 64 +++++++++++++++++++++++++++++++++++++------------ 1 file changed, 49 insertions(+), 15 deletions(-) diff --git a/src/rqatrend.jl b/src/rqatrend.jl index cead943..bc7491e 100644 --- a/src/rqatrend.jl +++ b/src/rqatrend.jl @@ -83,6 +83,25 @@ function rqatrend_impl(data; thresh=2, border=10, theiler=1, metric=CheckedEucli return 1000.0*(A \ b)[1] # slope end +@testitem "rqatrend_impl" begin + import AllocCheck + import Random + Random.seed!(1234) + + x = 1:0.01:30 + y = sin.(x) + 0.1x + rand(length(x)) + + @test isapprox(RQADeforestation.rqatrend_impl(y; thresh=0.5), -0.11125611687816017) + @test isempty(AllocCheck.check_allocs(RQADeforestation.rqatrend_impl, Tuple{Vector{Float64}})) + + y2 = similar(y, Union{Float64,Missing}) + copy!(y2, y) + y2[[1, 4, 10, 20, 33, 65]] .= missing + + @test isapprox(RQADeforestation.rqatrend_impl(y2; thresh=0.5), -0.11069045524336744) + @test isempty(AllocCheck.check_allocs(RQADeforestation.rqatrend_impl, Tuple{Vector{Union{Float64,Missing}}})) +end + function rqatrend_weightedonline(data, timeaxis; thresh=2, border=10, theiler=1, metric=CheckedEuclidean()) # simplified implementation of https://stats.stackexchange.com/a/370175 and https://github.com/joshday/OnlineStats.jl/blob/b89a99679b13e3047ff9c93a03c303c357931832/src/stats/linreg.jl # x is the diagonal offset, y the percentage of local recurrence @@ -106,25 +125,40 @@ function rqatrend_weightedonline(data, timeaxis; thresh=2, border=10, theiler=1, return 1000.0 * b end - -@testitem "rqatrend_impl" begin - import AllocCheck - import Random +@testitem "Weighted RQATrend with x values" begin + using RQADeforestation + using Distributions + using RecurrenceAnalysis + using Random, Test Random.seed!(1234) + x = 1:200 + noise = rand(Normal(0, 1), 200) + y1 = 2.0 .* sin.(x ./ 10) .+ noise + y2 = copy(noise) + y2[101:200] .-= 2 + + # rp1 = RecurrenceMatrix(y1, 0.2) + # rp2 = RecurrenceMatrix(y2, 0.2) + theiler = 1 + border = 10 + + rqarange = (1+theiler):(200-border) + + r1 = RQADeforestation.rqatrend_weightedonline(y2, x) + @test r1 == -2.66417434040921 + + # Add some missings + y2mis = collect(Union{Missing,Float64}, y2) + y2mis[10:20] .= missing + goodinds = (!ismissing).(y2mis) + + r1miss = RQADeforestation.rqatrend_weightedonline(y2mis[goodinds], range(0, 1, length=length(y2mis))[goodinds]) + r2miss = RQADeforestation.rqatrend_weightedonline(y2mis, range(0, 1, length=length(y2mis))) + @test r1miss == r2miss == -514.9110636319965 +end - x = 1:0.01:30 - y = sin.(x) + 0.1x + rand(length(x)) - @test isapprox(RQADeforestation.rqatrend_impl(y; thresh=0.5), -0.11125611687816017) - @test isempty(AllocCheck.check_allocs(RQADeforestation.rqatrend_impl, Tuple{Vector{Float64}})) - - y2 = similar(y, Union{Float64,Missing}) - copy!(y2, y) - y2[[1, 4, 10, 20, 33, 65]] .= missing - @test isapprox(RQADeforestation.rqatrend_impl(y2; thresh=0.5), -0.11069045524336744) - @test isempty(AllocCheck.check_allocs(RQADeforestation.rqatrend_impl, Tuple{Vector{Union{Float64,Missing}}})) -end function tau_rr(y, d; thresh=2, metric=CheckedEuclidean())