diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 3bcb66d..6674c61 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -26,7 +26,7 @@ jobs:
           - '1'
           - '6' # t>2 might be ignored on Julia <= 1.5
         version:
-          - '1.4'
+          - '1.5'
           - '1' # automatically expands to the latest stable 1.x release of Julia
     steps:
       - uses: actions/checkout@v2
diff --git a/Project.toml b/Project.toml
index 50cedaf..9769ad3 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "Tullio"
 uuid = "bc48ee85-29a4-5162-ae0b-a64e1601d4bc"
 authors = ["Michael Abbott"]
-version = "0.2.14"
+version = "0.3.0"
 
 [deps]
 DiffRules = "b552c78f-8df3-52c6-915a-8e097449b14b"
@@ -9,23 +9,25 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Requires = "ae029012-a4dd-5104-9daa-d747884805df"
 
 [compat]
-CUDA = "1, 2"
+CUDA = "2"
+CUDAKernels = "0.1"
 DiffRules = "1"
-FillArrays = "0.10"
+FillArrays = "0.10, 0.11"
 ForwardDiff = "0.10"
-KernelAbstractions = "0.5.2"
-LoopVectorization = "0.8.26, 0.9.20"
+KernelAbstractions = "0.6"
+LoopVectorization = "0.12.12"
 NamedDims = "0.2"
 OffsetArrays = "1"
 Requires = "1"
 TensorOperations = "3"
 Tracker = "0.2"
-VectorizationBase = "0.12.33, 0.15.7"
-Zygote = "0.6"
-julia = "1.3"
+VectorizationBase = "0.19.30"
+Zygote = "0.6.9"
+julia = "1.5"
 
 [extras]
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
+CUDAKernels = "72cfdca4-0801-4ab0-bf6a-d52aa10adc57"
 FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
@@ -42,4 +44,4 @@ VectorizationBase = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f"
 Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
 
 [targets]
-test = ["Test", "CUDA", "FillArrays", "ForwardDiff", "KernelAbstractions", "LinearAlgebra", "LoopVectorization", "NamedDims", "OffsetArrays", "Printf", "Random", "TensorOperations", "Tracker", "VectorizationBase", "Zygote"]
+test = ["Test", "CUDA", "CUDAKernels", "FillArrays", "ForwardDiff", "KernelAbstractions", "LinearAlgebra", "LoopVectorization", "NamedDims", "OffsetArrays", "Printf", "Random", "TensorOperations", "Tracker", "VectorizationBase", "Zygote"]
diff --git a/src/eval.jl b/src/eval.jl
index 200df6f..22ecf4c 100644
--- a/src/eval.jl
+++ b/src/eval.jl
@@ -51,19 +51,13 @@ using Requires
 
 @inline anyone(cond::Bool) = cond
 
+#=
+
 @init @require LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890" begin
-    using .LoopVectorization
-    if isdefined(LoopVectorization, :SVec) # version 0.8, for Julia ⩽1.5
-        using .LoopVectorization.VectorizationBase: SVec, Mask, prevpow2
-        @require ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" begin
-            # Dual numbers + svec, not needed on version 0.9
-            include("grad/avxdual.jl")
-        end
-    else # version 0.9, supports Julia 1.6
-        using .LoopVectorization.VectorizationBase: Vec, Mask, prevpow2
-        SVec{N,T} = Vec{N,T}
+    using .LoopVectorization # version 0.9+ only now
+    using .LoopVectorization.VectorizationBase: Vec, Mask, prevpow2
+    SVec{N,T} = Vec{N,T}
     end
-#=
 
     # Functions needed for safe vectorised max gradient
     @inline Tullio.onlyone(cond::Bool, seen::SVec) = cond && allzero(seen)
@@ -75,9 +69,10 @@ using Requires
     @inline allzero(seen::SVec) = iszero((!iszero(seen)).u)
 
     @inline Tullio.anyone(cond::Mask) = !iszero(cond.u)
-=#
 end
 
+=#
+
 #========== CuArrays ==========#
 
 using Requires
diff --git a/test/cuda.jl b/test/cuda.jl
index 80bd0dc..2c1624a 100644
--- a/test/cuda.jl
+++ b/test/cuda.jl
@@ -1,6 +1,6 @@
 using Tullio, Test
-using CUDA, KernelAbstractions
+using CUDA, CUDAKernels, KernelAbstractions
 CUDA.allowscalar(false)
 
 using Tracker, ForwardDiff
 @tullio grad=Base
diff --git a/test/gradients.jl b/test/gradients.jl
index 194a995..060ef33 100644
--- a/test/gradients.jl
+++ b/test/gradients.jl
@@ -6,7 +6,7 @@ This file is run several times
 =#
 
 using Tullio, Test, ForwardDiff, Random
-# using Tracker; _gradient(x...) = Tracker.gradient(x...); GRAD = :Tracker
+# using Tracker; _gradient(x...) = Tracker.gradient(x...); GRAD = :Tracker; macro printline() end
 
 function gradtest(f, dims)
     x = randn(dims...)
@@ -16,8 +16,6 @@ end
 
 @testset "simple" begin
 
-if Tullio._GRAD[] != :Dual || VERSION >= v"1.5" # These 3 give errors on Julia 1.4, LV 0.8, I have no idea why.
-
     @test _gradient(x -> sum(@tullio y[i] := 2*x[i]), rand(3))[1] == [2,2,2]
     @test _gradient(x -> sum(@tullio y[i] := 2*x[i] + i), rand(3))[1] == [2,2,2]
 
@@ -32,7 +30,6 @@ if Tullio._GRAD[] != :Dual || VERSION >= v"1.5" # These 3 give errors on Julia 1
     g_fd = ForwardDiff.gradient(x -> sum(sin, g2(x)), r100)
     @test g_fd ≈ _gradient(x -> sum(sin, g2(x)), r100)[1]
 
-end
     r100 = randn(100)
 
     # scalar output
@@ -68,6 +65,9 @@ end
     @test abs2_grad ≈ _gradient(v -> (@tullio s := abs2(1 + v[i]^2)), va)[1]
 end
 
+
+@printline
+
 @testset "zero-arrays" begin
 
     # Using zero-dim arrays fails on ReverseDiff & Tracker
@@ -106,6 +106,9 @@ end
     # [1.0, 4.0, 9.0, 16.0, 25.0, 36.0, 0.0, 64.0, 81.0, 100.0, 121.0] ≈ [1.0, 4.0, 9.0, 16.0, 25.0, 36.0, 49.0, 64.0, 81.0, 100.0, 121.0]
 end
 
+
+@printline
+
 @testset "shifts, etc" begin
 
     c1(N,K) = @tullio M[x,y,c] := N[x+i-1, y+j-1,c] * K[i,j]
@@ -158,6 +161,9 @@ end
     end
 end
 
+
+@printline
+
 @testset "from TensorTrace" begin
     # These can all be handled using TensorOperations
 
@@ -196,6 +202,8 @@ end
     con7(x) = @tullio C[m,n,j,i] := 44 * x[i,j,k] * r392[k,m,n]
     @test gradtest(con7, (9,2,3))
 
+    @printline
+
     ## contract! B
     con8b(x) = @tullio K[i,j] := 5 * r32[i,k] * x[k,j]
     @test gradtest(con8b, (2,3))
@@ -215,15 +223,19 @@ end
     con14(x) = @tullio K[i,j] := r3399[a,b,j,k] * x[b,c,k,i] * r33[a,c]
     @test gradtest(con14, (3,3,9,9))
 
+    @printline
+
     ## scalar -- one with :=, one without
-    sc1(x) = @tullio s = r22[b,β] * x[a,b,c] * r312[c,a,β]
-    @test gradtest(sc1, (1,2,3))
+    sc1(x) = @tullio s = r22[b,β] * x[a,b,c] * r312[c,a,β] avx=false
+    @test gradtest(sc1, (1,2,3)) # UndefVarError: ####op#798_0 not defined
 
-    sc2(x) = @tullio s := x[γ,c] * r3399[c,γ,i,i]
+    sc2(x) = @tullio s := x[γ,c] * r3399[c,γ,i,i] avx=false
     @test gradtest(sc2, (3,3))
 
 end
 
+@printline
+
 if Tullio._GRAD[] != :Dual
 #=
     @testset "products" begin
@@ -319,6 +331,9 @@ if Tullio._GRAD[] != :Dual
     # I suspect that @avx is re-ordering loops, which makes onlyone() incorrect.
     end
 
+
+    @printline
+
     @testset "finalisers" begin
 
         norm2(m) = @tullio n[i] := m[i,j]^2 |> sqrt
@@ -328,9 +343,11 @@ if Tullio._GRAD[] != :Dual
         @test _gradient(sum∘norm2, mat)[1] ≈ ForwardDiff.gradient(sum∘norm2, mat)
         @test gradtest(norm2, (3,4))
 
-        layer(x) = @tullio y[i,k] := mat[i,j] * x[j,k] |> tanh
+        layer(x) = @tullio y[i,k] := mat[i,j] * x[j,k] |> tanh avx=false # this takes 15 mins +?
         @test gradtest(layer, (3,4))
 
+        @printline
+
         lse1(mat) = @tullio lse[j] := log <| exp(mat[i,j])
         @test gradtest(lse1, (3,4))
 
@@ -344,6 +361,8 @@ if Tullio._GRAD[] != :Dual
     end
 end
 
+@printline
+
 if GRAD == :Zygote
 
     @testset "nograd keyword" begin
@@ -358,3 +377,5 @@ if GRAD == :Zygote
     end
 
 end
+
+@printline
diff --git a/test/group-3.jl b/test/group-3.jl
index cbac348..9e15488 100644
--- a/test/group-3.jl
+++ b/test/group-3.jl
@@ -153,20 +153,15 @@ end
 
 @testset "parsing + LoopVectorization" begin include("parsing.jl") end
 
-if test_group != "3" # Github CI fails, on some runs, "ERROR: Package Tullio errored during testing (received signal: KILL)"
-    # https://github.com/mcabbott/Tullio.jl/pull/57/checks?check_run_id=1753332805
-
-    using Tracker
-    GRAD = :Tracker
-    _gradient(x...) = Tracker.gradient(x...)
-
-    @tullio grad=Base
-    @testset "gradients: Tracker + DiffRules + LoopVectorization" begin include("gradients.jl") end
+using Tracker
+GRAD = :Tracker
+_gradient(x...) = Tracker.gradient(x...)
 
-    @tullio grad=Dual
-    @testset "gradients: Tracker + ForwardDiff + LoopVectorization" begin include("gradients.jl") end
+@tullio grad=Base
+@testset "gradients: Tracker + DiffRules + LoopVectorization" begin include("gradients.jl") end
 
-end
+@tullio grad=Dual
+@testset "gradients: Tracker + ForwardDiff + LoopVectorization" begin include("gradients.jl") end
 
 @info @sprintf("LoopVectorization tests took %.1f seconds", time()-t8)
 
diff --git a/test/parsing.jl b/test/parsing.jl
index 2a40ae2..5244617 100644
--- a/test/parsing.jl
+++ b/test/parsing.jl
@@ -11,7 +11,7 @@ using Tullio, Test, LinearAlgebra
     @test A == [i^2 for i in 1:10]
 
     # diagonals
-    @tullio D[i,i] := trunc(Int, sqrt(A[i])) avx=false # MethodError: no method matching trunc(::Type{Int64}, ::VectorizationBase.Vec{4,Float64})
+    @tullio D[i,i] := trunc(Int, sqrt(A[i]))
     @test D == Diagonal(sqrt.(A))
 
     # arrays of arrays
@@ -42,11 +42,11 @@ using Tullio, Test, LinearAlgebra
     @test S ≈ S′ ≈ sum(A)/2
 
     # almost scalar
-    @tullio Z[] := A[i] + A[j]
+    @tullio Z[] := A[i] + A[j] avx=false
     @test Z isa Array{Int,0}
-    @tullio Z′[1,1] := A[i] + A[j]
+    @tullio Z′[1,1] := A[i] + A[j] avx=false
     @test size(Z′) == (1,1)
-    @tullio Z′′[_] := A[i] + A[j]
+    @tullio Z′′[_] := A[i] + A[j] avx=false
     @test size(Z′′) == (1,)
     @test Z[] == Z′[1,1] == Z′′[1] == sum(A .+ A')
 
@@ -79,14 +79,13 @@ using Tullio, Test, LinearAlgebra
     @test A2 == 2 .* A
 
     # broadcasting
-    @tullio S[i] := sqrt.(M[:,i]) # avx & grad now disabled by try/catch
-    # @tullio T[i] := A[i] .+ A[j] # dot does nothing, fails with LoopVectorization loaded
+    @tullio S[i] := sqrt.(M[:,i])
 
     # scope
     f(x,k) = @tullio y[i] := x[i] + i + $k
     @test f(ones(3),j) == 1 .+ (1:3) .+ j
 
-    g(x) = @tullio y := sqrt(x[i])
+    g(x) = @tullio y := sqrt(x[i]) avx=false
     @test g(fill(4,5)) == 10
 
     # ranges
@@ -128,7 +127,7 @@ using Tullio, Test, LinearAlgebra
     @test H[1,:] == M[2,:] # but H[3,:] gets written into twice.
 
     J′ = [1,2,10]
-    @tullio H′[J′[i'],k] := A[k] avx=false # StackOverflowError
+    @tullio H′[J′[i'],k] := A[k]
     @test size(H′) == (10, length(A))
     @test H′[2,:] == A
     @test H′[3,4] == 0 # zeroed before being written into
@@ -168,6 +167,8 @@ using Tullio, Test, LinearAlgebra
 
 end
 
+@printline
+
 @testset "in-place" begin
 
     A = [i^2 for i in 1:10]
@@ -226,7 +227,7 @@ end
 
     # scatter operation
     D = similar(A, 10, 10) .= 999
    inds = [2,3,5,2]
-    @tullio D[inds[i],j] = A[j] avx=false # StackOverflowError
+    @tullio D[inds[i],j] = A[j]
     @test D[2,:] == A
     @test D[4,4] != 0 # not zeroed before writing.
@@ -259,6 +260,8 @@ end
 
 end
 
+@printline
+
 if !@isdefined OffsetArray
 
     @testset "without packages" begin
@@ -351,7 +354,7 @@ using OffsetArrays
 
     @test axes(@tullio I[i,j] := A[i+j÷2] + 0 * B[j]) == (1:8, 1:4)
     @test axes(@tullio I[i,j] := A[i+(j-1)÷2] + 0 * B[j]) == (1:9, 1:4)
-    @test axes(@tullio I[i,j] := A[2i+(j-1)÷2] + 0 * B[j]) == (1:4, 1:4)
+    @test axes(@tullio I[i,j] := A[2i+(j-1)÷2] + 0 * B[j] avx=false) == (1:4, 1:4) # wtf?
     @test axes(@tullio I[i,j] := A[i+(j-1)÷3] + 0 * B[j]) == (1:9, 1:4)
 
    @test_throws LoadError @eval @tullio I[i,j] := A[i+j] # under-specified
@@ -382,7 +385,7 @@ using OffsetArrays
     @test L == vec(I) .+ 1
 
     V = OffsetArray([1,10,100,1000],2) # offset vector
-    @test axes(@tullio _[i] := log10(V[i])) == (3:6,)
+    @test axes(@tullio _[i] := log10(V[i]) avx=false) == (3:6,) # https://github.com/JuliaSIMD/LoopVectorization.jl/issues/249
 
     # indexing by an array
     @tullio W[i] := I[end-i+1] avx=false # does not use lastindex(I,1)
@@ -418,6 +421,8 @@ using OffsetArrays
     @test_throws LoadError @eval @tullio Z[i+_] = A[2i+10] # in-place
 end
 
+@printline
+
 @testset "modulo, clamped & padded" begin
 
     A = [i^2 for i in 1:10]
@@ -427,7 +432,7 @@ end
 
     @test vcat(B, fill(B[end],5)) == @tullio D[i] := min(A[i], B[clamp(i)])
     @test [4,16,36,64,100,4] == @tullio E[i] := A[mod(2i)] i in 1:6
 
-    @test vcat(zeros(5), B, zeros(5)) == @tullio C[i] := B[pad(i-5,5)] avx=false # 1.4
+    @test vcat(zeros(5), B, zeros(5)) == @tullio C[i] := B[pad(i-5,5)] avx=false # no method matching _vload(::VectorizationBase.FastRange{Int64,
     @test vcat(zeros(2), A, zeros(3)) == @tullio D[i+_] := A[pad(i,2,3)]
     @test vcat(A, zeros(10)) == @tullio E[i] := A[pad(i)] i in 1:20
@@ -462,6 +467,8 @@ end
     @test_throws InexactError @tullio J[i,i] := A[i] pad=im
 end
 
+@printline
+
 @testset "other reductions" begin
 
     A = [i^2 for i in 1:10]
@@ -473,7 +480,7 @@ end
 
     @test true == @tullio (&) p := A[i] > 0
     @test true === @tullio (&) p := A[i] > 0
-    @test true == @tullio (|) q := A[i] > 50
+    @test true == @tullio (|) q := A[i] > 50 avx=false # zero_mask not defined
 
     # in-place
     C = copy(A)
@@ -494,7 +501,7 @@ end
     @test 200 == @tullio (max) m := A[i] init=200
     @tullio (max) C[i] := i^2 (i in 1:10, j in 1:1) init=33.3 # widens type
     @test C == max.(33.3, A)
-    @tullio C[i] := 0 (i in 1:10, j in 1:1) init=randn() tensor=false # runs once
+    @tullio C[i] := 0 (i in 1:10, j in 1:1) init=randn() avx=false tensor=false # UndefVarError: ##op#1444_0__1 not defined
     @test C == fill(C[1], 10)
 
     # more dimensions
@@ -530,11 +537,11 @@ end
 
     # promotion of init & += cases:
     B = rand(10)
-    @test sum(B.^2)+2 ≈ @tullio s2 := B[i]^2 init=2 threads=false avx=false # InexactError: Int64 on LV 0.8
+    @test sum(B.^2)+2 ≈ @tullio s2 := B[i]^2 init=2 threads=false
     s3 = 3
     @test sum(B.^2)+3 ≈ @tullio s3 += B[i]^2
     s4 = 4im
-    @test sum(B.^2)+4im ≈ @tullio s4 += B[i]^2 avx=false # TypeError: in AbstractSIMD, in T, expected T<:(Union{Bool, Float32
+    @test sum(B.^2)+4im ≈ @tullio s4 += B[i]^2
 
     # no reduction means no redfun, and no init:
     @test_throws LoadError @eval @tullio (max) A2[i] := A[i]^2
@@ -542,6 +549,8 @@ end
 
 end
 
+@printline
+
 @testset "finalisers" begin
 
     A = [i^2 for i in 1:10]
@@ -607,6 +616,8 @@ end
 
 end
 
+@printline
+
 @testset "options" begin
 
     # keyword threads accepts false or a positive integer
@@ -618,7 +629,7 @@ end
     @test_throws LoadError @eval @tullio A[i] := (1:10)[i]^2 threads=:maybe
 
     # keyword verbose accepts values [true, false, 2, 3]
-    @tullio A[i] := (1:10)[i]^2 verbose=1 avx=false # @error: rejected by LoopVectorization's check_args
+    @tullio A[i] := (1:10)[i]^2 verbose=1
     @tullio A[i] := (1:10)[i]^2 verbose=false
     @test_throws LoadError @eval @tullio A[i] := (1:10)[i]^2 verbose=4
 
@@ -693,3 +704,5 @@ end
     end
 
 end
+
+@printline
diff --git a/test/runtests.jl b/test/runtests.jl
index cca58fd..18601de 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -17,11 +17,16 @@ if Threads.nthreads() > 1 # use threading even on small arrays
     Tullio.TILE[] = 32
 end
 
+macro printline() # useful in hunting for where tests get stuck
+    file = split(string(__source__.file), "/")[end]
+    printstyled(" ", file, ":", __source__.line, "\n", color=:light_black)
+end
+
 if test_group in ["all", "1"]
     include("group-1.jl")
 end
 
-if test_group in ["all", "2"] && VERSION <= v"1.6" # KA testing time-out https://github.com/JuliaGPU/KernelAbstractions.jl/issues/155
+if test_group in ["all", "2"]
     include("group-2.jl")
 end
 
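
Note on the @printline helper added in test/runtests.jl above: it prints the current file and line as each test file is processed, which helps locate where a long run stalls. A minimal sketch of the same idea outside the test suite (the surrounding calls are illustrative, not part of the package):

    macro printline()
        # runs when the macro is expanded, which for top-level code tracks evaluation order
        file = split(string(__source__.file), "/")[end]
        printstyled(" ", file, ":", __source__.line, "\n", color=:light_black)
    end

    @printline                 # prints something like " sketch.jl:7" in light grey
    sum(abs2, randn(10^6))     # some slow work between the two markers
    @printline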
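Note on the avx=false options sprinkled through the tests above: this is Tullio's per-call keyword for skipping the LoopVectorization path when it mis-handles a particular expression under LV 0.12. A hedged sketch of the pattern, with illustrative array names (not taken from the test files):

    using Tullio, LoopVectorization   # loading LV enables the @avx code path

    A = collect(1:10)
    @tullio Z[] := A[i] + A[j]              # may use the LoopVectorization kernel
    @tullio Z[] := A[i] + A[j] avx=false    # same result, plain loops instead
    @tullio S := A[i]^2 threads=false       # threading can be switched off per call in the same way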