Skip to content

Commit 5439bc3

Browse files
authored
Merge pull request #57 from mcabbott/avxci2
More LoopVectorization tests & checks
2 parents cf5e2aa + 09b60f1 commit 5439bc3

File tree

12 files changed

+228
-185
lines changed

12 files changed

+228
-185
lines changed

.buildkite/pipeline.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
env:
2-
JULIA_NUM_THREADS: "1"
2+
JULIA_NUM_THREADS: "6"
33
# SECRET_CODECOV_TOKEN: "..."
44

55
steps:

.github/workflows/ci-julia-nightly.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ on:
99
tags: '*'
1010
jobs:
1111
test-julia-nightly:
12-
name: NIGHTLY/t-${{ matrix.threads }}/group-${{ matrix.group }}/${{ github.event_name }}/${{ matrix.arch }}+${{ matrix.os }}
12+
name: NIGHTLY -t${{ matrix.threads }} / group-${{ matrix.group }} / ${{ github.event_name }} / ${{ matrix.os }}+${{ matrix.arch }}
1313
runs-on: ${{ matrix.os }}
1414
strategy:
1515
fail-fast: false
@@ -24,7 +24,7 @@ jobs:
2424
- ubuntu-latest
2525
threads:
2626
- '1'
27-
- '2'
27+
- '6'
2828
version:
2929
- 'nightly'
3030
steps:

.github/workflows/ci.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ on:
99
tags: '*'
1010
jobs:
1111
test:
12-
name: v${{ matrix.version }}/t-${{ matrix.threads }}/group-${{ matrix.group }}/${{ github.event_name }}/${{ matrix.arch }}+${{ matrix.os }}
12+
name: v${{ matrix.version }} -t${{ matrix.threads }} / group-${{ matrix.group }} / ${{ github.event_name }} / ${{ matrix.os }}+${{ matrix.arch }}
1313
runs-on: ${{ matrix.os }}
1414
strategy:
1515
fail-fast: false
@@ -24,7 +24,7 @@ jobs:
2424
- ubuntu-latest
2525
threads:
2626
- '1'
27-
- '2'
27+
- '6' # t>2 might be ignored on Julia <= 1.5
2828
version:
2929
- '1.4'
3030
- '1' # automatically expands to the latest stable 1.x release of Julia

Project.toml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,15 +13,15 @@ CUDA = "1, 2"
1313
DiffRules = "1"
1414
FillArrays = "0.10"
1515
ForwardDiff = "0.10"
16-
KernelAbstractions = "0.4"
17-
LoopVectorization = "0.8.26, 0.9.7"
16+
KernelAbstractions = "0.5.2"
17+
LoopVectorization = "0.8.26, 0.9.20"
1818
NamedDims = "0.2"
1919
OffsetArrays = "1"
2020
Requires = "1"
2121
TensorOperations = "3"
2222
Tracker = "0.2"
23-
VectorizationBase = "0.12.33, 0.13.10"
24-
Zygote = "0.5"
23+
VectorizationBase = "0.12.33, 0.15.7"
24+
Zygote = "0.6"
2525
julia = "1.3"
2626

2727
[extras]

README.md

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ using Tullio
9999
A = [abs2(i - 11) for i in 1:21]
100100

101101
# Downsample -- range of i is that allowed by both terms:
102-
@tullio D[i] := (A[2i] + A[2i+1])/2 # 1:10 == intersect(1:10, 0:10)
102+
@tullio B[i] := (A[2i] + A[2i+1])/2 # 1:10 == intersect(1:10, 0:10)
103103

104104
# Shifts -- range of i calculated in terms of that given for j:
105105
@tullio M[i,j] := A[i+j-1] (j in 1:15) # i in 1:7
@@ -129,6 +129,9 @@ fft(S) ≈ @tullio F[k] := S[x] * exp(-im*pi/8 * (k-1) * x) (k ∈ axes(S,1))
129129
@tullio (*) P[i] := A[i+k] (k in 0:2) # product
130130
@tullio (max) X[i,_] := D[i,j] # maximum(D, dims=2), almost
131131

132+
min1(x,y) = ifelse(first(x) < first(y), x, y); # findmin(D, dims=1), almost:
133+
@tullio (min1) Ts[j+_] := (D[i,j], (i,j)) init=(typemax(Int), (0,0))
134+
132135
# Access to fields & arrays -- this uses j ∈ eachindex(first(N).c)
133136
N = [(a=i, b=i^2, c=fill(i^3,3)) for i in 1:10]
134137
@tullio T[i,j] := (N[i].a // 1, N[i].c[j])
@@ -449,7 +452,7 @@ Front-end near-lookalikes:
449452

450453
* [Einsum.jl](https://github.com/ahwillia/Einsum.jl) makes simple loops. See [tests/einsum.jl](https://github.com/mcabbott/Tullio.jl/blob/master/test/einsum.jl) where `using Tullio: @einsum` is an almost-seamless replacement.
451454

452-
* [TensorOperations.jl](https://github.com/Jutho/TensorOperations.jl) and [OMEinsum.jl](https://github.com/under-Peter/OMEinsum.jl) identify patterns on which they can call various basic operations.
455+
* [TensorOperations.jl](https://github.com/Jutho/TensorOperations.jl) and [OMEinsum.jl](https://github.com/under-Peter/OMEinsum.jl) identify patterns on which they can call various basic operations. [TensorRules.jl](https://github.com/ho-oto/TensorRules.jl) makes `@tensor` differentiable; see also [TensorGrad.jl](https://github.com/mcabbott/TensorGrad.jl) and [TensorTrack.jl](https://github.com/mcabbott/TensorTrack.jl) for earlier attempts.
453456

454457
* [TensorCast.jl](https://github.com/mcabbott/TensorCast.jl) expresses everything as Julia array operations, broadcasting and reduction. (OMEinsum.jl also treats some cases as a special lazy broadcast-reduction.)
455458

src/eval.jl

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -55,29 +55,27 @@ using Requires
5555
using .LoopVectorization
5656
if isdefined(LoopVectorization, :SVec) # version 0.8, for Julia ⩽1.5
5757
using .LoopVectorization.VectorizationBase: SVec, Mask, prevpow2
58+
@require ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" begin
59+
# Dual numbers + svec, not needed on version 0.9
60+
include("grad/avxdual.jl")
61+
end
5862
else # version 0.9, supports Julia 1.6
5963
using .LoopVectorization.VectorizationBase: Vec, Mask, prevpow2
6064
SVec{N,T} = Vec{N,T}
6165
end
62-
66+
#=
6367
# Functions needed for safe vectorised max gradient
6468
@inline Tullio.onlyone(cond::Bool, seen::SVec) = cond && allzero(seen)
6569
6670
@inline Tullio.onlyone(cond::Mask{W}) where {W} = Mask{W}(prevpow2(cond.u))
6771
@inline Tullio.onlyone(cond::Mask, seen::Union{Int,SVec}) =
6872
Tullio.allzero(seen) ? Tullio.onlyone(cond) : zero(cond)
6973
70-
@inline allzero(seen::Int) = iszero(seen)
71-
@inline allzero(seen::SVec{N,Int}) where {N} = iszero((!iszero(seen)).u)
72-
73-
# @inline Tullio.anyone(cond::Mask) = cond != zero(cond)
74-
@inline Tullio.anyone(cond::Mask) = cond.u != zero(cond).u # for v0.9
74+
@inline allzero(seen::Integer) = iszero(seen)
75+
@inline allzero(seen::SVec) = iszero((!iszero(seen)).u)
7576
76-
@require ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" begin
77-
# Dual numbers + svec, should live in PaddedMatricesForwardDiff?
78-
# (And where would the conditional loading go, still here?)
79-
include("grad/avxdual.jl")
80-
end
77+
@inline Tullio.anyone(cond::Mask) = !iszero(cond.u)
78+
=#
8179
end
8280

8381
#========== CuArrays ==========#

src/forward.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@ function insert_forward_gradient(axislist, store)
1717

1818
defineepsilons, readepsilons = [], []
1919
for (d, (Aepsilon, Aex)) in enumerate(epsilondict)
20-
basis = [i==d ? :(one($TYP)) : :(zero($TYP)) for i in 1:length(epsilondict)]
21-
push!(defineepsilons, :($Aepsilon = ForwardDiff.Dual(zero($TYP), ($(basis...),))))
20+
basis = [i==d ? :($one($TYP)) : :($zero($TYP)) for i in 1:length(epsilondict)]
21+
push!(defineepsilons, :($Aepsilon = ForwardDiff.Dual($zero($TYP), ($(basis...),))))
2222
push!(readepsilons, :($Aex = $Aex + ForwardDiff.partials($ZED, $d) * $dZ[$(store.leftraw...)]))
2323
end
2424

src/macro.jl

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -466,7 +466,7 @@ padmodclamp_replace(s, store, inside=false) = s
466466
padmodclamp_replace(ex::Expr, store, inside=false) =
467467
if ex.head == :(=) && @capture_(ex.args[1], A_[inds__])
468468
# This tricky case is 𝛥A[pad(i,2)] = 𝛥A[pad(i,2)] + ...
469-
Aex, fun = padmodclamp_pair(A, inds, store)
469+
Aex, fun = padmodclamp_pair(A, inds, store, true)
470470
right = if fun != identity
471471
padmodclamp_replace(ex.args[2], store, true)
472472
else
@@ -481,7 +481,7 @@ padmodclamp_replace(ex::Expr, store, inside=false) =
481481
Expr(ex.head, args...)
482482
end
483483

484-
padmodclamp_pair(A, inds, store) = begin
484+
padmodclamp_pair(A, inds, store, assign=false) = begin
485485
nopadif = []
486486
inds4 = map(enumerate(inds)) do (d,ex)
487487
isexpr(ex, :call) || return ex
@@ -494,7 +494,8 @@ padmodclamp_pair(A, inds, store) = begin
494494
elseif ex.args[1] == :pad && length(ex.args) >= 2
495495
i = ex.args[2]
496496
if !all(==(0), ex.args[3:end]) || length(ex.args) == 2
497-
push!(nopadif, :($i ∈ axes($A,$d)))
497+
# push!(nopadif, :($i >= first(axes($A,$d))), :($i <= last(axes($A,$d)))) # allows avx
498+
push!(nopadif, :($i >= first(axes($A,$d))), :($i <= Base.last(axes($A,$d)))) # allows avx... but LV 0.8, Julia 1.4, needs Base?
498499
end
499500
return i
500501
end
@@ -508,8 +509,10 @@ padmodclamp_pair(A, inds, store) = begin
508509
for c2 in nopadif[2:end]
509510
cond = :($cond & $c2)
510511
end
511-
if store.padkeyword == TYP # default
512-
ex -> :($cond ? $ex : $zero($eltype($A)))
512+
if assign # for gradients, this wraps 𝛥A[pad(i,2)] = 𝛥A[pad(i,2)] + ...
513+
ex -> :($cond && $ex)
514+
elseif store.padkeyword == TYP # default, pad with zero
515+
ex -> :($cond ? $ex : zero(eltype($A)))
513516
else
514517
ex -> :($cond ? $ex : $convert($eltype($A), $(store.padkeyword)))
515518
end
@@ -1070,16 +1073,15 @@ function make_many_actors(act!, args, ex1, outer::Vector, ex3, inner::Vector, ex
10701073
safe = if act! == ACT!
10711074
isempty(store.unsafeleft)
10721075
else # working on ∇act!
1073-
isempty(store.unsaferight) &&
1074-
store.redfun == :+ && # Disable @avx for min/max grad, #53
1075-
store.grad != :Dual # and for use with ForwardDiff
1076+
isempty(store.unsaferight)
10761077
end
10771078

10781079
if safe && store.avx != false && isdefined(store.mod, :LoopVectorization)
10791080
unroll = store.avx == true ? 0 : store.avx # unroll=0 is the default setting
10801081
info1 = store.verbose>0 ? :(@info "running LoopVectorization actor $($note)" maxlog=3 _id=$(hash(store))) : nothing
10811082
check1 = store.verbose>0 ? :(LoopVectorization.check_args($(store.arrays...)) || @error "rejected by LoopVectorization's check_args! $($note)" maxlog=3 _id=$(hash(store))) : nothing
10821083
try
1084+
act! == ACT! || store.redfun == :+ || throw("use of LoopVectorization for min/max gradients is disabled")
10831085
lex = if isnothing(exloopfinal)
10841086
quote
10851087

0 commit comments

Comments
 (0)