Commit 7e4eda9

Merge pull request #55 from DilumAluthge/dpa/disable-travis-ci
Several tweaks to the CI and testing configurations
2 parents 58c1fbb + 0f9f0d8

6 files changed, +96 -57 lines
Lines changed: 47 additions & 0 deletions
@@ -0,0 +1,47 @@
+name: CI (Julia nightly)
+on:
+  pull_request:
+    branches:
+      - master
+  push:
+    branches:
+      - master
+    tags: '*'
+jobs:
+  test-julia-nightly:
+    name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }}
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        JULIA_NUM_THREADS:
+          - '1'
+          - '6'
+        version:
+          - 'nightly'
+        os:
+          - ubuntu-latest
+        arch:
+          - x64
+    steps:
+      - uses: actions/checkout@v2
+      - uses: julia-actions/setup-julia@v1
+        with:
+          version: ${{ matrix.version }}
+          arch: ${{ matrix.arch }}
+      - uses: actions/cache@v1
+        env:
+          cache-name: cache-artifacts
+        with:
+          path: ~/.julia/artifacts
+          key: ${{ runner.os }}-test-${{ env.cache-name }}-${{ hashFiles('**/Project.toml') }}
+          restore-keys: |
+            ${{ runner.os }}-test-${{ env.cache-name }}-
+            ${{ runner.os }}-test-
+            ${{ runner.os }}-
+      - uses: julia-actions/julia-buildpkg@v1
+      - uses: julia-actions/julia-runtest@v1
+      - uses: julia-actions/julia-processcoverage@v1
+      - uses: codecov/codecov-action@v1
+        with:
+          file: lcov.info

.github/workflows/ci.yml

Lines changed: 7 additions & 4 deletions
@@ -11,14 +11,15 @@ jobs:
   test:
     name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }}
     runs-on: ${{ matrix.os }}
-    continue-on-error: ${{ matrix.version == 'nightly' }}
     strategy:
       fail-fast: false
       matrix:
+        JULIA_NUM_THREADS:
+          - '1'
+          - '6'
         version:
-          - '1.4' # Replace this with the minimum Julia version that your package supports. E.g. if your package requires Julia 1.5 or higher, change this to '1.5'.
-          - '1' # Leave this line unchanged. '1' will automatically expand to the latest stable 1.x release of Julia.
-          - 'nightly'
+          - '1.4'
+          - '1' # automatically expands to the latest stable 1.x release of Julia
         os:
          - ubuntu-latest
         arch:
@@ -41,6 +42,8 @@ jobs:
            ${{ runner.os }}-
      - uses: julia-actions/julia-buildpkg@v1
      - uses: julia-actions/julia-runtest@v1
+        env:
+          JULIA_NUM_THREADS: ${{ matrix.JULIA_NUM_THREADS }}
      - uses: julia-actions/julia-processcoverage@v1
      - uses: codecov/codecov-action@v1
        with:
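The effect of the new `JULIA_NUM_THREADS` matrix axis comes from the `env:` block on `julia-runtest`: Julia reads this variable at startup, and without the block the extra axis would only duplicate jobs without changing anything inside them. A minimal sketch of how a test could confirm the setting arrived (illustrative only; this assertion is not part of Tullio's test suite):

```julia
using Test

# Julia fixes its thread count from JULIA_NUM_THREADS at startup,
# so by the time the tests run the two should agree.
expected = parse(Int, get(ENV, "JULIA_NUM_THREADS", "1"))
@test Threads.nthreads() == expected
```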

.travis.yml

Lines changed: 0 additions & 22 deletions
This file was deleted.

Project.toml

Lines changed: 13 additions & 1 deletion
@@ -9,8 +9,19 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Requires = "ae029012-a4dd-5104-9daa-d747884805df"
 
 [compat]
+CUDA = "1, 2"
 DiffRules = "1"
+FillArrays = "0.10"
+ForwardDiff = "0.10"
+KernelAbstractions = "0.4"
+LoopVectorization = "0.8.26, 0.9.7"
+NamedDims = "0.2"
+OffsetArrays = "1"
 Requires = "1"
+TensorOperations = "3"
+Tracker = "0.2"
+VectorizationBase = "0.12.33, 0.13.10"
+Zygote = "0.5"
 julia = "1.3"
 
 [extras]
@@ -27,7 +38,8 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 TensorOperations = "6aa20fa7-93e2-5fca-9bc0-fbd0db3c71a2"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c"
+VectorizationBase = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f"
 Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
 
 [targets]
-test = ["Test", "CUDA", "FillArrays", "ForwardDiff", "KernelAbstractions", "LinearAlgebra", "LoopVectorization", "NamedDims", "OffsetArrays", "Printf", "Random", "TensorOperations", "Tracker", "Zygote"]
+test = ["Test", "CUDA", "FillArrays", "ForwardDiff", "KernelAbstractions", "LinearAlgebra", "LoopVectorization", "NamedDims", "OffsetArrays", "Printf", "Random", "TensorOperations", "Tracker", "VectorizationBase", "Zygote"]
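Several of the new `[compat]` entries list two versions, e.g. `LoopVectorization = "0.8.26, 0.9.7"`: in Pkg's notation this is a union of two caret ranges, so the resolver may install either the 0.8 or the 0.9 series (matching the two VectorizationBase branches handled in the tests below). A sketch of how to inspect such a spec, assuming Pkg's internal (non-public) parser `Pkg.Types.semver_spec`:

```julia
using Pkg

# "0.8.26, 0.9.7" is the union [0.8.26, 0.9.0) ∪ [0.9.7, 0.10.0):
spec = Pkg.Types.semver_spec("0.8.26, 0.9.7")
v"0.8.30" in spec  # true
v"0.9.0"  in spec  # false
v"0.9.7"  in spec  # true
```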

README.md

Lines changed: 22 additions & 23 deletions
@@ -1,8 +1,7 @@
 <div align="center">
 <h1>Tullio.jl</h1>
 
-<!--[![Travis CI](https://img.shields.io/travis/mcabbott/Tullio.jl/master?logo=travis)](https://travis-ci.org/mcabbott/Tullio.jl)-->
-[![Github CI](https://img.shields.io/github/workflow/status/mcabbott/Tullio.jl/CI?label=build&logo=github)](https://github.com/mcabbott/Tullio.jl/actions)
+[![CI](https://github.com/mcabbott/Tullio.jl/workflows/CI/badge.svg)](https://github.com/mcabbott/Tullio.jl/actions?query=workflow%3ACI)
 [![Gitlab GPU](https://img.shields.io/gitlab/pipeline/JuliaGPU/Tullio.jl/master?logo=nvidia&color=ddd)](https://gitlab.com/JuliaGPU/Tullio.jl/-/pipelines)
 [![Tag Version](https://img.shields.io/github/v/tag/mcabbott/Tullio.jl?color=red&logo=data:image/svg+xml;base64,PD94bWwgdmVyc2lvbj0iMS4wIiBlbmNvZGluZz0iVVRGLTgiPz4KPHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHhtbG5zOnhsaW5rPSJodHRwOi8vd3d3LnczLm9yZy8xOTk5L3hsaW5rIiB3aWR0aD0iMzI1cHQiIGhlaWdodD0iMzAwcHQiIHZpZXdCb3g9IjAgMCAzMjUgMzAwIiB2ZXJzaW9uPSIxLjEiPgo8ZyBpZD0ic3VyZmFjZTkxIj4KPHBhdGggc3R5bGU9IiBzdHJva2U6bm9uZTtmaWxsLXJ1bGU6bm9uemVybztmaWxsOnJnYig3OS42JSwyMy41JSwyMCUpO2ZpbGwtb3BhY2l0eToxOyIgZD0iTSAxNTAuODk4NDM4IDIyNSBDIDE1MC44OTg0MzggMjY2LjQyMTg3NSAxMTcuMzIwMzEyIDMwMCA3NS44OTg0MzggMzAwIEMgMzQuNDc2NTYyIDMwMCAwLjg5ODQzOCAyNjYuNDIxODc1IDAuODk4NDM4IDIyNSBDIDAuODk4NDM4IDE4My41NzgxMjUgMzQuNDc2NTYyIDE1MCA3NS44OTg0MzggMTUwIEMgMTE3LjMyMDMxMiAxNTAgMTUwLjg5ODQzOCAxODMuNTc4MTI1IDE1MC44OTg0MzggMjI1ICIvPgo8cGF0aCBzdHlsZT0iIHN0cm9rZTpub25lO2ZpbGwtcnVsZTpub256ZXJvO2ZpbGw6cmdiKDIyJSw1OS42JSwxNC45JSk7ZmlsbC1vcGFjaXR5OjE7IiBkPSJNIDIzNy41IDc1IEMgMjM3LjUgMTE2LjQyMTg3NSAyMDMuOTIxODc1IDE1MCAxNjIuNSAxNTAgQyAxMjEuMDc4MTI1IDE1MCA4Ny41IDExNi40MjE4NzUgODcuNSA3NSBDIDg3LjUgMzMuNTc4MTI1IDEyMS4wNzgxMjUgMCAxNjIuNSAwIEMgMjAzLjkyMTg3NSAwIDIzNy41IDMzLjU3ODEyNSAyMzcuNSA3NSAiLz4KPHBhdGggc3R5bGU9IiBzdHJva2U6bm9uZTtmaWxsLXJ1bGU6bm9uemVybztmaWxsOnJnYig1OC40JSwzNC41JSw2OS44JSk7ZmlsbC1vcGFjaXR5OjE7IiBkPSJNIDMyNC4xMDE1NjIgMjI1IEMgMzI0LjEwMTU2MiAyNjYuNDIxODc1IDI5MC41MjM0MzggMzAwIDI0OS4xMDE1NjIgMzAwIEMgMjA3LjY3OTY4OCAzMDAgMTc0LjEwMTU2MiAyNjYuNDIxODc1IDE3NC4xMDE1NjIgMjI1IEMgMTc0LjEwMTU2MiAxODMuNTc4MTI1IDIwNy42Nzk2ODggMTUwIDI0OS4xMDE1NjIgMTUwIEMgMjkwLjUyMzQzOCAxNTAgMzI0LjEwMTU2MiAxODMuNTc4MTI1IDMyNC4xMDE1NjIgMjI1ICIvPgo8L2c+Cjwvc3ZnPgo=)](https://github.com/mcabbott/Tullio.jl/releases)
 </div>
@@ -21,7 +20,7 @@ Tullio is a very flexible einsum macro. It understands many array operations wri
 
 Used by itself the macro writes ordinary nested loops much like [`Einsum.@einsum`](https://github.com/ahwillia/Einsum.jl).
 One difference is that it can parse more expressions (such as the convolution `M`, and worse).
-Another is that it will use multi-threading (via [`Threads.@spawn`](https://julialang.org/blog/2019/07/multithreading/)) and recursive tiling, on large enough arrays. 
+Another is that it will use multi-threading (via [`Threads.@spawn`](https://julialang.org/blog/2019/07/multithreading/)) and recursive tiling, on large enough arrays.
 But it also co-operates with various other packages, provided they are loaded before the macro is called:
 
 * It uses [`LoopVectorization.@avx`](https://github.com/chriselrod/LoopVectorization.jl) to speed many things up. (Disable with `avx=false`.) On a good day this will match the speed of OpenBLAS for matrix multiplication.
@@ -48,18 +47,18 @@ The expression need not be just one line, for example:
 
 Here the macro cannot infer the range of the output's indices `x,y`, so they must be provided explicitly.
 (If writing into an existing array, with `out[x,y] = begin ...` or `+=`, then ranges would be taken from there.)
-Because it sees assignment being made, it does not attempt to sum over `a,b`, and it assumes that indices could go out of bounds so does not add `@inbounds` for you. 
+Because it sees assignment being made, it does not attempt to sum over `a,b`, and it assumes that indices could go out of bounds so does not add `@inbounds` for you.
 (Although in fact `mod(x+a) == mod(x+a, axes(mat,1))` is safe.)
 It will also not be able to take a symbolic derivative, but dual numbers will work fine.
 
 Pipe operators `|>` or `<|` indicate functions to be performed *outside* the sum, for example:
 
 ```julia
-@tullio lse[j] := log <| exp(mat[i,j])  # vec(log.(sum(exp.(mat), dims=1))) 
+@tullio lse[j] := log <| exp(mat[i,j])  # vec(log.(sum(exp.(mat), dims=1)))
 ```
 
 The option `@tullio verbose=true` will cause it to print index ranges, symbolic derivatives,
-and notices when it is unable to use the packages mentioned above. 
+and notices when it is unable to use the packages mentioned above.
 And `verbose=2` will print everything.
 
 <details><summary><b>Notation</b></summary>
@@ -93,7 +92,7 @@ S = [0,1,0,0, 0,0,0,0]
 fft(S) ≈ @tullio F[k] := S[x] * exp(-im*pi/8 * (k-1) * x)  (k ∈ axes(S,1))
 
 # Finalisers <| or |> are applied after sum (the two are equivalent):
-@tullio N2[j] := sqrt <| M[i,j]^2  # N2 ≈ map(norm, eachcol(M)) 
+@tullio N2[j] := sqrt <| M[i,j]^2  # N2 ≈ map(norm, eachcol(M))
 @tullio n3[_] := A[i]^3 |> (_)^(1/3)  # n3[1] ≈ norm(A,3), with _ anon. func.
 
 # Reduction over any function:
@@ -115,8 +114,8 @@ using NamedDims, AxisKeys # Dimension names, plus pretty printing:
 </details>
 <details><summary><b>Fast & slow</b></summary>
 
-When used with LoopVectorization, on straightforward matrix multiplication of real numbers, 
-`@tullio` tends to be about as fast as OpenBLAS. Depending on the size, and on your computer. 
+When used with LoopVectorization, on straightforward matrix multiplication of real numbers,
+`@tullio` tends to be about as fast as OpenBLAS. Depending on the size, and on your computer.
 Here's a speed comparison on mine: [v2.5](https://github.com/mcabbott/Tullio.jl/blob/master/benchmarks/02/matmul-0.2.5-Float64-1.5.0.png).
 
 This is a useful diagnostic, but isn't really the goal. Two things `@tullio` is often
@@ -146,7 +145,7 @@ X = rand(1000,1000);
 Complex numbers aren't handled by LoopVectorization, so will be much slower.
 
 Chained multiplication is also very slow, because it doesn't know there's a better
-algorithm. Here it just makes 4 loops, instead of multiplying sequentially, 
+algorithm. Here it just makes 4 loops, instead of multiplying sequentially,
 `30^4` instead of `2 * 30^3` operations:
 
 ```julia
@@ -155,7 +154,7 @@ M1, M2, M3 = randn(30,30), randn(30,30), randn(30,30);
 @btime @tullio M4[i,l] := $M1[i,j] * $M2[j,k] * $M3[k,l]; # 30.401 μs
 ```
 
-At present indices using `pad`, `clamp` or `mod` are also slow. These result in extra 
+At present indices using `pad`, `clamp` or `mod` are also slow. These result in extra
 checks or operations at every iteration, not just around the edges:
 
 ```julia
@@ -169,7 +168,7 @@ x100 = rand(100,100); k7 = randn(7,7);
 @btime conv3($x100, $k7); # 283.634 μs
 
 using Flux
-x104 = reshape(x100,(100,100,1,1)); k74 = reshape(k7,(7,7,1,1)); 
+x104 = reshape(x100,(100,100,1,1)); k74 = reshape(k7,(7,7,1,1));
 conv1(x100, k7) ≈ @btime CrossCor($k74, false)($x104)  # 586.694 μs
 conv2(x100, k7) ≈ @btime Conv($k74, false, stride=2)($x104)  # 901.573 μs
 conv3(x100, k7) ≈ @btime Conv($k74, false, pad=3)($x104)  # 932.658 μs
@@ -180,7 +179,7 @@ conv3(x100, k7) ≈ @btime Conv($k74, false, pad=3)($x104)  # 932.658 μs
 
 ```julia
 using Tullio
-mul(A, B) = @tullio C[i,k] := A[i,j] * B[j,k] 
+mul(A, B) = @tullio C[i,k] := A[i,j] * B[j,k]
 
 A = rand(3,40); B = rand(40,500);
 A * B ≈ mul(A, B)  # true
@@ -219,7 +218,7 @@ end  (x in 1:10, y in 1:10)  # and prevents range of x from being inferred.
 # A stencil?
 offsets = [(a,b) for a in -2:2 for b in -2:2 if a>=b]  # vector of tuples
 
-@tullio out[x,y,1] = begin 
+@tullio out[x,y,1] = begin
     a,b = offsets[k]
     i = clamp(x+a, extrema(axes(mat,1))...)
     # j = clamp(y+b, extrema(axes(mat,2))...)  # can be written clamp(y+b)
@@ -241,11 +240,11 @@ Zygote.gradient(sum∘rowmap, fs, ones(3,2))
 <details><summary><b>Options</b></summary>
 
 The default setting is:
-```@tullio threads=true fastmath=true avx=true tensor=true cuda=256 grad=Base verbose=false A[i,j] := ...``` 
+```@tullio threads=true fastmath=true avx=true tensor=true cuda=256 grad=Base verbose=false A[i,j] := ...```
 * `threads=false` turns off threading, while `threads=64^3` sets a threshold size at which to divide the work (replacing the macro's best guess).
 * `avx=false` turns off the use of `LoopVectorization`, while `avx=4` inserts `@avx unroll=4 for i in ...`.
 * `grad=false` turns off gradient calculation, and `grad=Dual` switches it to use `ForwardDiff` (which must be loaded).
-* `nograd=A` turns off the gradient calculation just for `A`, and `nograd=(A,B,C)` does this for several arrays. 
+* `nograd=A` turns off the gradient calculation just for `A`, and `nograd=(A,B,C)` does this for several arrays.
 * `tensor=false` turns off the use of `TensorOperations`.
 * Assignment `xi = ...` removes `xi` from the list of indices: its range is not calculated, and it will not be summed over. It also disables `@inbounds` since this is now up to you.
 * `verbose=true` prints things like the index ranges inferred, and gradient calculations. `verbose=2` prints absolutely everything.
@@ -256,20 +255,20 @@ The default setting is:
 Implicit:
 * Indices without shifts must have the same range everywhere they appear, but those with shifts (even `A[i+0]`) run over the intersection of possible ranges.
 * Shifted output indices must start at 1, unless `OffsetArrays` is visible in the calling module.
-* The use of `@avx`, and the calculation of gradients, are switched off by sufficiently complex syntax (such as arrays of arrays). 
+* The use of `@avx`, and the calculation of gradients, are switched off by sufficiently complex syntax (such as arrays of arrays).
 * Gradient hooks are attached for any or all of `ReverseDiff`, `Tracker` & `Zygote`. These packages need not be loaded when the macro is run.
 * Gradients are only defined for reductions over `(+)` (default) and `min`, `max`.
 * GPU kernels are only constructed when both `KernelAbstractions` and `CUDA` are visible. The default `cuda=256` is passed to `kernel(CUDA(), 256)`.
 * The CPU kernels from `KernelAbstractions` are called only when `threads=false`; they are not at present very fast, but perhaps useful for testing.
 
 Extras:
-* `A[i] := i^2  (i in 1:10)` is how you specify a range for indices when this can't be inferred. 
+* `A[i] := i^2  (i in 1:10)` is how you specify a range for indices when this can't be inferred.
 * `A[i] := B[i, $col] - C[i, 2]` is how you fix one index to a constant (to prevent `col` being summed over).
-* `A[i] := $d * B[i]` is the preferred way to include other constants. Note that no gradient is calculated for `d`. 
+* `A[i] := $d * B[i]` is the preferred way to include other constants. Note that no gradient is calculated for `d`.
 * Within indexing, `A[mod(i), clamp(j)]` both maps `i` & `j` to lie within `axes(A)`, and disables inference of their ranges from `A`.
 * Similarly, `A[pad(i,3)]` extends the range of `i`, inserting zeros outside of `A`. Instead of zero, `pad=NaN` uses this value as padding. The implementation of this (and `mod`, `clamp`) is not very fast at present.
 * On the left, when making a new array, an underscore like `A[i+_] :=` inserts whatever shift is needed to make `A` one-based.
-* `Tullio.@printgrad (x+y)*log(x/z) x y z` prints out how symbolic derivatives will be done. 
+* `Tullio.@printgrad (x+y)*log(x/z) x y z` prints out how symbolic derivatives will be done.
 
 </details>
 <details><summary><b>Internals</b></summary>
@@ -386,7 +385,7 @@ function ∇act!(::Type, ΔC, ΔA, ΔB, C, A, B, ax_i, ax_j, ax_k, keep)
 end
 ```
 
-Writing `@tullio verbose=2` will print all of these functions out. 
+Writing `@tullio verbose=2` will print all of these functions out.
 
 Scalar reductions, such as `@tullio s := A[i,j] * log(B[j,i])`, are slightly different in that the `act!` function simply returns the sum, i.e. the variable `acc` above.
 
@@ -395,7 +394,7 @@ Scalar reductions, such as `@tullio s := A[i,j] * log(B[j,i])`, are slightly dif
 
 Back-end friends & relatives:
 
-* [LoopVectorization.jl](https://github.com/chriselrod/LoopVectorization.jl) is used here, if available. 
+* [LoopVectorization.jl](https://github.com/chriselrod/LoopVectorization.jl) is used here, if available.
 
 * [Gaius.jl](https://github.com/MasonProtter/Gaius.jl) and [PaddedMatrices.jl](https://github.com/chriselrod/PaddedMatrices.jl) build on that.
 
@@ -415,7 +414,7 @@ Front-end near-lookalikes:
 
 Things you can't run:
 
-* [Tortilla.jl](https://www.youtube.com/watch?v=Rp7sTl9oPNI) seems to exist, publicly, only in this very nice talk. 
+* [Tortilla.jl](https://www.youtube.com/watch?v=Rp7sTl9oPNI) seems to exist, publicly, only in this very nice talk.
 
 * [ArrayMeta.jl](https://github.com/shashi/ArrayMeta.jl) was a Julia 0.5 take on some of this.
 
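Since several of the README lines touched above concern finalisers, here is the `lse` example in self-contained, runnable form (a sketch using only the public `@tullio` syntax the README documents; `mat` is an arbitrary test matrix):

```julia
using Tullio

mat = rand(4, 3)
@tullio lse[j] := log <| exp(mat[i,j])   # log applied after the sum over i
lse ≈ vec(log.(sum(exp.(mat), dims=1)))  # true
```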

test/runtests.jl

Lines changed: 7 additions & 7 deletions
@@ -205,17 +205,17 @@ _gradient(x...) = Yota.grad(x...)[2]
 
 t8 = time()
 using LoopVectorization
+using VectorizationBase
 
-if isdefined(LoopVectorization, :SVec) # version 0.8, for Julia ⩽1.5
-    using LoopVectorization.VectorizationBase: SVec, Mask
-else # version 0.9, supports Julia 1.6
-    using LoopVectorization.VectorizationBase: Vec, Mask
-    SVec{N,T} = Vec{N,T}
+@static if Base.VERSION >= v"1.5"
+    const Vec = VectorizationBase.Vec
+else
+    const Vec = VectorizationBase.SVec
 end
 
 @testset "LoopVectorization onlyone" begin
-    ms = Mask{UInt8}(0x03);   # Mask{8,Bool}<1, 1, 0, 0, 0, 0, 0, 0>
-    sv = SVec{4,Int}(1,2,3,4) # SVec{4,Int64}<1, 2, 3, 4>
+    ms = mask(Val(8), 2)      # Mask{8,Bool}<1, 1, 0, 0, 0, 0, 0, 0>
+    sv = Vec{4,Int}(1,2,3,4)  # Vec{4,Int64}<1, 2, 3, 4>
 
     # preliminaries:
     @test Tullio.allzero(sv) === false
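For reference, a sketch of what the rewritten test lines construct, assuming Julia ≥ 1.5 so that the alias above resolves to `VectorizationBase.Vec` (per the new `[compat]` bounds this pairs with VectorizationBase 0.13):

```julia
using VectorizationBase

# Both calls appear verbatim in the new test; expected values per its comments:
ms = VectorizationBase.mask(Val(8), 2)         # Mask{8,Bool}<1, 1, 0, 0, 0, 0, 0, 0>
sv = VectorizationBase.Vec{4,Int}(1, 2, 3, 4)  # Vec{4,Int64}<1, 2, 3, 4>
```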
