|
45 | 45 | W = VectorizationBase.pick_vector_width(Float64)
|
46 | 46 | @test @inferred(VectorizationBase.pick_integer(Val(W))) == (VectorizationBase.AVX512DQ ? Int64 : Int32)
|
47 | 47 |
|
48 |
| - |
| 48 | + |
49 | 49 | @test first(A) === A[1]
|
50 | 50 | @test W64S == W64
|
51 | 51 | @testset "Struct-Wrapped Vec" begin
|
|
175 | 175 | @test !VectorizationBase.vall(Mask{4}(0xfc))
|
176 | 176 | @test VectorizationBase.vall(Mask{8}(0xff))
|
177 | 177 | @test VectorizationBase.vall(Mask{4}(0xcf))
|
178 |
| - |
| 178 | + |
179 | 179 | @test VectorizationBase.vany(Mask{8}(0xfc))
|
180 | 180 | @test VectorizationBase.vany(Mask{4}(0xfc))
|
181 | 181 | @test !VectorizationBase.vany(Mask{8}(0x00))
|
|
211 | 211 | @test (Mask{8}(0xac) ⊻ true) === Mask{8}(0x53)
|
212 | 212 | @test (false ⊻ Mask{8}(0xac)) === Mask{8}(0xac)
|
213 | 213 | @test (true ⊻ Mask{8}(0xac)) === Mask{8}(0x53)
|
214 |
| - |
| 214 | + |
215 | 215 | @test (Mask{4}(0x05) | true) === Mask{4}(0x0f)
|
216 | 216 | @test (Mask{4}(0x05) | false) === Mask{4}(0x05)
|
217 | 217 | @test (true | Mask{4}(0x05)) === Mask{4}(0x0f)
|
|
239 | 239 | # @test VectorizationBase.size_loads(A,2, Val(8)) == eval(VectorizationBase.num_vector_load_expr(@__MODULE__, :((() -> 17)()), 8)) == eval(VectorizationBase.num_vector_load_expr(@__MODULE__, 17, 8)) == divrem(size(A,2), 8)
|
240 | 240 | # end
|
241 | 241 |
|
242 |
| - |
| 242 | + |
243 | 243 | @testset "vector_width.jl" begin
|
244 | 244 | @test all(VectorizationBase.ispow2, 0:1)
|
245 | 245 | @test all(i -> !any(VectorizationBase.ispow2, 1+(1 << (i-1)):(1 << i)-1 ) && VectorizationBase.ispow2(1 << i), 2:9)
|
|
282 | 282 | @test [vload(stridedpointer(C), (1+w, 2+w, 3)) for w ∈ 1:W64] == getindex.(Ref(C), 1 .+ (1:W64), 2 .+ (1:W64), 3)
|
283 | 283 | vstore!(stridedpointer(C), !mtest, ((MM{16})(17), 3, 4))
|
284 | 284 | @test .!v1 == C[17:32,3,4] == tovector(vload(stridedpointer(C), ((MM{16})(17), 3, 4)))
|
285 |
| - |
| 285 | + |
286 | 286 | dims = (41,42,43) .* 3;
|
287 | 287 | # dims = (41,42,43);
|
288 | 288 | A = reshape(collect(Float64(0):Float64(prod(dims)-1)), dims);
|
|
345 | 345 | @test v1 === vu.data[1]
|
346 | 346 | @test v2 === vu.data[2]
|
347 | 347 | @test v3 === vu.data[3]
|
348 |
| - |
| 348 | + |
349 | 349 | ir = 0:(AV == 1 ? W64-1 : 0); jr = 0:(AV == 2 ? W64-1 : 0); kr = 0:(AV == 3 ? W64-1 : 0)
|
350 | 350 | x1 = getindex.(Ref(B), i .+ ir, j .+ jr, k .+ kr)
|
351 | 351 | if AU == 1
|
|
364 | 364 | kr = kr .+ length(kr)
|
365 | 365 | end
|
366 | 366 | x3 = getindex.(Ref(B), i .+ ir, j .+ jr, k .+ kr)
|
367 |
| - |
| 367 | + |
368 | 368 | @test x1 == tovector(vu.data[1])
|
369 | 369 | @test x2 == tovector(vu.data[2])
|
370 | 370 | @test x3 == tovector(vu.data[3])
|
|
398 | 398 | end
|
399 | 399 | @test x == 1:100
|
400 | 400 | end
|
401 |
| - |
| 401 | + |
402 | 402 | @testset "Grouped Strided Pointers" begin
|
403 | 403 | M, K, N = 4, 5, 6
|
404 | 404 | A = rand(M, K); B = rand(K, N); C = rand(M, N);
|
|
426 | 426 | Vec(ntuple(_ -> (randn()), Val(W64))...)
|
427 | 427 | ))
|
428 | 428 | x = tovector(v)
|
429 |
| - for f ∈ [-, abs, inv, floor, ceil, trunc, round, sqrt ∘ abs] |
| 429 | + for f ∈ [-, abs, inv, floor, ceil, trunc, round, sqrt ∘ abs, VectorizationBase.relu] |
430 | 430 | @test tovector(@inferred(f(v))) == map(f, x)
|
431 | 431 | end
|
432 | 432 | invtol = VectorizationBase.AVX512F ? 2^-14 : 1.5*2^-12 # more accurate with AVX512
|
|
470 | 470 | xi1 = tovector(vi1); xi2 = tovector(vi2);
|
471 | 471 | xi3 = mapreduce(tovector, vcat, m1.data);
|
472 | 472 | xi4 = mapreduce(tovector, vcat, m2.data);
|
473 |
| - for f ∈ [+, -, *, ÷, /, %, <<, >>, >>>, ⊻, &, |, VectorizationBase.rotate_left, VectorizationBase.rotate_right, copysign, max, min] |
| 473 | + for f ∈ [+, -, *, div, ÷, /, rem, %, <<, >>, >>>, ⊻, &, |, fld, mod, VectorizationBase.rotate_left, VectorizationBase.rotate_right, copysign, max, min] |
474 | 474 | # @show f
|
475 | 475 | check_within_limits(tovector(@inferred(f(vi1, vi2))), f.(xi1, xi2))
|
476 | 476 | check_within_limits(tovector(@inferred(f(j, vi2))), f.(j, xi2))
|
|
504 | 504 | @test tovector(@inferred(f(vf1, a))) ≈ f.(xf1, a)
|
505 | 505 | @test tovector(@inferred(f(vf2, a))) ≈ f.(xf2, a)
|
506 | 506 | end
|
507 |
| - |
| 507 | + |
| 508 | + vones, vi2f, vtwos = promote(1.0, vi2, 2f0); # promotes a binary function, right? Even when used with three args? |
| 509 | + @test vones === VectorizationBase.VecUnroll((vbroadcast(Val(W64), 1.0),vbroadcast(Val(W64), 1.0),vbroadcast(Val(W64), 1.0),vbroadcast(Val(W64), 1.0))); |
| 510 | + @test vtwos === VectorizationBase.VecUnroll((vbroadcast(Val(W64), 2.0),vbroadcast(Val(W64), 2.0),vbroadcast(Val(W64), 2.0),vbroadcast(Val(W64), 2.0))); |
| 511 | + @test VectorizationBase.vall(vi2f == vi2) |
| 512 | + W32 = StaticInt(W64)*StaticInt(2) |
| 513 | + vf2 = VectorizationBase.VecUnroll(( |
| 514 | + Vec(ntuple(_ -> Core.VecElement(randn(Float32)), W32)), |
| 515 | + Vec(ntuple(_ -> Core.VecElement(randn(Float32)), W32)) |
| 516 | + )) |
| 517 | + vones32, v2f32, vtwos32 = promote(1.0, vf2, 2f0); # promotes a binary function, right? Even when used with three args? |
| 518 | + @test vones32 === VectorizationBase.VecUnroll((vbroadcast(W32, 1f0),vbroadcast(W32, 1f0))) |
| 519 | + @test vtwos32 === VectorizationBase.VecUnroll((vbroadcast(W32, 2f0),vbroadcast(W32, 2f0))) |
| 520 | + @test vf2 === v2f32 |
| 521 | + |
| 522 | + @test tovector(clamp(m1, 2:i)) == clamp.(tovector(m1), 2, i) |
| 523 | + @test tovector(mod(m1, 1:i)) == mod1.(tovector(m1), i) |
| 524 | + |
508 | 525 | end
|
509 | 526 | @testset "Ternary Functions" begin
|
510 | 527 | v1 = Vec(ntuple(_ -> Core.VecElement(randn()), Val(W64)))
|
|
515 | 532 | m = Mask{W64}(0xce)
|
516 | 533 | mv = tovector(m)
|
517 | 534 | for f ∈ [
|
518 |
| - muladd, fma, |
| 535 | + muladd, fma, clamp, |
519 | 536 | VectorizationBase.vfmadd, VectorizationBase.vfnmadd, VectorizationBase.vfmsub, VectorizationBase.vfnmsub,
|
520 | 537 | VectorizationBase.vfmadd231, VectorizationBase.vfnmadd231, VectorizationBase.vfmsub231, VectorizationBase.vfnmsub231
|
521 | 538 | ]
|
|
560 | 577 | @test VectorizationBase.vprod(v2) * 3 == VectorizationBase.vprod(VectorizationBase.mulscalar(3, v2))
|
561 | 578 | @test VectorizationBase.vall(v1 + v2 == VectorizationBase.addscalar(v1, v2))
|
562 | 579 | @test 4.0 == VectorizationBase.addscalar(2.0, 2.0)
|
563 |
| - |
| 580 | + |
564 | 581 | v3 = Vec(0, 1, 2, 3); vu3 = VectorizationBase.VecUnroll((v3, v3 - 1))
|
565 | 582 | v4 = Vec(0.0, 1.0, 2.0, 3.0)
|
566 | 583 | v5 = Vec(0f0, 1f0, 2f0, 3f0, 4f0, 5f0, 6f0, 7f0)
|
|
591 | 608 | @test VectorizationBase.vzero() === VectorizationBase.vzero(W64S, Float64)
|
592 | 609 | @test VectorizationBase.vbroadcast(StaticInt(2)*W64S, one(Int64)) === VectorizationBase.vbroadcast(StaticInt(2)*W64S, one(Int32))
|
593 | 610 | @test VectorizationBase.vbroadcast(StaticInt(2)*W64S, one(UInt64)) === VectorizationBase.vbroadcast(StaticInt(2)*W64S, one(UInt32))
|
594 |
| - |
| 611 | + |
595 | 612 | @test VectorizationBase.vall(VectorizationBase.vbroadcast(W64S, pointer(A)) == vbroadcast(W64S, first(A)))
|
596 | 613 | @test VectorizationBase.vbroadcast(W64S, pointer(A,2)) === Vec{W64}(A[2]) === Vec(A[2])
|
597 | 614 |
|
|
630 | 647 | @test vtwos32 === VectorizationBase.VecUnroll((vbroadcast(StaticInt(W32), 2f0),vbroadcast(StaticInt(W32), 2f0)))
|
631 | 648 | @test vf2 === v2f32
|
632 | 649 |
|
633 |
| - |
| 650 | + |
634 | 651 | vm = if VectorizationBase.AVX512DQ
|
635 | 652 | VectorizationBase.VecUnroll((
|
636 | 653 | MM{W64}(rand(Int)),MM{W64}(rand(Int)),MM{W64}(rand(Int)),MM{W64}(rand(Int))
|
|
0 commit comments