Skip to content

Structured matrix multiplication #814

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 28 commits into from
Sep 24, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
2eba36d
structured matrix multiplication pt 1
mateuszbaran Aug 11, 2020
6dc5694
mul! doesn't always take parent; allocations on Julia 1.5
mateuszbaran Aug 11, 2020
6eaf1e1
updating triangular matrix multiplication to the new scheme
mateuszbaran Aug 12, 2020
56c9ca5
more tests for multiplication
mateuszbaran Aug 12, 2020
e725e4d
adjoint and transpose wrappers for multiplication; more documentation
mateuszbaran Aug 13, 2020
49d1043
partical unification of in-placed and out-of-place matrix multiplication
mateuszbaran Aug 13, 2020
9fece75
more matrix types for multiplication
mateuszbaran Aug 13, 2020
246c256
Merge branch 'master' of https://github.com/JuliaArrays/StaticArrays.…
mateuszbaran Aug 13, 2020
f39afe0
fixed code for symmetric and hermitian multiplication and small cleanup
mateuszbaran Aug 14, 2020
f41d74c
some work on in-place structured multiplication
mateuszbaran Aug 18, 2020
8dd4523
minor fixes
mateuszbaran Aug 18, 2020
3e96bbc
optimized multiplication by triangular matrices
mateuszbaran Aug 18, 2020
8e11852
blas mul! fix and matrix multiplication benchmarks
mateuszbaran Aug 19, 2020
767aa19
small matmul benchmark fix
mateuszbaran Aug 19, 2020
f295ba6
slightly relaxing allocation tests
mateuszbaran Aug 20, 2020
e6667bf
adding Diagonal to the new matrix multiplication scheme
mateuszbaran Aug 22, 2020
0ff9b55
slight adjustments to matrix multiplication
mateuszbaran Aug 23, 2020
fd8cd5c
modified matrix multiplication heuristics
mateuszbaran Aug 24, 2020
960beb6
matmul muladd and improved order.
chriselrod Aug 24, 2020
ef1ba23
14 -> 12 for loopmul decisions.
chriselrod Aug 24, 2020
86eab40
BLAS decision should be for larger than 14x14, if anything.
chriselrod Aug 24, 2020
4fd4e9d
fixing matmul benchmark
mateuszbaran Aug 24, 2020
59b4a9b
by default use a reduced set of matmul benchmarks
mateuszbaran Aug 24, 2020
094ea90
Merge branch 'muladdmul' into mbaran/matmul-symmetric
mateuszbaran Aug 25, 2020
5e999c7
muladd in combine_products
mateuszbaran Aug 25, 2020
6229999
formatting fix
mateuszbaran Aug 25, 2020
4d3f9ca
small cleanup in matmul benchmarks
mateuszbaran Aug 26, 2020
373032a
Merge branch 'master' into mbaran/matmul-symmetric
mateuszbaran Sep 16, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
212 changes: 212 additions & 0 deletions benchmark/bench_mat_mul.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
module BenchmarkMatMul

using StaticArrays
using BenchmarkTools
using LinearAlgebra
using Printf

suite = BenchmarkGroup()

mul_wrappers = [
(m -> m, "ident "),
(m -> Symmetric(m, :U), "sym-u "),
(m -> Hermitian(m, :U), "herm-u "),
(m -> UpperTriangular(m), "up-tri "),
(m -> LowerTriangular(m), "lo-tri "),
(m -> UnitUpperTriangular(m), "uup-tri"),
(m -> UnitLowerTriangular(m), "ulo-tri"),
(m -> Adjoint(m), "adjoint"),
(m -> Transpose(m), "transpo"),
(m -> Diagonal(m), "diag ")]

mul_wrappers_reduced = [
(m -> m, "ident "),
(m -> Symmetric(m, :U), "sym-u "),
(m -> UpperTriangular(m), "up-tri "),
(m -> Transpose(m), "transpo"),
(m -> Diagonal(m), "diag ")]

for N in [2, 4, 8, 10, 16]

matvecstr = @sprintf("mat-vec %2d", N)
matmatstr = @sprintf("mat-mat %2d", N)
matvec_mut_str = @sprintf("mat-vec! %2d", N)
matmat_mut_str = @sprintf("mat-mat! %2d", N)

suite[matvecstr] = BenchmarkGroup()
suite[matmatstr] = BenchmarkGroup()
suite[matvec_mut_str] = BenchmarkGroup()
suite[matmat_mut_str] = BenchmarkGroup()


A = randn(SMatrix{N,N,Float64})
B = randn(SMatrix{N,N,Float64})
bv = randn(SVector{N,Float64})
for (wrapper_a, wrapper_name) in mul_wrappers_reduced
thrown = false
try
wrapper_a(A) * bv
catch e
thrown = true
end
if !thrown
suite[matvecstr][wrapper_name] = @benchmarkable $(Ref(wrapper_a(A)))[] * $(Ref(bv))[]
end
end

for (wrapper_a, wrapper_a_name) in mul_wrappers, (wrapper_b, wrapper_b_name) in mul_wrappers
thrown = false
try
wrapper_a(A) * wrapper_b(B)
catch e
thrown = true
end
if !thrown
suite[matmatstr][wrapper_a_name * " * " * wrapper_b_name] = @benchmarkable $(Ref(wrapper_a(A)))[] * $(Ref(wrapper_b(B)))[]
end
end

C = randn(MMatrix{N,N,Float64})
cv = randn(MVector{N,Float64})

for (wrapper_a, wrapper_name) in mul_wrappers
thrown = false
try
mul!(cv, wrapper_a(A), bv)
catch e
thrown = true
end
if !thrown
suite[matvec_mut_str][wrapper_name] = @benchmarkable mul!($cv, $(Ref(wrapper_a(A)))[], $(Ref(bv))[])
end
end

for (wrapper_a, wrapper_a_name) in mul_wrappers, (wrapper_b, wrapper_b_name) in mul_wrappers
thrown = false
try
mul!(C, wrapper_a(A), wrapper_b(B))
catch e
thrown = true
end
if !thrown
suite[matmat_mut_str][wrapper_a_name * " * " * wrapper_b_name] = @benchmarkable mul!($C, $(Ref(wrapper_a(A)))[], $(Ref(wrapper_b(B)))[])
end
end
end

function run_and_save(fname, make_params = true)
if make_params
tune!(suite)
BenchmarkTools.save("params.json", params(suite))
else
loadparams!(suite, BenchmarkTools.load("params.json")[1], :evals, :samples)
end
results = run(suite, verbose = true)
BenchmarkTools.save(fname, results)
end

function judge_results(m1, m2)
results = Any[]
for key1 in keys(m1)
if !haskey(m2, key1)
continue
end
for key2 in keys(m1[key1])
if !haskey(m2[key1], key2)
continue
end
push!(results, (key1, key2, judge(median(m1[key1][key2]), median(m2[key1][key2]))))
end
end
return results
end

function full_benchmark(mul_wrappers, size_iter = 1:4, T = Float64)
suite_full = BenchmarkGroup()
for N in size_iter
for M in size_iter
a = randn(SMatrix{N,M,T})
wrappers_a = N == M ? mul_wrappers : [mul_wrappers[1]]
sa = Size(a)
for K in size_iter
b = randn(SMatrix{M,K,T})
wrappers_b = M == K ? mul_wrappers : [mul_wrappers[1]]
sb = Size(b)
for (w_a, w_a_name) in wrappers_a
for (w_b, w_b_name) in wrappers_b
cur_str = @sprintf("mat-mat %s %s generic (%2d, %2d) x (%2d, %2d)", w_a_name, w_b_name, N, M, M, K)
suite_full[cur_str] = @benchmarkable StaticArrays.mul_generic($sa, $sb, $(Ref(w_a(a)))[], $(Ref(w_b(b)))[])
cur_str = @sprintf("mat-mat %s %s default (%2d, %2d) x (%2d, %2d)", w_a_name, w_b_name, N, M, M, K)
suite_full[cur_str] = @benchmarkable StaticArrays._mul($sa, $sb, $(Ref(w_a(a)))[], $(Ref(w_b(b)))[])
cur_str = @sprintf("mat-mat %s %s unrolled (%2d, %2d) x (%2d, %2d)", w_a_name, w_b_name, N, M, M, K)
suite_full[cur_str] = @benchmarkable StaticArrays.mul_unrolled($sa, $sb, $(Ref(w_a(a)))[], $(Ref(w_b(b)))[])
if w_a_name != "diag " && w_b_name != "diag "
cur_str = @sprintf("mat-mat %s %s chunks (%2d, %2d) x (%2d, %2d)", w_a_name, w_b_name, N, M, M, K)
suite_full[cur_str] = @benchmarkable StaticArrays.mul_unrolled_chunks($sa, $sb, $(Ref(w_a(a)))[], $(Ref(w_b(b)))[])
end
if w_a_name == "ident " && w_b_name == "ident "
cur_str = @sprintf("mat-mat %s %s loop (%2d, %2d) x (%2d, %2d)", w_a_name, w_b_name, N, M, M, K)
suite_full[cur_str] = @benchmarkable StaticArrays.mul_loop($sa, $sb, $(Ref(w_a(a)))[], $(Ref(w_b(b)))[])
end
end
end
end
end
end
results = run(suite_full, verbose = true)
results_median = map(collect(results)) do res
return (res[1], median(res[2]).time)
end
return results_median
end

function judge_this(new_time, old_time, tol, w_a_name, w_b_name, N, M, K, which)
if new_time*tol < old_time
msg = @sprintf("better for %s %s (%2d, %2d) x (%2d, %2d): %s", w_a_name, w_b_name, N, M, M, K, which)
println(msg)
println(">> ", new_time, " | ", old_time)
end
end

function pick_best(results, mul_wrappers, size_iter; tol = 1.2)
for N in size_iter
for M in size_iter
wrappers_a = N == M ? mul_wrappers : [mul_wrappers[1]]
for K in size_iter
wrappers_b = M == K ? mul_wrappers : [mul_wrappers[1]]
for (w_a, w_a_name) in wrappers_a
for (w_b, w_b_name) in wrappers_b
cur_default = @sprintf("mat-mat %s %s default (%2d, %2d) x (%2d, %2d)", w_a_name, w_b_name, N, M, M, K)
default_time = results[cur_default]

cur_generic = @sprintf("mat-mat %s %s generic (%2d, %2d) x (%2d, %2d)", w_a_name, w_b_name, N, M, M, K)
generic_time = results[cur_generic]
judge_this(generic_time, default_time, tol, w_a_name, w_b_name, N, M, K, "generic")

cur_unrolled = @sprintf("mat-mat %s %s unrolled (%2d, %2d) x (%2d, %2d)", w_a_name, w_b_name, N, M, M, K)
unrolled_time = results[cur_unrolled]
judge_this(unrolled_time, default_time, tol, w_a_name, w_b_name, N, M, K, "unrolled")

if w_a_name != "diag " && w_b_name != "diag "
cur_chunks = @sprintf("mat-mat %s %s chunks (%2d, %2d) x (%2d, %2d)", w_a_name, w_b_name, N, M, M, K)
chunk_time = results[cur_chunks]
judge_this(chunk_time, default_time, tol, w_a_name, w_b_name, N, M, K, "chunks")
end
if w_a_name == "ident " && w_b_name == "ident "
cur_loop = @sprintf("mat-mat %s %s loop (%2d, %2d) x (%2d, %2d)", w_a_name, w_b_name, N, M, M, K)
loop_time = results[cur_loop]
judge_this(loop_time, default_time, tol, w_a_name, w_b_name, N, M, K, "loop")
end
end
end
end
end
end
end

function run_1()
return full_benchmark(mul_wrappers_reduced, [2, 3, 4, 5, 8, 9, 14, 16])
end

end #module
BenchmarkMatMul.suite
2 changes: 0 additions & 2 deletions src/SDiagonal.jl
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@ size(::Type{<:SDiagonal{N}}) where {N} = (N,N)
size(::Type{<:SDiagonal{N}}, d::Int) where {N} = d > 2 ? 1 : N

# define specific methods to avoid allocating mutable arrays
*(A::StaticMatrix, D::SDiagonal) = A .* transpose(D.diag)
*(D::SDiagonal, A::StaticMatrix) = D.diag .* A
\(D::SDiagonal, b::AbstractVector) = D.diag .\ b
\(D::SDiagonal, b::StaticVector) = D.diag .\ b # catch ambiguity

Expand Down
Loading