
Commit 053c5ec

Apple silicon workaround (#196)
* avoid reductions with custom operators for MPI
* avoid MultiScalar reduction only on non-intel
* move changes to mapreduce
* small tweak
* fix mpi errors
* clean up
* use MPI logical operator directly
* Remove MultiScalar from sort_into_targets!
  fciqmc_col! may now return stats as a vector
* Revert "Remove MultiScalar from sort_into_targets!"
  This reverts commit b54868f.

---------

Co-authored-by: Joachim Brand <[email protected]>
1 parent 8819209 commit 053c5ec
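For orientation (not part of the commit itself): on Apple silicon and other non-Intel hosts, MPI.jl cannot wrap arbitrary Julia functions into custom MPI reduction operators (see JuliaParallel/MPI.jl#404), so the changes below route Rimu's MPI reductions through operators that map to built-in MPI operations such as `+`, `*`, and `MPI.LAND`. A minimal sketch of the distinction, assuming an MPI-launched Julia session:

using MPI

MPI.Init()
comm = MPI.COMM_WORLD

# A built-in function like `+` is translated to the predefined MPI.SUM operation
# and works on every architecture:
total = MPI.Allreduce(1, +, comm)

# An arbitrary Julia function would need a custom MPI.Op, which MPI.jl cannot
# build on non-Intel architectures such as Apple silicon:
# total = MPI.Allreduce(1, (a, b) -> a + b, comm)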

7 files changed, +52 / -19 lines

src/RMPI/RMPI.jl

Lines changed: 1 addition & 0 deletions
@@ -31,6 +31,7 @@ const mpi_registry = Dict{Int,Any}()
abstract type DistributeStrategy end

include("mpidata.jl")
+include("multiscalar.jl")
include("helpers.jl")
include("noexchange.jl")
include("pointtopoint.jl")

src/RMPI/helpers.jl

Lines changed: 1 addition & 1 deletion
@@ -87,7 +87,7 @@ end

function sort_into_targets!(dtarget::MPIData, w::AbstractDVec, stats)
    # single threaded MPI version
-    mpi_combine_walkers!(dtarget,w) # combine walkers from different MPI ranks
+    mpi_combine_walkers!(dtarget, w) # combine walkers from different MPI ranks
    res_stats = MPI.Allreduce(Rimu.MultiScalar(stats), +, dtarget.comm)
    return dtarget, w, res_stats
end
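For context (not part of the diff): `stats` holds the per-rank spawning statistics, and wrapping it in a `MultiScalar` lets a single `Allreduce` with `+` sum every entry across ranks. A sketch of that element-wise semantics with made-up values (the numbers are hypothetical):

using Rimu

rank0_stats = Rimu.MultiScalar((10, 2.5))  # hypothetical stats gathered on rank 0
rank1_stats = Rimu.MultiScalar((4, 1.0))   # hypothetical stats gathered on rank 1

# MPI.Allreduce(ms, +, comm) leaves every rank holding the element-wise sum:
@assert Tuple(rank0_stats + rank1_stats) == (14, 3.5)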

src/RMPI/mpidata.jl

Lines changed: 20 additions & 1 deletion
@@ -92,7 +92,26 @@ end

function Base.mapreduce(f, op, it::MPIDataIterator; kwargs...)
    res = mapreduce(f, op, it.iter; kwargs...)
-    return MPI.Allreduce(res, op, it.data.comm)
+    T = typeof(res)
+    if T <: Bool # MPI.jl does not support Bool reductions
+        res = convert(UInt8, res)
+    end
+    return T(MPI.Allreduce(res, op, it.data.comm))
+end
+
+# Special case for `sum`, which uses a custom (type-widening) reduction operator `add_sum`.
+# Replacing it by `+` is necessary for non-Intel architectures due to a limitation of
+# MPI.jl. On Intel processors, it might be more performant.
+# See https://github.com/JuliaParallel/MPI.jl/issues/404
+function Base.mapreduce(f, op::typeof(Base.add_sum), it::MPIDataIterator; kwargs...)
+    res = mapreduce(f, op, it.iter; kwargs...)
+    return MPI.Allreduce(res, +, it.data.comm)
+end
+
+# Special case for `prod`, which uses a custom (type-widening) reduction operator `mul_prod`.
+function Base.mapreduce(f, op::typeof(Base.mul_prod), it::MPIDataIterator; kwargs...)
+    res = mapreduce(f, op, it.iter; kwargs...)
+    return MPI.Allreduce(res, *, it.data.comm)
end

Base.IteratorSize(::MPIDataIterator) = Base.SizeUnknown()
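For context (not part of the diff): the two extra methods exist because `sum` and `prod` do not call `mapreduce` with `+` and `*` but with Base's private widening operators, which would otherwise reach `MPI.Allreduce` as custom operators. A quick sketch of the lowering this dispatch relies on:

xs = [1, 2, 3]

# `sum` and `prod` lower to `mapreduce` with the widening operators intercepted above:
@assert sum(xs) == mapreduce(identity, Base.add_sum, xs)
@assert prod(xs) == mapreduce(identity, Base.mul_prod, xs)

# Replacing them with `+` and `*` lets MPI.jl use the predefined MPI.SUM and
# MPI.PROD operations instead of building a custom MPI.Op.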

src/RMPI/multiscalar.jl

Lines changed: 10 additions & 0 deletions
@@ -0,0 +1,10 @@
+# Make MPI reduction of a `MultiScalar` work on non-Intel processors.
+# The `MultiScalar` is converted into a vector before sending through MPI.Allreduce.
+# Testing shows that this is about the same speed or even a bit faster on Intel processors
+# than reducing the MultiScalar directly via a custom reduction operator.
+# Defining the method in RMPI is strictly type piracy as MultiScalar belongs to Rimu and
+# not to RMPI. Might clean this up later.
+function MPI.Allreduce(ms::Rimu.MultiScalar{T}, op, comm::MPI.Comm) where {T<:Tuple}
+    result_vector = MPI.Allreduce([ms...], op, comm)
+    return Rimu.MultiScalar(T(result_vector))
+end
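A short usage sketch of the new method (assuming an MPI-launched session with Rimu's RMPI submodule loaded): the tuple is flattened to a `Vector` for the transfer, so MPI only sees the built-in `+` reduction, and the result is rewrapped with the original element types.

using MPI, Rimu, Rimu.RMPI

MPI.Init()
ms = Rimu.MultiScalar((1, 2.5))  # e.g. an Int counter and a Float64 statistic

# Summed element-wise across all ranks without a custom MPI operator:
total = MPI.Allreduce(ms, +, MPI.COMM_WORLD)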

src/helpers.jl

Lines changed: 1 addition & 0 deletions
@@ -34,6 +34,7 @@ end
MultiScalar(args...) = MultiScalar(args)
MultiScalar(v::SVector) = MultiScalar(Tuple(v))
MultiScalar(m::MultiScalar) = m
+MultiScalar{T}(m::MultiScalar{T}) where T<:Tuple = m
MultiScalar(arg) = MultiScalar((arg,))

Base.getindex(m::MultiScalar, i) = m.tuple[i]
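For context (not part of the diff), a sketch of what the added one-line method does: it turns the fully parameterised constructor into a no-op when it is handed a `MultiScalar` that already has the requested tuple type (presumably such a call previously failed for lack of a matching conversion):

using Rimu

ms = Rimu.MultiScalar((1, 2.0))
# With the new method this returns `ms` itself rather than erroring:
@assert Rimu.MultiScalar{Tuple{Int,Float64}}(ms) === ms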

test/RMPI.jl

Lines changed: 11 additions & 11 deletions
@@ -6,17 +6,17 @@ using Test

@testset "DistributeStrategies" begin
    # `DistributeStrategy`s
-    ham = HubbardReal1D(BoseFS((1,2,3)))
+    ham = HubbardReal1D(BoseFS((1, 2, 3)))
    for setup in [RMPI.mpi_no_exchange, RMPI.mpi_all_to_all, RMPI.mpi_point_to_point]
-        dv = DVec(starting_address(ham)=>10; style=IsDynamicSemistochastic())
+        dv = DVec(starting_address(ham) => 10; style=IsDynamicSemistochastic())
        v = MPIData(dv; setup)
-        df, state = lomc!(ham,v)
+        df, state = lomc!(ham, v)
        @test size(df) == (100, 12)
    end
    # need to do mpi_one_sided separately
-    dv = DVec(starting_address(ham)=>10; style=IsDynamicSemistochastic())
-    v = RMPI.mpi_one_sided(dv; capacity = 1000)
-    df, state = lomc!(ham,v)
+    dv = DVec(starting_address(ham) => 10; style=IsDynamicSemistochastic())
+    v = RMPI.mpi_one_sided(dv; capacity=1000)
+    df, state = lomc!(ham, v)
    @test size(df) == (100, 12)
end

@@ -29,13 +29,13 @@ end
    counts = zeros(Int, k)
    displs = zeros(Int, k)

-    RMPI.sort_and_count!(counts, displs, vals, ordfun.(vals), (0, k-1))
+    RMPI.sort_and_count!(counts, displs, vals, ordfun.(vals), (0, k - 1))
    @test issorted(vals, by=ordfun)
    @test sum(counts) == l

-    for i in 0:(k - 1)
-        c = counts[i + 1]
-        d = displs[i + 1]
+    for i in 0:(k-1)
+        c = counts[i+1]
+        d = displs[i+1]
        r = (1:c) .+ d
        ords = ordfun.(vals)
        @test all(ords[r] .== i)
@@ -79,7 +79,7 @@ end
    @testset "dot" begin
        @test dot(dv1, dv2) == 0
        @test dot(dv1, dv1) == dot(localpart(dv1), dv1)
-        rand_ham = MatrixHamiltonian(rand(ComplexF64, 4,4))
+        rand_ham = MatrixHamiltonian(rand(ComplexF64, 4, 4))
        ldv1 = localpart(dv1)
        @test norm(dot(dv1, rand_ham, dv1)) ≈ norm(dot(ldv1, rand_ham, ldv1))
    end

test/mpi_runtests.jl

Lines changed: 8 additions & 6 deletions
@@ -71,7 +71,7 @@ end
end
@testset "Single component $type" begin
    for i in 1:N_REPEATS
-        add = BoseFS((0,0,10,0,0))
+        add = BoseFS((0, 0, 10, 0, 0))
        H = HubbardMom1D(add)
        Random.seed!(7350 * i)
        v, dv = setup_dv(
@@ -98,7 +98,7 @@ end
    @test sum(values(v)) ≈ sum(values(dv))
    f((k, v)) = (k == add) + v > 0
    @test mapreduce(f, |, pairs(v); init=true) ==
-          mapreduce(f, |, pairs(dv); init=true)
+        mapreduce(f, |, pairs(dv); init=true)
end

@testset "Operations" begin
@@ -127,7 +127,7 @@ end
end
@testset "Two-component $type" begin
    for i in 1:N_REPEATS
-        add = BoseFS2C((0,0,10,0,0), (0,0,2,0,0))
+        add = BoseFS2C((0, 0, 10, 0, 0), (0, 0, 2, 0, 0))
        H = BoseHubbardMom1D2C(add)
        Random.seed!(7350 * i)
        v, dv = setup_dv(
@@ -225,7 +225,7 @@ end
    (RMPI.mpi_one_sided, (; capacity=1000)),
)
@testset "Regular with $setup and post-steps" begin
-    H = HubbardReal1D(BoseFS((1,1,1,1,1,1,1)); u=6.0)
+    H = HubbardReal1D(BoseFS((1, 1, 1, 1, 1, 1, 1)); u=6.0)
    dv = MPIData(
        DVec(starting_address(H) => 3; style=IsDynamicSemistochastic());
        setup,
@@ -253,7 +253,7 @@ end
    @test all(0 .≤ df.loneliness .≤ 1)
end
@testset "Initiator with $setup" begin
-    H = HubbardMom1D(BoseFS((0,0,0,7,0,0,0)); u=6.0)
+    H = HubbardMom1D(BoseFS((0, 0, 0, 7, 0, 0, 0)); u=6.0)
    dv = MPIData(
        InitiatorDVec(starting_address(H) => 3);
        setup,
@@ -295,7 +295,9 @@ end

# Make sure all ranks came this far.
@testset "Finish" begin
-    @test MPI.Allreduce(true, &, mpi_comm())
+    # MPI.jl currently doesn't properly map logical operators (MPI v0.20.8)
+    @test MPI.Allreduce(true, MPI.LAND, mpi_comm())
+    # @test MPI.Allreduce(true, &, mpi_comm())
end
end
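For context (not part of the diff): `MPI.LAND` is the predefined logical-AND reduction, so passing it directly sidesteps MPI.jl's translation of the Julia function `&`, which the committed comment flags as not mapped properly as of MPI.jl v0.20.8. A minimal sketch of the same check, assuming an MPI-launched session:

using MPI

MPI.Init()
comm = MPI.COMM_WORLD

# Every rank contributes `true`; the predefined logical AND needs no operator mapping:
all_ok = MPI.Allreduce(true, MPI.LAND, comm)
@assert all_ok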
