@@ -146,8 +146,49 @@ array. The use of `@turbo` macro gives a significant performance boost.
146146- `dij_min`: The minimum value in the first `n` elements of the `dij` array.
147147- `best`: The index of the minimum value in the `dij` array.
148148"""
149- fast_findmin (dij, n) = begin
150- x = @fastmath foldl (min, @view (dij[begin : n]))
151- i = findfirst (== (x), dij):: Int
152- x, i
149+ function fast_findmin end  # zero-method declaration: gives the docstring above one function to attach to; the methods are defined per architecture in the `if Sys.ARCH` block that follows
150+
151+ if Sys. ARCH == :aarch64
# aarch64 method: reduce the first `n` entries of `dij` with a fast-math `min`,
# then look up the first position in `dij` holding that value.
# Returns the tuple `(dij_min, best)`.
function fast_findmin(dij, n)
    head = @view dij[begin:n]
    smallest = @fastmath foldl(min, head)
    # The `::Int` assertion turns a failed lookup (`nothing`) into a TypeError.
    position = findfirst(==(smallest), dij)::Int
    return smallest, position
end
157+ else
# SIMD method, used on every architecture except aarch64.
#
# Scans `dij[1:n]` eight lanes at a time, tracking for each lane its running
# minimum and the index at which that minimum was first seen. The eight lanes are
# then reduced to a single (value, index) pair and the `n % 8` trailing elements
# are folded in with a scalar loop.
#
# Arguments
# - `dij`: dense vector of values; indexed starting at 1.
# - `n`:   number of leading elements of `dij` to scan.
#
# Returns `(dij_min, best)`. When several entries share the minimum value, `best`
# is the smallest such index, which is what the `findfirst`-based aarch64 method
# returns as well. For `n <= 0` nothing is scanned and `(typemax(T), 0)` is returned.
function fast_findmin(dij::DenseVector{T}, n) where {T}
    laneIndices = SIMD.Vec{8, Int}((1, 2, 3, 4, 5, 6, 7, 8))
    # typemax(T) is Inf for floating-point T and, unlike a literal Inf, is also
    # representable when T is an integer type.
    minvals = SIMD.Vec{8, T}(typemax(T))
    # 0 is a sentinel meaning "this lane has not recorded a minimum yet".
    min_indices = SIMD.Vec{8, Int}(0)

    n_batches, remainder = divrem(n, 8)
    lane = VecRange{8}(0)
    i = 1
    @inbounds @fastmath for _ in 1:n_batches
        dijs = dij[lane + i]
        # Strict `<` keeps the earliest index within each lane on ties.
        predicate = dijs < minvals
        minvals = vifelse(predicate, dijs, minvals)
        min_indices = vifelse(predicate, laneIndices, min_indices)

        i += 8
        laneIndices += 8
    end

    min_value = SIMD.minimum(minvals)

    # Cross-lane reduction: among the lanes that hold the overall minimum, take the
    # smallest recorded index. Picking the first matching lane instead would return
    # a later index whenever an earlier occurrence sits in a higher lane.
    min_index = 0
    @inbounds for k in 1:8
        idx = min_indices[k]
        if idx != 0 && minvals[k] == min_value && (min_index == 0 || idx < min_index)
            min_index = idx
        end
    end

    # Scalar tail. All tail indices are larger than any batch index, so the strict
    # `<` preserves the "first occurrence wins" rule.
    @inbounds @fastmath for _ in 1:remainder
        xi = dij[i]
        pred = xi < min_value
        min_value = ifelse(pred, xi, min_value)
        min_index = ifelse(pred, i, min_index)
        i += 1
    end

    # Nothing compared strictly below the seed (e.g. every scanned entry equals
    # typemax(T)): the minimum is then the seed value and its first occurrence is
    # the first element, so report index 1 rather than the invalid sentinel 0.
    if min_index == 0 && n > 0
        min_index = 1
    end
    return min_value, min_index
end
153194end
0 commit comments