Skip to content

Commit f8c7b21

Browse files
committed
Make better use of Metal APIs
[only benchmark]
1 parent 6997e40 commit f8c7b21

File tree

2 files changed

+5
-5
lines changed

2 files changed

+5
-5
lines changed

perf/byval.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ const threads = 256
66

77
# simple add matrixes kernel
88
function kernel_add_mat(n, x1, x2, y)
9-
i = (threadgroup_position_in_grid_2d().x-1) * threadgroups_per_grid_2d().x + thread_position_in_threadgroup_2d().x
9+
i = thread_position_in_grid_1d()
1010
if i <= n
1111
@inbounds y[i] = x1[i] + x2[i]
1212
end
@@ -20,7 +20,7 @@ end
2020
# add arrays of matrixes kernel
2121
function kernel_add_mat_z_slices(n, vararg...)
2222
x1, x2, y = get_inputs3(threadgroup_position_in_grid_2d().y, vararg...)
23-
i = (threadgroup_position_in_grid_2d().x-1) * threadgroups_per_grid_2d().x + thread_position_in_threadgroup_2d().x
23+
i = thread_position_in_grid_1d()
2424
if i <= n
2525
@inbounds y[i] = x1[i] + x2[i]
2626
end

perf/kernel.jl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,22 +13,22 @@ group["launch"] = @benchmarkable @metal identity(nothing)
1313
src = Metal.rand(Float32, 512, 1000)
1414
dest = similar(src)
1515
function indexing_kernel(dest, src)
16-
i = (threadgroup_position_in_grid_2d().x-1) * threadgroups_per_grid_2d().x + thread_position_in_threadgroup_2d().x
16+
i = thread_position_in_grid_1d()
1717
@inbounds dest[i] = src[i]
1818
return
1919
end
2020
group["indexing"] = @async_benchmarkable @metal threads=size(src,1) groups=size(src,2) $indexing_kernel($dest, $src)
2121

2222
function checked_indexing_kernel(dest, src)
23-
i = (threadgroup_position_in_grid_2d().x-1) * threadgroups_per_grid_2d().x + thread_position_in_threadgroup_2d().x
23+
i = thread_position_in_grid_1d()
2424
dest[i] = src[i]
2525
return
2626
end
2727
group["indexing_checked"] = @async_benchmarkable @metal threads=size(src,1) groups=size(src,2) $checked_indexing_kernel($dest, $src)
2828

2929
## DELETE
3030
# function rand_kernel(dest::AbstractArray{T}) where {T}
31-
# i = (threadgroup_position_in_grid_2d().x-1) * threadgroups_per_grid_2d().x + thread_position_in_threadgroup_2d().x
31+
# i = thread_position_in_grid_1d()
3232
# dest[i] = Metal.rand(T)
3333
# return
3434
# end

0 commit comments

Comments
 (0)